From 8391a859f9860ea0fa9cd4311c56d46fd043471b Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Tue, 28 Apr 2026 14:19:53 +0530 Subject: [PATCH 01/17] feat: add icebug-disk tables --- src/binder/bind/bind_ddl.cpp | 42 +- src/catalog/catalog.cpp | 4 +- .../node_table_catalog_entry.cpp | 9 +- .../catalog_entry/rel_group_catalog_entry.cpp | 16 +- .../binder/ddl/bound_create_table_info.h | 21 +- .../catalog_entry/node_table_catalog_entry.h | 6 +- .../catalog_entry/rel_group_catalog_entry.h | 6 + src/include/common/constants.h | 3 + .../storage/table/ice_disk_node_table.h | 110 +++++ .../storage/table/ice_disk_rel_table.h | 138 ++++++ src/include/storage/table/table.h | 1 + .../operator/scan/scan_multi_rel_tables.cpp | 11 +- .../operator/scan/scan_node_table.cpp | 53 ++- .../operator/scan/scan_rel_table.cpp | 6 + src/storage/storage_manager.cpp | 34 +- src/storage/table/CMakeLists.txt | 2 + src/storage/table/ice_disk_node_table.cpp | 209 +++++++++ src/storage/table/ice_disk_rel_table.cpp | 399 ++++++++++++++++++ test/include/test_runner/test_group.h | 3 +- test/storage/CMakeLists.txt | 1 + test/storage/ice_disk_test.cpp | 157 +++++++ .../test_files/demo_db/demo_db_graph_std.test | 2 +- test/test_files/demo_db/demo_db_ice_disk.test | 65 +++ test/test_runner/test_parser.cpp | 3 + 24 files changed, 1266 insertions(+), 35 deletions(-) create mode 100644 src/include/storage/table/ice_disk_node_table.h create mode 100644 src/include/storage/table/ice_disk_rel_table.h create mode 100644 src/storage/table/ice_disk_node_table.cpp create mode 100644 src/storage/table/ice_disk_rel_table.cpp create mode 100644 test/storage/ice_disk_test.cpp create mode 100644 test/test_files/demo_db/demo_db_ice_disk.test diff --git a/src/binder/bind/bind_ddl.cpp b/src/binder/bind/bind_ddl.cpp index 30c8413be4..8e729208aa 100644 --- a/src/binder/bind/bind_ddl.cpp +++ b/src/binder/bind/bind_ddl.cpp @@ -170,6 +170,31 @@ static std::string getStorage(const case_insensitive_map_t& options) { if 
(options.contains(TableOptionConstants::REL_STORAGE_OPTION)) { return options.at(TableOptionConstants::REL_STORAGE_OPTION).toString(); } + + return ""; +} + +static std::string getTablePath(const case_insensitive_map_t& options) { + if (options.contains(TableOptionConstants::TABLE_PATH)) { + return options.at(TableOptionConstants::TABLE_PATH).toString(); + } + + return ""; +} + +static std::string getIndicesPath(const case_insensitive_map_t& options) { + if (options.contains(TableOptionConstants::INDICES_OPTION)) { + return options.at(TableOptionConstants::INDICES_OPTION).toString(); + } + + return ""; +} + +static std::string getIndptrPath(const case_insensitive_map_t& options) { + if (options.contains(TableOptionConstants::INDPTR_OPTION)) { + return options.at(TableOptionConstants::INDPTR_OPTION).toString(); + } + return ""; } @@ -187,8 +212,9 @@ BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info validatePrimaryKey(extraInfo.pKName, propertyDefinitions); auto boundOptions = bindParsingOptions(extraInfo.options); auto storage = getStorage(boundOptions); + auto tablePath = getTablePath(boundOptions); auto boundExtraInfo = std::make_unique(extraInfo.pKName, - std::move(propertyDefinitions), std::move(storage)); + std::move(propertyDefinitions), std::move(storage), std::move(tablePath)); return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName, info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry()); } @@ -211,6 +237,8 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* auto boundOptions = bindParsingOptions(extraInfo.options); auto storageDirection = getStorageDirection(boundOptions); auto storage = getStorage(boundOptions); + auto indicesPath = getIndicesPath(boundOptions); + auto indptrPath = getIndptrPath(boundOptions); std::optional scanFunction = std::nullopt; std::optional> scanBindData = std::nullopt; std::string foreignDatabaseName; @@ -310,6 
+338,16 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* } } + // For icebug-disk rel tables, validate that FROM and TO are icebug-disk tables + if (storage.find("icebug-disk") != std::string::npos) { + auto srcNodeEntry = srcEntry->ptrCast(); + auto dstNodeEntry = dstEntry->ptrCast(); + if (srcNodeEntry->getStorage().find("icebug-disk") == std::string::npos || + dstNodeEntry->getStorage().find("icebug-disk") == std::string::npos) { + throw BinderException("icebug-disk rel tables require both FROM and TO tables to be icebug-disk node tables."); + } + } + // Use the actual shadow table IDs, not FOREIGN_TABLE_ID // The shadow tables allow the query planner to distinguish between different node tables auto srcTableID = srcEntry->getTableID(); @@ -324,7 +362,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* } auto boundExtraInfo = std::make_unique( std::move(propertyDefinitions), srcMultiplicity, dstMultiplicity, storageDirection, - std::move(nodePairs), std::move(storage), std::move(scanFunction), std::move(scanBindData), + std::move(nodePairs), std::move(storage), std::move(indicesPath), std::move(indptrPath), std::move(scanFunction), std::move(scanBindData), std::move(foreignDatabaseName)); return BoundCreateTableInfo(CatalogEntryType::REL_GROUP_ENTRY, info->tableName, info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry()); diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 930dbc2e68..29e89205d5 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -205,7 +205,7 @@ CatalogEntry* Catalog::createRelGroupEntry(Transaction* transaction, } auto relGroupEntry = std::make_unique(info.tableName, extraInfo->srcMultiplicity, extraInfo->dstMultiplicity, extraInfo->storageDirection, - std::move(relTableInfos), extraInfo->storage, extraInfo->scanFunction, + std::move(relTableInfos), extraInfo->storage, extraInfo->indicesPath, 
extraInfo->indptrPath, extraInfo->scanFunction, std::move(extraInfo->scanBindData), extraInfo->foreignDatabaseName); for (auto& definition : extraInfo->propertyDefinitions) { relGroupEntry->addProperty(definition); @@ -561,7 +561,7 @@ CatalogEntry* Catalog::createNodeTableEntry(Transaction* transaction, const BoundCreateTableInfo& info) { const auto extraInfo = info.extraInfo->constPtrCast(); auto entry = std::make_unique(info.tableName, extraInfo->primaryKeyName, - extraInfo->storage); + extraInfo->storage, extraInfo->tablePath); for (auto& definition : extraInfo->propertyDefinitions) { entry->addProperty(definition); } diff --git a/src/catalog/catalog_entry/node_table_catalog_entry.cpp b/src/catalog/catalog_entry/node_table_catalog_entry.cpp index ee7170fc13..0b0822c57c 100644 --- a/src/catalog/catalog_entry/node_table_catalog_entry.cpp +++ b/src/catalog/catalog_entry/node_table_catalog_entry.cpp @@ -24,6 +24,8 @@ void NodeTableCatalogEntry::serialize(common::Serializer& serializer) const { serializer.write(primaryKeyName); serializer.writeDebuggingInfo("storage"); serializer.write(storage); + serializer.writeDebuggingInfo("tablePath"); + serializer.write(tablePath); } std::unique_ptr NodeTableCatalogEntry::deserialize( @@ -35,9 +37,13 @@ std::unique_ptr NodeTableCatalogEntry::deserialize( deserializer.deserializeValue(primaryKeyName); deserializer.validateDebuggingInfo(debuggingInfo, "storage"); deserializer.deserializeValue(storage); + std::string tablePath; + deserializer.validateDebuggingInfo(debuggingInfo, "tablePath"); + deserializer.deserializeValue(tablePath); auto nodeTableEntry = std::make_unique(); nodeTableEntry->primaryKeyName = primaryKeyName; nodeTableEntry->storage = storage; + nodeTableEntry->tablePath = tablePath; return nodeTableEntry; } @@ -66,6 +72,7 @@ std::unique_ptr NodeTableCatalogEntry::copy() const { auto other = std::make_unique(); other->primaryKeyName = primaryKeyName; other->storage = storage; + other->tablePath = tablePath; 
other->scanFunction = scanFunction; other->createBindDataFunc = createBindDataFunc; other->foreignDatabaseName = foreignDatabaseName; @@ -76,7 +83,7 @@ std::unique_ptr NodeTableCatalogEntry::copy() const { std::unique_ptr NodeTableCatalogEntry::getBoundExtraCreateInfo( transaction::Transaction*) const { return std::make_unique(primaryKeyName, - copyVector(getProperties()), storage); + copyVector(getProperties()), storage, tablePath); } } // namespace catalog diff --git a/src/catalog/catalog_entry/rel_group_catalog_entry.cpp b/src/catalog/catalog_entry/rel_group_catalog_entry.cpp index 46da2a4fbe..c172b8e7ad 100644 --- a/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +++ b/src/catalog/catalog_entry/rel_group_catalog_entry.cpp @@ -98,6 +98,10 @@ void RelGroupCatalogEntry::serialize(Serializer& serializer) const { serializer.serializeValue(storageDirection); serializer.writeDebuggingInfo("storage"); serializer.serializeValue(storage); + serializer.writeDebuggingInfo("indicesPath"); + serializer.serializeValue(indicesPath); + serializer.writeDebuggingInfo("indptrPath"); + serializer.serializeValue(indptrPath); serializer.writeDebuggingInfo("scanFunction"); serializer.serializeValue(scanFunction.has_value()); if (scanFunction.has_value()) { @@ -123,6 +127,12 @@ std::unique_ptr RelGroupCatalogEntry::deserialize( deserializer.deserializeValue(storageDirection); deserializer.validateDebuggingInfo(debuggingInfo, "storage"); deserializer.deserializeValue(storage); + std::string indicesPath; + deserializer.validateDebuggingInfo(debuggingInfo, "indicesPath"); + deserializer.deserializeValue(indicesPath); + std::string indptrPath; + deserializer.validateDebuggingInfo(debuggingInfo, "indptrPath"); + deserializer.deserializeValue(indptrPath); deserializer.validateDebuggingInfo(debuggingInfo, "scanFunction"); bool hasScanFunction; deserializer.deserializeValue(hasScanFunction); @@ -137,6 +147,8 @@ std::unique_ptr RelGroupCatalogEntry::deserialize( 
relGroupEntry->dstMultiplicity = dstMultiplicity; relGroupEntry->storageDirection = storageDirection; relGroupEntry->storage = storage; + relGroupEntry->indicesPath = indicesPath; + relGroupEntry->indptrPath = indptrPath; relGroupEntry->scanFunction = scanFunction; relGroupEntry->relTableInfos = relTableInfos; return relGroupEntry; @@ -198,6 +210,8 @@ std::unique_ptr RelGroupCatalogEntry::copy() const { other->dstMultiplicity = dstMultiplicity; other->storageDirection = storageDirection; other->storage = storage; + other->indicesPath = indicesPath; + other->indptrPath = indptrPath; other->scanFunction = scanFunction; other->scanBindData = std::nullopt; // TODO: implement copy for bindData if needed other->foreignDatabaseName = foreignDatabaseName; @@ -214,7 +228,7 @@ RelGroupCatalogEntry::getBoundExtraCreateInfo(transaction::Transaction*) const { } return std::make_unique( copyVector(propertyCollection.getDefinitions()), srcMultiplicity, dstMultiplicity, - storageDirection, std::move(nodePairs)); + storageDirection, std::move(nodePairs), storage, indicesPath, indptrPath); } } // namespace catalog diff --git a/src/include/binder/ddl/bound_create_table_info.h b/src/include/binder/ddl/bound_create_table_info.h index 2db5ac5d39..0a88abe66d 100644 --- a/src/include/binder/ddl/bound_create_table_info.h +++ b/src/include/binder/ddl/bound_create_table_info.h @@ -76,14 +76,15 @@ struct LBUG_API BoundExtraCreateTableInfo : BoundExtraCreateCatalogEntryInfo { struct BoundExtraCreateNodeTableInfo final : BoundExtraCreateTableInfo { std::string primaryKeyName; std::string storage; + std::string tablePath; BoundExtraCreateNodeTableInfo(std::string primaryKeyName, - std::vector definitions, std::string storage = "") + std::vector definitions, std::string storage = "", std::string tablePath = "") : BoundExtraCreateTableInfo{std::move(definitions)}, - primaryKeyName{std::move(primaryKeyName)}, storage{std::move(storage)} {} + primaryKeyName{std::move(primaryKeyName)}, 
storage{std::move(storage)}, tablePath{std::move(tablePath)} {} BoundExtraCreateNodeTableInfo(const BoundExtraCreateNodeTableInfo& other) : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)}, - primaryKeyName{other.primaryKeyName}, storage{other.storage} {} + primaryKeyName{other.primaryKeyName}, storage{other.storage}, tablePath{other.tablePath} {} std::unique_ptr copy() const override { return std::make_unique(*this); @@ -96,6 +97,8 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo { common::ExtendDirection storageDirection; std::vector nodePairs; std::string storage; + std::string indicesPath; + std::string indptrPath; std::optional scanFunction; std::optional> scanBindData; std::string foreignDatabaseName; @@ -103,22 +106,22 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo { explicit BoundExtraCreateRelTableGroupInfo(std::vector definitions, common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity, common::ExtendDirection storageDirection, std::vector nodePairs, - std::string storage = "", + std::string storage = "", std::string indicesPath = "", std::string indptrPath = "", std::optional scanFunction = std::nullopt, std::optional> scanBindData = std::nullopt, std::string foreignDatabaseName = "") : BoundExtraCreateTableInfo{std::move(definitions)}, srcMultiplicity{srcMultiplicity}, dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection}, - nodePairs{std::move(nodePairs)}, storage{std::move(storage)}, - scanFunction{std::move(scanFunction)}, scanBindData{std::move(scanBindData)}, - foreignDatabaseName{std::move(foreignDatabaseName)} {} + nodePairs{std::move(nodePairs)}, storage{std::move(storage)}, indicesPath{std::move(indicesPath)}, + indptrPath{std::move(indptrPath)}, scanFunction{std::move(scanFunction)}, + scanBindData{std::move(scanBindData)}, foreignDatabaseName{std::move(foreignDatabaseName)} {} BoundExtraCreateRelTableGroupInfo(const 
BoundExtraCreateRelTableGroupInfo& other) : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)}, srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity}, storageDirection{other.storageDirection}, nodePairs{other.nodePairs}, - storage{other.storage}, scanFunction{other.scanFunction}, - scanBindData{other.scanBindData}, foreignDatabaseName{other.foreignDatabaseName} {} + storage{other.storage}, indicesPath{other.indicesPath}, indptrPath{other.indptrPath}, + scanFunction{other.scanFunction}, scanBindData{other.scanBindData}, foreignDatabaseName{other.foreignDatabaseName} {} std::unique_ptr copy() const override { return std::make_unique(*this); diff --git a/src/include/catalog/catalog_entry/node_table_catalog_entry.h b/src/include/catalog/catalog_entry/node_table_catalog_entry.h index 32dfd20a82..5bc1252b3a 100644 --- a/src/include/catalog/catalog_entry/node_table_catalog_entry.h +++ b/src/include/catalog/catalog_entry/node_table_catalog_entry.h @@ -27,9 +27,9 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry { public: NodeTableCatalogEntry() = default; - NodeTableCatalogEntry(std::string name, std::string primaryKeyName, std::string storage = "") + NodeTableCatalogEntry(std::string name, std::string primaryKeyName, std::string storage = "", std::string tablePath = "") : TableCatalogEntry{entryType_, std::move(name)}, primaryKeyName{std::move(primaryKeyName)}, - storage{std::move(storage)} {} + storage{std::move(storage)}, tablePath{std::move(tablePath)} {} // Constructor for foreign-backed tables NodeTableCatalogEntry(std::string name, std::string primaryKeyName, @@ -56,6 +56,7 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry { return getProperty(primaryKeyName); } const std::string& getStorage() const { return storage; } + const std::string& getTablePath() const { return tablePath; } std::optional getScanFunction() const override; const CreateBindDataFunc& getCreateBindDataFunc() 
const { return createBindDataFunc; } const std::string& getForeignDatabaseName() const { return foreignDatabaseName; } @@ -82,6 +83,7 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry { private: std::string primaryKeyName; std::string storage; + std::string tablePath; std::optional scanFunction; CreateBindDataFunc createBindDataFunc; // Callback to create bind data std::string foreignDatabaseName; diff --git a/src/include/catalog/catalog_entry/rel_group_catalog_entry.h b/src/include/catalog/catalog_entry/rel_group_catalog_entry.h index 807dc8f002..dc35de6812 100644 --- a/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +++ b/src/include/catalog/catalog_entry/rel_group_catalog_entry.h @@ -39,12 +39,14 @@ class LBUG_API RelGroupCatalogEntry final : public TableCatalogEntry { RelGroupCatalogEntry(std::string tableName, common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity, common::ExtendDirection storageDirection, std::vector relTableInfos, std::string storage = "", + std::string indicesPath = "", std::string indptrPath = "", std::optional scanFunction = std::nullopt, std::optional> scanBindData = std::nullopt, std::string foreignDatabaseName = "") : TableCatalogEntry{type_, std::move(tableName)}, srcMultiplicity{srcMultiplicity}, dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection}, relTableInfos{std::move(relTableInfos)}, storage{std::move(storage)}, + indicesPath{std::move(indicesPath)}, indptrPath{std::move(indptrPath)}, scanFunction{std::move(scanFunction)}, scanBindData{std::move(scanBindData)}, foreignDatabaseName{std::move(foreignDatabaseName)} { propertyCollection = @@ -63,6 +65,8 @@ class LBUG_API RelGroupCatalogEntry final : public TableCatalogEntry { common::ExtendDirection getStorageDirection() const { return storageDirection; } const std::string& getStorage() const { return storage; } + const std::string& getIndicesPath() const { return indicesPath; } + const std::string& 
getIndptrPath() const { return indptrPath; } std::optional getScanFunction() const override { return scanFunction; } const std::optional>& getScanBindData() const { return scanBindData; @@ -113,6 +117,8 @@ class LBUG_API RelGroupCatalogEntry final : public TableCatalogEntry { common::ExtendDirection storageDirection = common::ExtendDirection::BOTH; std::vector relTableInfos; std::string storage; + std::string indicesPath; + std::string indptrPath; std::optional scanFunction; std::optional> scanBindData; std::string foreignDatabaseName; // Database name for foreign-backed rel tables diff --git a/src/include/common/constants.h b/src/include/common/constants.h index dcbbb43b53..53a3241530 100644 --- a/src/include/common/constants.h +++ b/src/include/common/constants.h @@ -86,6 +86,9 @@ struct StorageConstants { struct TableOptionConstants { static constexpr char REL_STORAGE_DIRECTION_OPTION[] = "STORAGE_DIRECTION"; static constexpr char REL_STORAGE_OPTION[] = "STORAGE"; + static constexpr char TABLE_PATH[] = "TABLE_PATH"; + static constexpr char INDICES_OPTION[] = "INDICES"; + static constexpr char INDPTR_OPTION[] = "INDPTR"; }; // Hash Index Configurations diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h new file mode 100644 index 0000000000..e17d0bc38d --- /dev/null +++ b/src/include/storage/table/ice_disk_node_table.h @@ -0,0 +1,110 @@ +#pragma once + +#include +#include + +#include "storage/table/node_table.h" +#include "processor/operator/persistent/reader/parquet/parquet_reader.h" + +namespace lbug { +namespace storage { + +class IceDiskNodeTable; + +struct IceDiskNodeTableScanState : public TableScanState { + std::unique_ptr parquetReader; + std::unique_ptr parquetScanState; + std::vector columnSkips; + bool scanCompleted = false; + uint64_t currentStartRow = 0; + uint64_t currentNumRows = 0; + uint64_t currentRowOffset = 0; + + IceDiskNodeTableScanState(common::ValueVector* nodeIDVector, + 
std::vector outputVectors, + std::shared_ptr outChunkState) + : TableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { + parquetScanState = std::make_unique(); + } + + const catalog::NodeTableCatalogEntry* getNodeTableCatalogEntry() const; + + void setToTable(const transaction::Transaction* transaction, Table* table_, + std::vector columnIDs_, + std::vector columnPredicateSets_ = {}, + common::RelDataDirection direction = common::RelDataDirection::FWD) override; +}; + +struct IceDiskNodeTableScanSharedState { +private: + std::mutex mtx; + std::vector rowGroupRows; + std::vector rowGroupStartRows; + common::node_group_idx_t currentMorselIdx = 0; + +public: + IceDiskNodeTableScanSharedState() {} + + void reset(std::vector rows, std::vector startRows) { + std::lock_guard lock(mtx); + this->rowGroupRows = std::move(rows); + this->rowGroupStartRows = std::move(startRows); + this->currentMorselIdx = 0; + } + + bool getNextMorsel(IceDiskNodeTableScanState* scanState) { + std::lock_guard lock(mtx); + if (currentMorselIdx < rowGroupRows.size()) { + scanState->nodeGroupIdx = currentMorselIdx; + scanState->currentNumRows = rowGroupRows[currentMorselIdx]; + scanState->currentStartRow = rowGroupStartRows[currentMorselIdx]; + currentMorselIdx++; + return true; + } + return false; + } + + void resetMorsel() { + std::lock_guard lock(mtx); + currentMorselIdx = 0; + } + + size_t getNumMorsels() const { return rowGroupRows.size(); } +}; + +class IceDiskNodeTable final : public NodeTable { +public: + IceDiskNodeTable(const StorageManager* storageManager, + const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager); + + void initializeScanCoordination(const transaction::Transaction* transaction) override; + + void initScanState(transaction::Transaction* transaction, TableScanState& scanState, + bool resetCachedBoundNodeSelVec = true) const override; + + bool scanInternal(transaction::Transaction* transaction, TableScanState& 
scanState) override; + + void insert(transaction::Transaction*, TableInsertState&) override { + throw common::RuntimeException("Cannot insert into icebug-disk-backed node table"); + } + void update(transaction::Transaction*, TableUpdateState&) override { + throw common::RuntimeException("Cannot update icebug-disk-backed node table"); + } + bool delete_(transaction::Transaction*, TableDeleteState&) override { + throw common::RuntimeException("Cannot delete from icebug-disk-backed node table"); + } + + common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override; + + const std::string& getParquetFilePath() const { return parquetFilePath; } + const catalog::NodeTableCatalogEntry* getNodeTableCatalogEntry() const { return nodeTableCatalogEntry; } + IceDiskNodeTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } + +private: + std::string parquetFilePath; + const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry; + mutable std::unique_ptr tableScanSharedState; +}; + +} // namespace storage +} // namespace lbug diff --git a/src/include/storage/table/ice_disk_rel_table.h b/src/include/storage/table/ice_disk_rel_table.h new file mode 100644 index 0000000000..636010339c --- /dev/null +++ b/src/include/storage/table/ice_disk_rel_table.h @@ -0,0 +1,138 @@ +#pragma once + +#include + +#include "catalog/catalog_entry/rel_group_catalog_entry.h" +#include "common/exception/runtime.h" +#include "common/types/value/value.h" +#include "processor/operator/persistent/reader/parquet/parquet_reader.h" +#include "storage/table/rel_table.h" + +namespace lbug { +namespace storage { + +class IceDiskRelTable; + +struct PendingIceDiskRelRow { + common::sel_t boundNodeSelPos = 0; + common::nodeID_t nbrNodeID; + common::internalID_t relID; + std::vector> propertyValues; + + PendingIceDiskRelRow() = default; + PendingIceDiskRelRow(PendingIceDiskRelRow&&) = default; + PendingIceDiskRelRow& operator=(PendingIceDiskRelRow&&) = 
default; +}; + +struct IceDiskRelTableScanState : public RelTableScanState { + std::unique_ptr indicesReader; + std::unique_ptr parquetScanState; + std::vector outputColumnIdx; + std::vector columnSkips; + bool scanCompleted = false; + uint64_t currentStartRow = 0; + uint64_t currentNumRows = 0; + uint64_t currentGlobalRowIdx = 0; + uint64_t nextRowGroupIdx = 0; + std::vector pendingRows; + uint64_t nextPendingRowIdx = 0; + + IceDiskRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector, + std::vector outputVectors, + std::shared_ptr outChunkState) + : RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { + parquetScanState = std::make_unique(); + } + + void setToTable(const transaction::Transaction* transaction, Table* table_, + std::vector columnIDs_, + std::vector columnPredicateSets_ = {}, + common::RelDataDirection direction_ = common::RelDataDirection::FWD) override; +}; + +struct IceDiskRelTableScanSharedState { +private: + std::mutex mtx; + std::vector rowGroupStartRows; + std::vector rowGroupNumRows; + common::node_group_idx_t currentRowGroupIdx = 0; + +public: + IceDiskRelTableScanSharedState() {} + + void reset(std::vector startRows, std::vector numRows) { + std::lock_guard lock(mtx); + this->rowGroupStartRows = std::move(startRows); + this->rowGroupNumRows = std::move(numRows); + this->currentRowGroupIdx = 0; + } + + bool getNextMorsel(IceDiskRelTableScanState* scanState, uint64_t& startRow, uint64_t& numRows) { + std::lock_guard lock(mtx); + if (currentRowGroupIdx < rowGroupStartRows.size()) { + scanState->nodeGroupIdx = currentRowGroupIdx; + startRow = rowGroupStartRows[currentRowGroupIdx]; + numRows = rowGroupNumRows[currentRowGroupIdx]; + currentRowGroupIdx++; + return true; + } + return false; + } + + bool getMorsel(common::node_group_idx_t morselIdx, uint64_t& startRow, uint64_t& numRows) { + std::lock_guard lock(mtx); + if (morselIdx >= rowGroupStartRows.size()) { + return false; + } + startRow 
= rowGroupStartRows[morselIdx]; + numRows = rowGroupNumRows[morselIdx]; + return true; + } + + common::node_group_idx_t getNumMorsels() const { return rowGroupStartRows.size(); } +}; + +class IceDiskRelTable final : public RelTable { +public: + IceDiskRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, + common::table_id_t toTableID, const StorageManager* storageManager, + MemoryManager* memoryManager); + + void initializeScanCoordination(const transaction::Transaction* transaction) override; + + void initScanState(transaction::Transaction* transaction, TableScanState& scanState, + bool resetCachedBoundNodeSelVec = true) const override; + + bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override; + + void insert(transaction::Transaction*, TableInsertState&) override { + throw common::RuntimeException("Cannot insert into icebug-disk-backed rel table"); + } + void update(transaction::Transaction*, TableUpdateState&) override { + throw common::RuntimeException("Cannot update icebug-disk-backed rel table"); + } + bool delete_(transaction::Transaction*, TableDeleteState&) override { + throw common::RuntimeException("Cannot delete from icebug-disk-backed rel table"); + } + + common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override; + + const std::string& getIndicesFilePath() const { return indicesFilePath; } + const std::string& getIndptrFilePath() const { return indptrFilePath; } + const catalog::RelGroupCatalogEntry* getRelGroupCatalogEntry() const { return relGroupCatalogEntry; } + IceDiskRelTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } + +private: + std::string indicesFilePath; + std::string indptrFilePath; + const catalog::RelGroupCatalogEntry* relGroupCatalogEntry; + mutable std::unique_ptr tableScanSharedState; + mutable std::mutex indptrDataMutex; + mutable std::vector indptrData; + + void 
loadIndptrData(transaction::Transaction* transaction) const; + common::offset_t findSourceNodeForRow(common::offset_t globalRowIdx) const; +}; + +} // namespace storage +} // namespace lbug diff --git a/src/include/storage/table/table.h b/src/include/storage/table/table.h index fc14653c61..12ea334809 100644 --- a/src/include/storage/table/table.h +++ b/src/include/storage/table/table.h @@ -164,6 +164,7 @@ class LBUG_API Table { // Note that `resetCachedBoundNodeIDs` is only applicable to RelTable for now. virtual void initScanState(transaction::Transaction* transaction, TableScanState& readState, bool resetCachedBoundNodeSelVec = true) const = 0; + virtual void initializeScanCoordination(const transaction::Transaction* /*transaction*/) {} bool scan(transaction::Transaction* transaction, TableScanState& scanState); virtual void initInsertState(main::ClientContext* context, TableInsertState& insertState) = 0; diff --git a/src/processor/operator/scan/scan_multi_rel_tables.cpp b/src/processor/operator/scan/scan_multi_rel_tables.cpp index 27f5422379..52642a62be 100644 --- a/src/processor/operator/scan/scan_multi_rel_tables.cpp +++ b/src/processor/operator/scan/scan_multi_rel_tables.cpp @@ -3,6 +3,7 @@ #include "processor/execution_context.h" #include "storage/local_storage/local_storage.h" #include "storage/table/arrow_rel_table.h" +#include "storage/table/ice_disk_rel_table.h" #include "storage/table/parquet_rel_table.h" using namespace lbug::common; @@ -69,6 +70,7 @@ void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionCo // Check if any table in any scanner is an external rel table with a custom scan state. 
bool hasArrowTable = false; bool hasParquetTable = false; + bool hasIceDiskTable = false; for (auto& [_, scanner] : scanners) { for (auto& relInfo : scanner.relInfos) { if (dynamic_cast(relInfo.table) != nullptr) { @@ -79,8 +81,12 @@ void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionCo hasParquetTable = true; break; } + if (dynamic_cast(relInfo.table) != nullptr) { + hasIceDiskTable = true; + break; + } } - if (hasArrowTable || hasParquetTable) { + if (hasArrowTable || hasParquetTable || hasIceDiskTable) { break; } } @@ -95,6 +101,9 @@ void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionCo scanState = std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); + } else if (hasIceDiskTable) { + scanState = std::make_unique(*MemoryManager::Get(*clientContext), + boundNodeIDVector, outVectors, nbrNodeIDVector->state); } else { scanState = std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp index b6e8fb175d..4c5a71ab7a 100644 --- a/src/processor/operator/scan/scan_node_table.cpp +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -7,6 +7,7 @@ #include "storage/local_storage/local_node_table.h" #include "storage/local_storage/local_storage.h" #include "storage/table/arrow_node_table.h" +#include "storage/table/ice_disk_node_table.h" #include "storage/table/parquet_node_table.h" using namespace lbug::common; @@ -69,6 +70,18 @@ void ScanNodeTableSharedState::initialize(const transaction::Transaction* transa } catch (const std::exception& e) { this->numCommittedNodeGroups = 1; } + } else if (const auto iceDiskTable = dynamic_cast(table)) { + std::vector columnSkips; + try { + auto context = transaction->getClientContext(); + auto resolvedPath = + common::VirtualFileSystem::resolvePath(context, 
iceDiskTable->getParquetFilePath()); + auto tempReader = + std::make_unique(resolvedPath, columnSkips, context); + this->numCommittedNodeGroups = tempReader->getNumRowsGroups(); + } catch (const std::exception& e) { + this->numCommittedNodeGroups = 1; + } } else if (const auto arrowTable = dynamic_cast(table)) { // For Arrow tables, set numCommittedNodeGroups to number of morsels this->numCommittedNodeGroups = @@ -103,6 +116,18 @@ void ScanNodeTableSharedState::nextMorsel(TableScanState& scanState, return; } + if (const auto iceDiskTable = dynamic_cast(this->table)) { + const auto tableSharedState = iceDiskTable->getTableScanSharedState(); + if (tableSharedState->getNextMorsel(static_cast(&scanState))) { + scanState.source = TableScanSource::COMMITTED; + progressSharedState.numMorselsScanned++; + } else { + scanState.source = TableScanSource::NONE; + } + + return; + } + auto& nodeScanState = scanState.cast(); if (currentCommittedGroupIdx < numCommittedNodeGroups) { @@ -137,7 +162,28 @@ void ScanNodeTableInfo::initScanState(TableScanState& scanState, void ScanNodeTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { ScanTable::initLocalStateInternal(resultSet, context); - nodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get(); + auto nodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get(); + + // Check if the first table is a ParquetNodeTable or ArrowNodeTable and create appropriate scan + // state + auto* parquetTable = dynamic_cast(tableInfos[0].table); + auto* arrowTable = dynamic_cast(tableInfos[0].table); + auto* iceDiskTable = dynamic_cast(tableInfos[0].table); + if (parquetTable) { + scanState = std::make_unique( + *MemoryManager::Get(*context->clientContext), nodeIDVector, outVectors, + nodeIDVector->state); + } else if (iceDiskTable) { + scanState = std::make_unique( + nodeIDVector, outVectors, nodeIDVector->state); + } else if (arrowTable) { + scanState = + 
std::make_unique(*MemoryManager::Get(*context->clientContext), + nodeIDVector, outVectors, nodeIDVector->state); + } else { + scanState = + std::make_unique(nodeIDVector, outVectors, nodeIDVector->state); + } currentTableIdx = 0; initCurrentTable(context); @@ -149,9 +195,10 @@ void ScanNodeTable::initCurrentTable(ExecutionContext* context) { outVectors, MemoryManager::Get(*context->clientContext)); currentInfo.initScanState(*scanState, outVectors, context->clientContext); scanState->semiMask = sharedStates[currentTableIdx]->getSemiMask(); - // Call table->initScanState for ParquetNodeTable or ArrowNodeTable + // Call table->initScanState for ParquetNodeTable or ArrowNodeTable or IceDiskNodeTable if (dynamic_cast(tableInfos[currentTableIdx].table) || - dynamic_cast(tableInfos[currentTableIdx].table)) { + dynamic_cast(tableInfos[currentTableIdx].table) || + dynamic_cast(tableInfos[currentTableIdx].table)) { auto transaction = transaction::Transaction::Get(*context->clientContext); tableInfos[currentTableIdx].table->initScanState(transaction, *scanState); } diff --git a/src/processor/operator/scan/scan_rel_table.cpp b/src/processor/operator/scan/scan_rel_table.cpp index f09b7a9abd..f736d463d2 100644 --- a/src/processor/operator/scan/scan_rel_table.cpp +++ b/src/processor/operator/scan/scan_rel_table.cpp @@ -9,6 +9,7 @@ #include "storage/local_storage/local_rel_table.h" #include "storage/table/arrow_rel_table.h" #include "storage/table/foreign_rel_table.h" +#include "storage/table/ice_disk_rel_table.h" #include "storage/table/node_table.h" #include "storage/table/parquet_rel_table.h" @@ -78,6 +79,7 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext auto* arrowTable = dynamic_cast(tableInfo.table); auto* parquetTable = dynamic_cast(tableInfo.table); auto* foreignTable = dynamic_cast(tableInfo.table); + auto* iceDiskTable = dynamic_cast(tableInfo.table); if (arrowTable) { scanState = std::make_unique(*MemoryManager::Get(*clientContext), 
@@ -86,6 +88,9 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext scanState = std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); + } else if (iceDiskTable) { + scanState = std::make_unique(*MemoryManager::Get(*clientContext), + boundNodeIDVector, outVectors, nbrNodeIDVector->state); } else if (foreignTable) { scanState = std::make_unique(*MemoryManager::Get(*clientContext), @@ -94,6 +99,7 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext scanState = std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); } + tableInfo.table->initializeScanCoordination(transaction::Transaction::Get(*clientContext)); tableInfo.initScanState(*scanState, outVectors, clientContext); if (sourceMode) { currentSourceTableIdx = 0; diff --git a/src/storage/storage_manager.cpp b/src/storage/storage_manager.cpp index 82a22dd750..627f6d1aeb 100644 --- a/src/storage/storage_manager.cpp +++ b/src/storage/storage_manager.cpp @@ -19,6 +19,8 @@ #include "storage/table/arrow_rel_table.h" #include "storage/table/arrow_table_support.h" #include "storage/table/foreign_rel_table.h" +#include "storage/table/ice_disk_node_table.h" +#include "storage/table/ice_disk_rel_table.h" #include "storage/table/node_table.h" #include "storage/table/parquet_node_table.h" #include "storage/table/parquet_rel_table.h" @@ -98,19 +100,15 @@ void StorageManager::recover(main::ClientContext& clientContext, bool throwOnWal void StorageManager::createNodeTable(NodeTableCatalogEntry* entry) { tableNameCache[entry->getTableID()] = entry->getName(); if (!entry->getStorage().empty()) { - // Check if storage is Arrow backed - if (entry->getStorage().substr(0, 8) == "arrow://") { - // Extract Arrow ID from storage string + if (entry->getStorage().find("icebug-disk") != std::string::npos) { + tables[entry->getTableID()] = std::make_unique(this, entry, 
&memoryManager); + } else if (entry->getStorage().substr(0, 8) == "arrow://") { std::string arrowId = entry->getStorage().substr(8); - - // Retrieve Arrow data from registry (as pointers to registry data) ArrowSchemaWrapper* schema = nullptr; std::vector* arrays = nullptr; if (!ArrowTableSupport::getArrowData(arrowId, schema, arrays)) { throw common::RuntimeException("Failed to retrieve Arrow data for ID: " + arrowId); } - - // Create wrappers that reference registry memory while registry keeps ownership. ArrowSchemaWrapper schemaCopy = createShallowCopy(*schema); std::vector arraysCopy; arraysCopy.reserve(arrays->size()); @@ -169,6 +167,9 @@ void StorageManager::addRelTable(RelGroupCatalogEntry* entry, const RelTableCata tables[info.oid] = std::make_unique(entry, info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager, fromNodeTable, toNodeTable, std::move(schemaCopy), std::move(arraysCopy), arrowId); + } else if (entry->getStorage().find("icebug-disk") != std::string::npos) { + tables[info.oid] = std::make_unique(entry, info.nodePair.srcTableID, + info.nodePair.dstTableID, this, &memoryManager); } else { // Create parquet-backed rel table tables[info.oid] = std::make_unique(entry, info.nodePair.srcTableID, @@ -437,8 +438,12 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca ->ptrCast(); tableNameCache[tableID] = tableEntry->getName(); if (!tableEntry->getStorage().empty()) { - // Create parquet-backed node table - tables[tableID] = std::make_unique(this, tableEntry, &memoryManager); + if (tableEntry->getStorage().find("icebug-disk") != std::string::npos) { + tables[tableID] = std::make_unique(this, tableEntry, &memoryManager); + } else { + // Create parquet-backed node table + tables[tableID] = std::make_unique(this, tableEntry, &memoryManager); + } } else { // Create regular node table tables[tableID] = std::make_unique(this, tableEntry, &memoryManager); @@ -465,9 +470,14 @@ void 
StorageManager::deserialize(main::ClientContext* context, const Catalog* ca RelTableCatalogInfo info = RelTableCatalogInfo::deserialize(deSer); DASSERT(!tables.contains(info.oid)); if (!relGroupEntry->getStorage().empty()) { - // Create parquet-backed rel table - tables[info.oid] = std::make_unique(relGroupEntry, - info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager); + if (relGroupEntry->getStorage().find("icebug-disk") != std::string::npos) { + tables[info.oid] = std::make_unique(relGroupEntry, + info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager); + } else { + // Create parquet-backed rel table + tables[info.oid] = std::make_unique(relGroupEntry, + info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager); + } } else { // Create regular rel table tables[info.oid] = std::make_unique(relGroupEntry, diff --git a/src/storage/table/CMakeLists.txt b/src/storage/table/CMakeLists.txt index 54a76cca23..f625831301 100644 --- a/src/storage/table/CMakeLists.txt +++ b/src/storage/table/CMakeLists.txt @@ -19,6 +19,8 @@ add_library(lbug_storage_store dictionary_chunk.cpp dictionary_column.cpp foreign_rel_table.cpp + ice_disk_node_table.cpp + ice_disk_rel_table.cpp in_mem_chunked_node_group_collection.cpp in_memory_exception_chunk.cpp lazy_segment_scanner.cpp diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp new file mode 100644 index 0000000000..07bea9584c --- /dev/null +++ b/src/storage/table/ice_disk_node_table.cpp @@ -0,0 +1,209 @@ +#include "storage/table/ice_disk_node_table.h" + +#include +#include + +#include "catalog/catalog_entry/node_table_catalog_entry.h" +#include "common/data_chunk/sel_vector.h" +#include "common/exception/runtime.h" +#include "common/file_system/virtual_file_system.h" +#include "common/types/value/value.h" +#include "main/client_context.h" +#include "processor/operator/persistent/reader/parquet/parquet_reader.h" +#include 
"storage/buffer_manager/memory_manager.h" +#include "storage/storage_manager.h" +#include "transaction/transaction.h" + +using namespace lbug::catalog; +using namespace lbug::common; +using namespace lbug::processor; +using namespace lbug::transaction; + +namespace lbug { +namespace storage { + +namespace { + +std::string resolveIceDiskPath(const std::string& storageRoot, const std::string& configuredPath, + const std::string& fallbackPath) { + if (configuredPath.empty()) { + return fallbackPath; + } + auto configured = std::filesystem::path{configuredPath}; + if (configured.is_absolute()) { + return configured.lexically_normal().string(); + } + if (storageRoot.empty()) { + return configured.lexically_normal().string(); + } + auto baseDir = std::filesystem::path{storageRoot}.parent_path(); + if (baseDir.empty()) { + return configured.lexically_normal().string(); + } + return (baseDir / configured).lexically_normal().string(); +} + +} // namespace + +const NodeTableCatalogEntry* IceDiskNodeTableScanState::getNodeTableCatalogEntry() const { + return table->cast().getNodeTableCatalogEntry(); +} + +void IceDiskNodeTableScanState::setToTable(const Transaction* transaction, Table* table_, + std::vector columnIDs_, std::vector columnPredicateSets_, + RelDataDirection /*direction*/) { + table = table_; + columnIDs = std::move(columnIDs_); + columnPredicateSets = std::move(columnPredicateSets_); + + auto& iceDiskTable = table->cast(); + auto context = transaction->getClientContext(); + auto resolvedPath = VirtualFileSystem::resolvePath(context, iceDiskTable.getParquetFilePath()); + std::vector dummySkips; + auto tempReader = std::make_unique(resolvedPath, dummySkips, context); + processor::ParquetReaderScanState tempState; + std::vector dummyGroups; + tempReader->initializeScan(tempState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); + columnSkips.assign(tempReader->getNumColumns(), true); + + auto entry = iceDiskTable.getNodeTableCatalogEntry(); + for (auto 
columnID : columnIDs) { + if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) { + continue; + } + + auto propertyName = entry->getProperty(columnID).getName(); + for (uint32_t j = 0; j < tempReader->getNumColumns(); ++j) { + if (tempReader->getColumnName(j) == propertyName) { + columnSkips[j] = false; + break; + } + } + } + + parquetReader = std::make_unique(resolvedPath, columnSkips, context); + processor::ParquetReaderScanState scanState; + parquetReader->initializeScan(scanState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); +} + +IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, + const NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager) + : NodeTable{storageManager, nodeTableEntry, memoryManager}, + nodeTableCatalogEntry{nodeTableEntry}, + tableScanSharedState{std::make_unique()} { + parquetFilePath = resolveIceDiskPath(nodeTableEntry->getStorage(), nodeTableEntry->getTablePath(), + nodeTableEntry->getStorage() + "_nodes_" + nodeTableEntry->getName() + ".parquet"); +} + +void IceDiskNodeTable::initializeScanCoordination(const Transaction* transaction) { + std::vector rowGroupRows; + std::vector rowGroupStartRows; + auto context = transaction->getClientContext(); + if (context) { + std::vector dummySkips; + auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); + auto tempReader = std::make_unique(resolvedPath, dummySkips, context); + auto metadata = tempReader->getMetadata(); + if (metadata) { + uint64_t currentStartRow = 0; + for (size_t i = 0; i < metadata->row_groups.size(); ++i) { + rowGroupRows.push_back(metadata->row_groups[i].num_rows); + rowGroupStartRows.push_back(currentStartRow); + currentStartRow += metadata->row_groups[i].num_rows; + } + } + } + tableScanSharedState->reset(std::move(rowGroupRows), std::move(rowGroupStartRows)); +} + +void IceDiskNodeTable::initScanState(Transaction* /*transaction*/, TableScanState& scanState, + bool 
/*resetCachedBoundNodeSelVec*/) const { + auto& iceDiskNodeScanState = static_cast(scanState); + iceDiskNodeScanState.source = TableScanSource::COMMITTED; + iceDiskNodeScanState.scanCompleted = false; + iceDiskNodeScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; + iceDiskNodeScanState.currentRowOffset = 0; +} + +bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& scanState) { + auto& iceDiskNodeScanState = static_cast(scanState); + if (iceDiskNodeScanState.scanCompleted) { + return false; + } + + auto vfs = VirtualFileSystem::GetUnsafe(*transaction->getClientContext()); + DataChunk dataChunk(iceDiskNodeScanState.parquetReader->getNumColumns()); + for (uint32_t i = 0; i < iceDiskNodeScanState.parquetReader->getNumColumns(); ++i) { + dataChunk.insert(i, std::make_shared( + iceDiskNodeScanState.parquetReader->getColumnType(i).copy(), + MemoryManager::Get(*transaction->getClientContext()))); + } + + while (true) { + if (iceDiskNodeScanState.nodeGroupIdx == INVALID_NODE_GROUP_IDX) { + if (!tableScanSharedState->getNextMorsel(&iceDiskNodeScanState)) { + iceDiskNodeScanState.scanCompleted = true; + return false; + } + iceDiskNodeScanState.currentRowOffset = 0; + std::vector groupsToRead = {iceDiskNodeScanState.nodeGroupIdx}; + iceDiskNodeScanState.parquetReader->initializeScan(*iceDiskNodeScanState.parquetScanState, + groupsToRead, vfs); + } + + dataChunk.state->getSelVectorUnsafe().setSelSize(0); + iceDiskNodeScanState.parquetReader->scan(*iceDiskNodeScanState.parquetScanState, dataChunk); + if (dataChunk.state->getSelVector().getSelSize() == 0) { + iceDiskNodeScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; + iceDiskNodeScanState.currentRowOffset = 0; + continue; + } + + scanState.resetOutVectors(); + auto selSize = dataChunk.state->getSelVector().getSelSize(); + for (uint32_t i = 0; i < iceDiskNodeScanState.columnIDs.size(); ++i) { + auto columnID = iceDiskNodeScanState.columnIDs[i]; + if (columnID == ROW_IDX_COLUMN_ID) { + for (size_t j = 0; 
j < selSize; ++j) { + ((row_idx_t*)iceDiskNodeScanState.outputVectors[i]->getData())[j] = + iceDiskNodeScanState.currentStartRow + iceDiskNodeScanState.currentRowOffset + j; + } + } else if (columnID != INVALID_COLUMN_ID) { + uint32_t parquetColIdx = 0; + auto propertyName = nodeTableCatalogEntry->getProperty(columnID).getName(); + for (uint32_t j = 0; j < iceDiskNodeScanState.parquetReader->getNumColumns(); ++j) { + if (iceDiskNodeScanState.parquetReader->getColumnName(j) == propertyName) { + parquetColIdx = j; + break; + } + } + auto& srcVector = dataChunk.getValueVectorMutable(parquetColIdx); + auto& dstVector = *iceDiskNodeScanState.outputVectors[i]; + for (size_t j = 0; j < selSize; ++j) { + dstVector.copyFromVectorData(j, &srcVector, dataChunk.state->getSelVector()[j]); + } + } + } + + for (size_t i = 0; i < selSize; ++i) { + ((nodeID_t*)iceDiskNodeScanState.nodeIDVector->getData())[i] = nodeID_t{ + iceDiskNodeScanState.currentStartRow + iceDiskNodeScanState.currentRowOffset + i, + nodeTableCatalogEntry->getTableID()}; + } + iceDiskNodeScanState.currentRowOffset += selSize; + iceDiskNodeScanState.outState->getSelVectorUnsafe().setSelSize(selSize); + iceDiskNodeScanState.outState->getSelVectorUnsafe().setToUnfiltered(); + return selSize > 0; + } +} + +common::row_idx_t IceDiskNodeTable::getNumTotalRows(const Transaction* transaction) { + auto context = transaction->getClientContext(); + auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); + std::vector dummySkips; + processor::ParquetReader reader(resolvedPath, dummySkips, context); + return reader.getMetadata()->num_rows; +} + +} // namespace storage +} // namespace lbug diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp new file mode 100644 index 0000000000..b011b30b17 --- /dev/null +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -0,0 +1,399 @@ +#include "storage/table/ice_disk_rel_table.h" + +#include +#include +#include + +#include 
"storage/storage_manager.h" +#include "storage/table/csr_node_group.h" +#include "transaction/transaction.h" +#include "catalog/catalog_entry/rel_group_catalog_entry.h" +#include "common/exception/runtime.h" + +using namespace lbug::common; +using namespace lbug::transaction; +using namespace lbug::catalog; + +namespace lbug { +namespace storage { + +namespace { + +constexpr int64_t REL_ID_OUTPUT_COLUMN = -2; + +std::string resolveIceDiskPath(const std::string& storageRoot, const std::string& configuredPath, + const std::string& fallbackPath) { + if (configuredPath.empty()) { + return fallbackPath; + } + auto configured = std::filesystem::path{configuredPath}; + if (configured.is_absolute()) { + return configured.lexically_normal().string(); + } + if (storageRoot.empty()) { + return configured.lexically_normal().string(); + } + auto baseDir = std::filesystem::path{storageRoot}.parent_path(); + if (baseDir.empty()) { + return configured.lexically_normal().string(); + } + return (baseDir / configured).lexically_normal().string(); +} + +std::string getRelPropertyNameForColumnID(const RelGroupCatalogEntry& entry, column_id_t columnID) { + for (const auto& property : entry.getProperties()) { + if (entry.getColumnID(property.getName()) == columnID) { + return property.getName(); + } + } + throw RuntimeException("Column ID " + std::to_string(columnID) + + " does not map to an icebug-disk rel property."); +} + +void copyCachedBoundNodeSelVector(RelTableScanState& relScanState) { + if (relScanState.nodeIDVector->state->getSelVector().isUnfiltered()) { + relScanState.cachedBoundNodeSelVector.setToUnfiltered(); + } else { + relScanState.cachedBoundNodeSelVector.setToFiltered(); + memcpy(relScanState.cachedBoundNodeSelVector.getMutableBuffer().data(), + relScanState.nodeIDVector->state->getSelVector().getMutableBuffer().data(), + relScanState.nodeIDVector->state->getSelVector().getSelSize() * sizeof(sel_t)); + } + relScanState.cachedBoundNodeSelVector.setSelSize( + 
relScanState.nodeIDVector->state->getSelVector().getSelSize()); +} + +void emitPendingRow(IceDiskRelTableScanState& scanState) { + auto& row = scanState.pendingRows[scanState.nextPendingRowIdx++]; + scanState.setNodeIDVectorToFlat(row.boundNodeSelPos); + for (size_t outCol = 0; outCol < scanState.columnIDs.size(); ++outCol) { + auto columnID = scanState.columnIDs[outCol]; + if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) { + continue; + } + if (columnID == NBR_ID_COLUMN_ID) { + scanState.outputVectors[outCol]->setValue(0, row.nbrNodeID); + } else if (columnID == REL_ID_COLUMN_ID) { + scanState.outputVectors[outCol]->setValue(0, row.relID); + } else if (outCol < row.propertyValues.size() && row.propertyValues[outCol]) { + scanState.outputVectors[outCol]->copyFromValue(0, *row.propertyValues[outCol]); + } + } + scanState.outState->getSelVectorUnsafe().setToUnfiltered(1); + if (scanState.nextPendingRowIdx >= scanState.pendingRows.size()) { + scanState.pendingRows.clear(); + scanState.nextPendingRowIdx = 0; + } +} + +} // namespace + +void IceDiskRelTableScanState::setToTable(const Transaction* transaction, Table* table_, + std::vector columnIDs_, + std::vector columnPredicateSets_, + common::RelDataDirection direction_) { + table = table_; + columnIDs = std::move(columnIDs_); + columnPredicateSets = std::move(columnPredicateSets_); + direction = direction_; + + auto& iceDiskRelTable = table_->cast(); + auto context = transaction->getClientContext(); + auto resolvedPath = VirtualFileSystem::resolvePath(context, iceDiskRelTable.getIndicesFilePath()); + + std::vector dummySkips; + indicesReader = std::make_unique(resolvedPath, dummySkips, context); + auto tempState = std::make_unique(); + std::vector dummyGroups; + indicesReader->initializeScan(*tempState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); + + auto entry = iceDiskRelTable.getRelGroupCatalogEntry(); + outputColumnIdx.assign(columnIDs.size(), INVALID_COLUMN_ID); + 
columnSkips.assign(indicesReader->getNumColumns(), true); + + for (size_t outputCol = 0; outputCol < columnIDs.size(); ++outputCol) { + auto columnID = columnIDs[outputCol]; + if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) { + continue; + } + if (columnID == NBR_ID_COLUMN_ID) { + bool found = false; + for (uint32_t i = 0; i < indicesReader->getNumColumns(); i++) { + if (indicesReader->getColumnName(i) == "nbr_id" || i == 0) { + outputColumnIdx[outputCol] = static_cast(i); + columnSkips[i] = false; + found = true; + break; + } + } + if (!found) { + throw RuntimeException("nbr_id column not found in indices parquet"); + } + continue; + } + if (columnID == REL_ID_COLUMN_ID) { + outputColumnIdx[outputCol] = REL_ID_OUTPUT_COLUMN; + continue; + } + + auto propertyName = getRelPropertyNameForColumnID(*entry, columnID); + bool found = false; + for (uint32_t i = 0; i < indicesReader->getNumColumns(); i++) { + if (indicesReader->getColumnName(i) == propertyName) { + outputColumnIdx[outputCol] = static_cast(i); + columnSkips[i] = false; + found = true; + break; + } + } + if (!found) { + throw RuntimeException("Property " + propertyName + " not found in parquet file"); + } + } + + indicesReader = std::make_unique(resolvedPath, columnSkips, context); + processor::ParquetReaderScanState initializedState; + indicesReader->initializeScan(initializedState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); +} + +IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, + common::table_id_t toTableID, const StorageManager* storageManager, + MemoryManager* memoryManager) + : RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, + relGroupCatalogEntry{relGroupEntry} { + auto storage = relGroupEntry->getStorage(); + indicesFilePath = resolveIceDiskPath(storage, relGroupEntry->getIndicesPath(), + storage + "_indices_" + relGroupEntry->getName() + ".parquet"); + indptrFilePath = 
resolveIceDiskPath(storage, relGroupEntry->getIndptrPath(), + storage + "_indptr_" + relGroupEntry->getName() + ".parquet"); + if (indicesFilePath.empty() || indptrFilePath.empty()) { + throw RuntimeException("Invalid icebug-disk storage configuration for rel table: " + + relGroupEntry->getName()); + } + tableScanSharedState = std::make_unique(); +} + +void IceDiskRelTable::initializeScanCoordination(const Transaction* transaction) { + auto context = transaction->getClientContext(); + auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); + std::vector dummySkips; + processor::ParquetReader reader(resolvedPath, dummySkips, context); + + auto metadata = reader.getMetadata(); + std::vector rowGroupStartRows; + std::vector rowGroupNumRows; + size_t currentOffset = 0; + + for (auto i = 0u; i < metadata->row_groups.size(); ++i) { + rowGroupStartRows.push_back(currentOffset); + rowGroupNumRows.push_back(metadata->row_groups[i].num_rows); + currentOffset += metadata->row_groups[i].num_rows; + } + + tableScanSharedState->reset(rowGroupStartRows, rowGroupNumRows); +} + +void IceDiskRelTable::initScanState(Transaction* /*transaction*/, TableScanState& scanState, + bool resetCachedBoundNodeSelVec) const { + auto& relScanState = scanState.cast(); + relScanState.source = TableScanSource::COMMITTED; + relScanState.nodeGroup = nullptr; + relScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; + if (resetCachedBoundNodeSelVec) { + copyCachedBoundNodeSelVector(relScanState); + } + + auto& iceDiskScanState = static_cast(relScanState); + iceDiskScanState.scanCompleted = false; + iceDiskScanState.currentStartRow = 0; + iceDiskScanState.currentNumRows = 0; + iceDiskScanState.currentGlobalRowIdx = 0; + iceDiskScanState.nextRowGroupIdx = 0; + iceDiskScanState.pendingRows.clear(); + iceDiskScanState.nextPendingRowIdx = 0; +} + +void IceDiskRelTable::loadIndptrData(Transaction* transaction) const { + std::lock_guard lock(indptrDataMutex); + if (!indptrData.empty()) { + 
return; + } + + auto context = transaction->getClientContext(); + auto vfs = VirtualFileSystem::GetUnsafe(*context); + auto resolvedPath = VirtualFileSystem::resolvePath(context, indptrFilePath); + std::vector dummySkips; + auto indptrReader = std::make_unique(resolvedPath, dummySkips, context); + + auto scanState = std::make_unique(); + std::vector groupsToRead; + for (uint64_t i = 0; i < indptrReader->getMetadata()->row_groups.size(); ++i) { + groupsToRead.push_back(i); + } + indptrReader->initializeScan(*scanState, groupsToRead, vfs); + + DataChunk dataChunk(1); + dataChunk.insert(0, std::make_shared(LogicalType::UINT64(), MemoryManager::Get(*context))); + + while (indptrReader->scanInternal(*scanState, dataChunk)) { + auto selSize = dataChunk.state->getSelVector().getSelSize(); + auto& vector = dataChunk.getValueVectorMutable(0); + for (size_t i = 0; i < selSize; ++i) { + indptrData.push_back(((uint64_t*)vector.getData())[dataChunk.state->getSelVector()[i]]); + } + } +} + +common::offset_t IceDiskRelTable::findSourceNodeForRow(common::offset_t globalRowIdx) const { + auto it = std::upper_bound(indptrData.cbegin(), indptrData.cend(), (common::offset_t)globalRowIdx); + if (it == indptrData.cbegin()) { + return INVALID_OFFSET; + } + return std::distance(indptrData.cbegin(), it) - 1; +} + +bool IceDiskRelTable::scanInternal(Transaction* transaction, TableScanState& scanState) { + auto& iceDiskScanState = static_cast(scanState); + if (iceDiskScanState.scanCompleted) { + return false; + } + + if (iceDiskScanState.nextPendingRowIdx < iceDiskScanState.pendingRows.size()) { + emitPendingRow(iceDiskScanState); + return true; + } + + loadIndptrData(transaction); + scanState.resetOutVectors(); + + std::unordered_map boundNodeSelPosByOffset; + boundNodeSelPosByOffset.reserve(iceDiskScanState.cachedBoundNodeSelVector.getSelSize()); + for (size_t i = 0; i < iceDiskScanState.cachedBoundNodeSelVector.getSelSize(); ++i) { + auto pos = 
iceDiskScanState.cachedBoundNodeSelVector[i]; + boundNodeSelPosByOffset.emplace( + ((nodeID_t*)iceDiskScanState.nodeIDVector->getData())[pos].offset, pos); + } + + auto context = transaction->getClientContext(); + auto vfs = VirtualFileSystem::GetUnsafe(*context); + auto numColumns = iceDiskScanState.indicesReader->getNumColumns(); + DataChunk indicesChunk(numColumns); + uint32_t nbrColumnIdx = 0; + for (uint32_t i = 0; i < numColumns; ++i) { + if (iceDiskScanState.indicesReader->getColumnName(i) == "nbr_id") { + nbrColumnIdx = i; + break; + } + } + for (uint32_t i = 0; i < numColumns; ++i) { + indicesChunk.insert(i, std::make_shared( + iceDiskScanState.indicesReader->getColumnType(i).copy(), + MemoryManager::Get(*context))); + } + const auto nbrTableID = + iceDiskScanState.direction == RelDataDirection::BWD ? getFromNodeTableID() : getToNodeTableID(); + + while (true) { + if (iceDiskScanState.nodeGroupIdx == INVALID_NODE_GROUP_IDX) { + uint64_t startRow = 0; + uint64_t numRows = 0; + if (!tableScanSharedState->getMorsel( + static_cast(iceDiskScanState.nextRowGroupIdx), + startRow, numRows)) { + iceDiskScanState.scanCompleted = true; + return false; + } + iceDiskScanState.nodeGroupIdx = + static_cast(iceDiskScanState.nextRowGroupIdx++); + bool overlap = iceDiskScanState.direction == RelDataDirection::BWD; + if (!overlap) { + auto startNode = findSourceNodeForRow(startRow); + auto endNode = findSourceNodeForRow(startRow + numRows - 1); + for (const auto& [boundOffset, _] : boundNodeSelPosByOffset) { + if (boundOffset >= startNode && + (startNode == endNode || + (endNode != INVALID_OFFSET && boundOffset <= endNode))) { + overlap = true; + break; + } + } + } + + if (!overlap) { + iceDiskScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; + continue; + } + + iceDiskScanState.currentStartRow = startRow; + iceDiskScanState.currentNumRows = numRows; + iceDiskScanState.currentGlobalRowIdx = startRow; + std::vector groupsToRead = {iceDiskScanState.nodeGroupIdx}; + 
iceDiskScanState.indicesReader->initializeScan(*iceDiskScanState.parquetScanState, + groupsToRead, vfs); + } + + indicesChunk.state->getSelVectorUnsafe().setSelSize(0); + iceDiskScanState.indicesReader->scan(*iceDiskScanState.parquetScanState, indicesChunk); + if (indicesChunk.state->getSelVector().getSelSize() == 0) { + iceDiskScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; + continue; + } + + auto selSize = indicesChunk.state->getSelVector().getSelSize(); + for (size_t i = 0; i < selSize; ++i) { + auto pos = indicesChunk.state->getSelVector()[i]; + auto globalRowIdx = iceDiskScanState.currentGlobalRowIdx + i; + auto srcOffset = findSourceNodeForRow(globalRowIdx); + auto& nbrVec = indicesChunk.getValueVectorMutable(nbrColumnIdx); + auto dstOffset = nbrVec.getValue(pos); + const auto boundOffset = + iceDiskScanState.direction == RelDataDirection::BWD ? dstOffset : srcOffset; + if (!boundNodeSelPosByOffset.contains(boundOffset)) { + continue; + } + + PendingIceDiskRelRow row; + row.boundNodeSelPos = boundNodeSelPosByOffset.at(boundOffset); + row.relID = internalID_t{globalRowIdx, getTableID()}; + row.propertyValues.resize(iceDiskScanState.columnIDs.size()); + for (size_t outCol = 0; outCol < iceDiskScanState.columnIDs.size(); ++outCol) { + auto columnID = iceDiskScanState.columnIDs[outCol]; + if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID || + columnID == REL_ID_COLUMN_ID) { + continue; + } + auto parquetColIdx = iceDiskScanState.outputColumnIdx[outCol]; + if (parquetColIdx < 0) { + continue; + } + auto& vec = indicesChunk.getValueVectorMutable(static_cast(parquetColIdx)); + if (columnID == NBR_ID_COLUMN_ID) { + auto nbrOffset = + iceDiskScanState.direction == RelDataDirection::BWD ? 
srcOffset : dstOffset; + row.nbrNodeID = internalID_t{nbrOffset, nbrTableID}; + } else { + row.propertyValues[outCol] = vec.getAsValue(pos); + } + } + iceDiskScanState.pendingRows.push_back(std::move(row)); + } + + iceDiskScanState.currentGlobalRowIdx += selSize; + if (iceDiskScanState.pendingRows.empty()) { + continue; + } + emitPendingRow(iceDiskScanState); + return true; + } +} + +common::row_idx_t IceDiskRelTable::getNumTotalRows(const Transaction* transaction) { + auto context = transaction->getClientContext(); + auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); + std::vector dummySkips; + processor::ParquetReader reader(resolvedPath, dummySkips, context); + return reader.getMetadata()->num_rows; +} + +} // namespace storage +} // namespace lbug diff --git a/test/include/test_runner/test_group.h b/test/include/test_runner/test_group.h index b0ba4663cd..c956e46f09 100644 --- a/test/include/test_runner/test_group.h +++ b/test/include/test_runner/test_group.h @@ -116,7 +116,8 @@ struct TestGroup { LBUG, JSON, CSV_TO_JSON, - GRAPH_STD + GRAPH_STD, + ICE_DISK }; DatasetType datasetType; diff --git a/test/storage/CMakeLists.txt b/test/storage/CMakeLists.txt index e63f929d90..15b7ccb885 100644 --- a/test/storage/CMakeLists.txt +++ b/test/storage/CMakeLists.txt @@ -4,6 +4,7 @@ add_lbug_test(column_chunk_metadata_test column_chunk_metadata_test.cpp) add_lbug_test(local_hash_index_test local_hash_index_test.cpp) add_lbug_test(buffer_manager_test buffer_manager_test.cpp) add_lbug_test(rel_tests rel_scan_test.cpp rel_delete_test.cpp) +add_lbug_test(ice_disk_test ice_disk_test.cpp) add_lbug_test(node_update_test node_update_test.cpp) add_lbug_test(detach_delete_test detach_delete_test.cpp) add_lbug_test(storage_utils_test storage_utils_test.cpp) diff --git a/test/storage/ice_disk_test.cpp b/test/storage/ice_disk_test.cpp new file mode 100644 index 0000000000..9c89e66ac7 --- /dev/null +++ b/test/storage/ice_disk_test.cpp @@ -0,0 +1,157 @@ 
+#include "graph_test/private_graph_test.h" +#include "storage/storage_manager.h" +#include "storage/table/ice_disk_node_table.h" +#include "storage/table/ice_disk_rel_table.h" +#include "main/client_context.h" +#include "transaction/transaction.h" +#include "catalog/catalog.h" +#include "catalog/catalog_entry/rel_group_catalog_entry.h" +#include "catalog/catalog_entry/node_table_catalog_entry.h" +#include "storage/table/csr_node_group.h" + +using namespace lbug::common; +using namespace lbug::storage; +using namespace lbug::transaction; +using namespace lbug::catalog; + +namespace lbug { +namespace testing { + +class IceDiskStorageTest : public DBTest { +public: + std::string getInputDir() override { return TestHelper::appendLbugRootPath("dataset/demo-db/icebug-disk/"); } + + void SetUp() override { + DBTest::SetUp(); + conn->query("BEGIN TRANSACTION"); + context = getClientContext(*conn); + storageManager = database->getStorageManager(); + } + + main::ClientContext* context; + StorageManager* storageManager; +}; + +TEST_F(IceDiskStorageTest, NodeTableScanTest) { + auto catalog = Catalog::Get(*context); + auto transaction = Transaction::Get(*context); + auto tableEntry = catalog->getTableCatalogEntry(transaction, "user"); + ASSERT_NE(tableEntry, nullptr); + auto tableID = tableEntry->getTableID(); + auto table = storageManager->getTable(tableID); + auto nodeTable = dynamic_cast(table); + + ASSERT_NE(nodeTable, nullptr); + EXPECT_EQ(nodeTable->getNumTotalRows(transaction), 4); + + auto nodeIDVector = std::make_unique(LogicalType::INTERNAL_ID(), database->getMemoryManager()); + auto nameVector = std::make_unique(LogicalType::STRING(), database->getMemoryManager()); + auto ageVector = std::make_unique(LogicalType::INT64(), database->getMemoryManager()); + + std::vector outputVectors = {nameVector.get(), ageVector.get()}; + auto outState = std::make_shared(); + IceDiskNodeTableScanState scanState(nodeIDVector.get(), outputVectors, outState); + + // name is column 1, 
age is column 2 + scanState.setToTable(transaction, nodeTable, {1, 2}); + nodeTable->initializeScanCoordination(transaction); + nodeTable->initScanState(transaction, scanState); + + int count = 0; + while (nodeTable->scanInternal(transaction, scanState)) { + auto selSize = outState->getSelVector().getSelSize(); + for (auto i = 0u; i < selSize; i++) { + auto pos = outState->getSelVector()[i]; + auto name = ((lbug::common::string_t*)nameVector->getData())[pos].getAsString(); + auto age = ((int64_t*)ageVector->getData())[pos]; + if (name == "Adam") { EXPECT_EQ(age, 30); } + else if (name == "Karissa") { EXPECT_EQ(age, 40); } + else if (name == "Zhang") { EXPECT_EQ(age, 50); } + else if (name == "Noura") { EXPECT_EQ(age, 25); } + count++; + } + } + EXPECT_EQ(count, 4); +} + +TEST_F(IceDiskStorageTest, RelTableScanTest) { + auto catalog = Catalog::Get(*context); + auto transaction = Transaction::Get(*context); + auto relGroupEntry = + dynamic_cast(catalog->getTableCatalogEntry(transaction, "follows")); + + ASSERT_NE(relGroupEntry, nullptr); + + auto relTableID = relGroupEntry->getSingleRelEntryInfo().oid; + auto table = storageManager->getTable(relTableID); + auto relTable = dynamic_cast(table); + + ASSERT_NE(relTable, nullptr); + + auto nodeIDVector = std::make_unique(LogicalType::INTERNAL_ID(), database->getMemoryManager()); + nodeIDVector->state = std::make_shared(); + auto nbrIDVector = std::make_unique(LogicalType::INTERNAL_ID(), database->getMemoryManager()); + auto sinceVector = std::make_unique(LogicalType::INT32(), database->getMemoryManager()); + + std::vector outputVectors = {nbrIDVector.get(), sinceVector.get()}; + auto outState = std::make_shared(); + + auto memManager = database->getMemoryManager(); + IceDiskRelTableScanState scanState(*memManager, nodeIDVector.get(), outputVectors, outState); + + auto userTableEntry = catalog->getTableCatalogEntry(transaction, "user"); + ASSERT_NE(userTableEntry, nullptr); + auto userTableID = 
userTableEntry->getTableID(); + + // In this dataset, Adam is at row offset 1 in nodes_user.parquet. + nodeIDVector->state->getSelVectorUnsafe().setSelSize(1); + nodeIDVector->state->getSelVectorUnsafe().setToUnfiltered(); + + nodeID_t srcNode; + srcNode.offset = 1; + srcNode.tableID = userTableID; + ((lbug::common::nodeID_t*)nodeIDVector->getData())[0] = srcNode; + + auto sinceColumnID = relGroupEntry->getColumnID("since"); + scanState.setToTable(transaction, relTable, {NBR_ID_COLUMN_ID, sinceColumnID}); + relTable->initializeScanCoordination(transaction); + relTable->initScanState(transaction, scanState); + + int count = 0; + while (relTable->scanInternal(transaction, scanState)) { + auto selSize = outState->getSelVector().getSelSize(); + for (auto i = 0u; i < selSize; i++) { + auto pos = outState->getSelVector()[i]; + auto nbr = ((lbug::common::nodeID_t*)nbrIDVector->getData())[pos]; + // Adam follows Karissa (2) and Zhang (3) in nodes_user.parquet row order. + EXPECT_TRUE(nbr.offset == 2 || nbr.offset == 3); + count++; + } + } + EXPECT_EQ(count, 2); + + nodeID_t dstNode; + dstNode.offset = 2; + dstNode.tableID = userTableID; + ((lbug::common::nodeID_t*)nodeIDVector->getData())[0] = dstNode; + + scanState.setToTable(transaction, relTable, {NBR_ID_COLUMN_ID, sinceColumnID}, {}, + RelDataDirection::BWD); + relTable->initializeScanCoordination(transaction); + relTable->initScanState(transaction, scanState); + + count = 0; + while (relTable->scanInternal(transaction, scanState)) { + auto selSize = outState->getSelVector().getSelSize(); + for (auto i = 0u; i < selSize; i++) { + auto pos = outState->getSelVector()[i]; + auto nbr = ((lbug::common::nodeID_t*)nbrIDVector->getData())[pos]; + EXPECT_EQ(nbr.offset, 1); + count++; + } + } + EXPECT_EQ(count, 1); +} + +} // namespace testing +} // namespace lbug diff --git a/test/test_files/demo_db/demo_db_graph_std.test b/test/test_files/demo_db/demo_db_graph_std.test index 7917b89fa7..282558c882 100644 --- 
a/test/test_files/demo_db/demo_db_graph_std.test +++ b/test/test_files/demo_db/demo_db_graph_std.test @@ -1,4 +1,4 @@ --DATASET GRAPH-STD demo-db/icebug-disk +-DATASET GRAPH-STD demo-db/graph-std -- diff --git a/test/test_files/demo_db/demo_db_ice_disk.test b/test/test_files/demo_db/demo_db_ice_disk.test new file mode 100644 index 0000000000..50465b9b11 --- /dev/null +++ b/test/test_files/demo_db/demo_db_ice_disk.test @@ -0,0 +1,65 @@ +-DATASET ICEBUG-DISK demo-db/icebug-disk + +-- + +-CASE IceBugDiskExtensiveTest + +-LOG MatchUserNodes +-STATEMENT MATCH (a:user) RETURN a.name, a.age ORDER BY a.name; +---- 4 +Adam|30 +Karissa|40 +Noura|25 +Zhang|50 + +-LOG MatchCityNodes +-STATEMENT MATCH (c:city) RETURN c.name, c.population ORDER BY c.name; +---- 3 +Guelph|75000 +Kitchener|200000 +Waterloo|150000 + +-LOG MatchFollowsFilterByAge +-STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE b.age > 30 RETURN a.name, b.name, b.age ORDER BY b.age DESC; +---- 3 +Adam|Zhang|50 +Karissa|Zhang|50 +Adam|Karissa|40 + +-LOG MatchFollowsSince +-STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN a.name, b.name, e.since ORDER BY a.name, b.name; +---- 4 +Adam|Karissa|2020 +Adam|Zhang|2020 +Karissa|Zhang|2021 +Zhang|Noura|2022 + +-LOG MatchLivesIn +-STATEMENT MATCH (a:user)-[:livesin]->(c:city) RETURN a.name, c.name ORDER BY a.name; +---- 4 +Adam|Waterloo +Karissa|Waterloo +Noura|Guelph +Zhang|Kitchener + +-LOG MultiHop +-STATEMENT MATCH (a:user)-[:follows]->(b:user)-[:livesin]->(c:city) RETURN a.name, b.name, c.name ORDER BY a.name, b.name; +---- 4 +Adam|Karissa|Waterloo +Adam|Zhang|Kitchener +Karissa|Zhang|Kitchener +Zhang|Noura|Guelph + +-LOG FilterBySourceNode +-STATEMENT MATCH (a:user)-[:follows]->(b:user) WHERE a.name = 'Adam' RETURN b.name ORDER BY b.name; +---- 2 +Karissa +Zhang + +-LOG InternalRelID +-STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN a.name, b.name, ID(e) ORDER BY a.name, b.name; +---- 4 +Adam|Karissa|2:0 +Adam|Zhang|2:1 +Karissa|Zhang|2:2 
+Zhang|Noura|2:3 diff --git a/test/test_runner/test_parser.cpp b/test/test_runner/test_parser.cpp index dea0ef047d..84e48438a4 100644 --- a/test/test_runner/test_parser.cpp +++ b/test/test_runner/test_parser.cpp @@ -91,6 +91,9 @@ void TestParser::extractDataset() { } else if (datasetType == "GRAPH-STD") { testGroup->datasetType = TestGroup::DatasetType::GRAPH_STD; testGroup->dataset = currentToken.params[2]; + } else if (datasetType == "ICEBUG-DISK") { + testGroup->datasetType = TestGroup::DatasetType::ICE_DISK; + testGroup->dataset = currentToken.params[2]; } else { throw TestException( "Invalid dataset type `" + currentToken.params[1] + "` [" + path + ":" + line + "]."); From cb20856e2783aae678b30a84aa00a5554e499f1f Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Tue, 28 Apr 2026 15:54:39 +0530 Subject: [PATCH 02/17] fix tests in demo_db ice_disk test --- test/test_files/demo_db/demo_db_ice_disk.test | 88 +++++++++++-------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/test/test_files/demo_db/demo_db_ice_disk.test b/test/test_files/demo_db/demo_db_ice_disk.test index 50465b9b11..7d938f7706 100644 --- a/test/test_files/demo_db/demo_db_ice_disk.test +++ b/test/test_files/demo_db/demo_db_ice_disk.test @@ -2,64 +2,76 @@ -- --CASE IceBugDiskExtensiveTest +-CASE DemoDBIceDiskTest --LOG MatchUserNodes --STATEMENT MATCH (a:user) RETURN a.name, a.age ORDER BY a.name; +-LOG MatchUserLivesInCity +-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN u.name, u.age, c.name; +---- 4 +Adam|30|Waterloo +Karissa|40|Waterloo +Zhang|50|Kitchener +Noura|25|Guelph + +-LOG MatchSingleNodeLabel +-STATEMENT MATCH (a:user) RETURN a.name, a.age; ---- 4 Adam|30 Karissa|40 -Noura|25 Zhang|50 +Noura|25 -LOG MatchCityNodes --STATEMENT MATCH (c:city) RETURN c.name, c.population ORDER BY c.name; +-STATEMENT MATCH (c:city) RETURN c.name, c.population; ---- 3 -Guelph|75000 -Kitchener|200000 Waterloo|150000 +Kitchener|200000 +Guelph|75000 --LOG MatchFollowsFilterByAge --STATEMENT 
MATCH (a:user)-[e:follows]->(b:user) WHERE b.age > 30 RETURN a.name, b.name, b.age ORDER BY b.age DESC; ----- 3 -Adam|Zhang|50 -Karissa|Zhang|50 -Adam|Karissa|40 - --LOG MatchFollowsSince --STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN a.name, b.name, e.since ORDER BY a.name, b.name; +-LOG MatchFollowsRel +-STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN a.name, b.name, e.since; ---- 4 Adam|Karissa|2020 Adam|Zhang|2020 Karissa|Zhang|2021 Zhang|Noura|2022 --LOG MatchLivesIn --STATEMENT MATCH (a:user)-[:livesin]->(c:city) RETURN a.name, c.name ORDER BY a.name; +-LOG MatchLivesInWithCityPopulation +-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN u.name, c.name, c.population ORDER BY c.population DESC; ---- 4 +Zhang|Kitchener|200000 +Adam|Waterloo|150000 +Karissa|Waterloo|150000 +Noura|Guelph|75000 + +-LOG MatchLivesInFilterByCity +-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) WHERE c.name = 'Waterloo' RETURN u.name, u.age; +---- 2 +Adam|30 +Karissa|40 + +-LOG MatchLivesInFilterByCityPopulation +-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) WHERE c.population > 100000 RETURN u.name, c.name ORDER BY u.name; +---- 3 Adam|Waterloo Karissa|Waterloo -Noura|Guelph Zhang|Kitchener --LOG MultiHop --STATEMENT MATCH (a:user)-[:follows]->(b:user)-[:livesin]->(c:city) RETURN a.name, b.name, c.name ORDER BY a.name, b.name; ----- 4 -Adam|Karissa|Waterloo -Adam|Zhang|Kitchener -Karissa|Zhang|Kitchener -Zhang|Noura|Guelph +-LOG CountUsersPerCity +-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN c.name, COUNT(*) AS num_users ORDER BY num_users DESC; +---- 3 +Waterloo|2 +Guelph|1 +Kitchener|1 --LOG FilterBySourceNode --STATEMENT MATCH (a:user)-[:follows]->(b:user) WHERE a.name = 'Adam' RETURN b.name ORDER BY b.name; ----- 2 -Karissa -Zhang +-LOG MatchFollowsWithDestinationAge +-STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE b.age > 30 RETURN a.name, b.name, b.age ORDER BY b.age DESC; +---- 3 +Adam|Zhang|50 +Karissa|Zhang|50 +Adam|Karissa|40 --LOG 
InternalRelID --STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN a.name, b.name, ID(e) ORDER BY a.name, b.name; ----- 4 -Adam|Karissa|2:0 -Adam|Zhang|2:1 -Karissa|Zhang|2:2 -Zhang|Noura|2:3 +-LOG MatchFollowsFilterBySourceAndDest +-STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE a.age < 40 AND b.age >= 40 RETURN a.name, b.name; +---- 2 +Adam|Karissa +Adam|Zhang From 8358cf5cb0dd78b679454b994b38ab8317196d7c Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Fri, 1 May 2026 22:15:20 +0530 Subject: [PATCH 03/17] fix path validations --- .../storage/table/ice_disk_node_table.h | 6 ++-- src/include/storage/table/table.h | 1 - .../operator/scan/scan_node_table.cpp | 1 + src/storage/table/ice_disk_node_table.cpp | 36 ++++--------------- src/storage/table/ice_disk_rel_table.cpp | 36 +++++-------------- 5 files changed, 18 insertions(+), 62 deletions(-) diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index e17d0bc38d..d4e4e7ce2f 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -27,8 +27,6 @@ struct IceDiskNodeTableScanState : public TableScanState { parquetScanState = std::make_unique(); } - const catalog::NodeTableCatalogEntry* getNodeTableCatalogEntry() const; - void setToTable(const transaction::Transaction* transaction, Table* table_, std::vector columnIDs_, std::vector columnPredicateSets_ = {}, @@ -97,13 +95,13 @@ class IceDiskNodeTable final : public NodeTable { common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override; const std::string& getParquetFilePath() const { return parquetFilePath; } - const catalog::NodeTableCatalogEntry* getNodeTableCatalogEntry() const { return nodeTableCatalogEntry; } + const catalog::NodeTableCatalogEntry* getCatalogEntry() const { return nodeTableCatalogEntry; } IceDiskNodeTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } 
private: std::string parquetFilePath; const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry; - mutable std::unique_ptr tableScanSharedState; + std::unique_ptr tableScanSharedState; }; } // namespace storage diff --git a/src/include/storage/table/table.h b/src/include/storage/table/table.h index 12ea334809..fc14653c61 100644 --- a/src/include/storage/table/table.h +++ b/src/include/storage/table/table.h @@ -164,7 +164,6 @@ class LBUG_API Table { // Note that `resetCachedBoundNodeIDs` is only applicable to RelTable for now. virtual void initScanState(transaction::Transaction* transaction, TableScanState& readState, bool resetCachedBoundNodeSelVec = true) const = 0; - virtual void initializeScanCoordination(const transaction::Transaction* /*transaction*/) {} bool scan(transaction::Transaction* transaction, TableScanState& scanState); virtual void initInsertState(main::ClientContext* context, TableInsertState& insertState) = 0; diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp index 4c5a71ab7a..62c8fc2fce 100644 --- a/src/processor/operator/scan/scan_node_table.cpp +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -116,6 +116,7 @@ void ScanNodeTableSharedState::nextMorsel(TableScanState& scanState, return; } + if (const auto iceDiskTable = dynamic_cast(this->table)) { const auto tableSharedState = iceDiskTable->getTableScanSharedState(); if (tableSharedState->getNextMorsel(static_cast(&scanState))) { diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index 07bea9584c..afff051eea 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -22,33 +22,6 @@ using namespace lbug::transaction; namespace lbug { namespace storage { -namespace { - -std::string resolveIceDiskPath(const std::string& storageRoot, const std::string& configuredPath, - const std::string& fallbackPath) { - if (configuredPath.empty()) { 
- return fallbackPath; - } - auto configured = std::filesystem::path{configuredPath}; - if (configured.is_absolute()) { - return configured.lexically_normal().string(); - } - if (storageRoot.empty()) { - return configured.lexically_normal().string(); - } - auto baseDir = std::filesystem::path{storageRoot}.parent_path(); - if (baseDir.empty()) { - return configured.lexically_normal().string(); - } - return (baseDir / configured).lexically_normal().string(); -} - -} // namespace - -const NodeTableCatalogEntry* IceDiskNodeTableScanState::getNodeTableCatalogEntry() const { - return table->cast().getNodeTableCatalogEntry(); -} - void IceDiskNodeTableScanState::setToTable(const Transaction* transaction, Table* table_, std::vector columnIDs_, std::vector columnPredicateSets_, RelDataDirection /*direction*/) { @@ -66,7 +39,7 @@ void IceDiskNodeTableScanState::setToTable(const Transaction* transaction, Table tempReader->initializeScan(tempState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); columnSkips.assign(tempReader->getNumColumns(), true); - auto entry = iceDiskTable.getNodeTableCatalogEntry(); + auto entry = iceDiskTable.getCatalogEntry(); for (auto columnID : columnIDs) { if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) { continue; @@ -91,8 +64,11 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, : NodeTable{storageManager, nodeTableEntry, memoryManager}, nodeTableCatalogEntry{nodeTableEntry}, tableScanSharedState{std::make_unique()} { - parquetFilePath = resolveIceDiskPath(nodeTableEntry->getStorage(), nodeTableEntry->getTablePath(), - nodeTableEntry->getStorage() + "_nodes_" + nodeTableEntry->getName() + ".parquet"); + if (nodeTableEntry->getTablePath().empty()) { + throw RuntimeException("Parquet file path is empty for icebug-disk-backed node table"); + } + + parquetFilePath = nodeTableEntry->getTablePath(); } void IceDiskNodeTable::initializeScanCoordination(const Transaction* transaction) { diff --git 
a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index b011b30b17..5b33e1620e 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -21,25 +21,6 @@ namespace { constexpr int64_t REL_ID_OUTPUT_COLUMN = -2; -std::string resolveIceDiskPath(const std::string& storageRoot, const std::string& configuredPath, - const std::string& fallbackPath) { - if (configuredPath.empty()) { - return fallbackPath; - } - auto configured = std::filesystem::path{configuredPath}; - if (configured.is_absolute()) { - return configured.lexically_normal().string(); - } - if (storageRoot.empty()) { - return configured.lexically_normal().string(); - } - auto baseDir = std::filesystem::path{storageRoot}.parent_path(); - if (baseDir.empty()) { - return configured.lexically_normal().string(); - } - return (baseDir / configured).lexically_normal().string(); -} - std::string getRelPropertyNameForColumnID(const RelGroupCatalogEntry& entry, column_id_t columnID) { for (const auto& property : entry.getProperties()) { if (entry.getColumnID(property.getName()) == columnID) { @@ -161,15 +142,16 @@ IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::ta MemoryManager* memoryManager) : RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, relGroupCatalogEntry{relGroupEntry} { - auto storage = relGroupEntry->getStorage(); - indicesFilePath = resolveIceDiskPath(storage, relGroupEntry->getIndicesPath(), - storage + "_indices_" + relGroupEntry->getName() + ".parquet"); - indptrFilePath = resolveIceDiskPath(storage, relGroupEntry->getIndptrPath(), - storage + "_indptr_" + relGroupEntry->getName() + ".parquet"); - if (indicesFilePath.empty() || indptrFilePath.empty()) { - throw RuntimeException("Invalid icebug-disk storage configuration for rel table: " + - relGroupEntry->getName()); + if (relGroupEntry->getIndicesPath().empty()) { + throw RuntimeException("Indices file path is 
empty for icebug-disk-backed rel table"); + } + + if (relGroupEntry->getIndptrPath().empty()) { + throw RuntimeException("Indptr file path is empty for icebug-disk-backed rel table"); } + + indicesFilePath = relGroupEntry->getIndicesPath(); + indptrFilePath = relGroupEntry->getIndptrPath(); tableScanSharedState = std::make_unique(); } From 3e895f2ed36de2755741a4738df27ef658106929 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Sun, 3 May 2026 14:50:38 +0530 Subject: [PATCH 04/17] fix IceDiskNodeTable::getNumTotalRows --- .../storage/table/parquet_node_table.h | 1 - src/storage/table/ice_disk_node_table.cpp | 25 ++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/include/storage/table/parquet_node_table.h b/src/include/storage/table/parquet_node_table.h index 9554471dc2..c66b0e5f0c 100644 --- a/src/include/storage/table/parquet_node_table.h +++ b/src/include/storage/table/parquet_node_table.h @@ -81,7 +81,6 @@ class ParquetNodeTable final : public ColumnarNodeTableBase { private: std::string parquetFilePath; - void initializeParquetReader(transaction::Transaction* transaction) const; void initParquetScanForRowGroup(transaction::Transaction* transaction, ParquetNodeTableScanState& scanState) const; }; diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index afff051eea..bacb251775 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -175,10 +175,27 @@ bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& sc common::row_idx_t IceDiskNodeTable::getNumTotalRows(const Transaction* transaction) { auto context = transaction->getClientContext(); - auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); - std::vector dummySkips; - processor::ParquetReader reader(resolvedPath, dummySkips, context); - return reader.getMetadata()->num_rows; + + if (!context) { + return 0; + } + + std::vector 
columnSkips; + + try { + auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); + auto tempReader = std::make_unique(resolvedPath, columnSkips, context); + + if (!tempReader) { + return 0; + } + + auto metadata = tempReader->getMetadata(); + return metadata ? metadata->num_rows : 0; + } catch (const std::exception& e) { + // If parquet file is corrupted or invalid, return 0 instead of crashing + return 0; + } } } // namespace storage From 5aadc7cf9c14e8b0c2b86b290d6008e3099d0374 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Sun, 3 May 2026 17:25:13 +0530 Subject: [PATCH 05/17] fix IceDiskNodeTable scan init --- .../storage/table/ice_disk_node_table.h | 70 +++++++--------- .../operator/scan/scan_node_table.cpp | 15 +--- src/storage/table/ice_disk_node_table.cpp | 82 ++++++------------- 3 files changed, 58 insertions(+), 109 deletions(-) diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index d4e4e7ce2f..3ca89adba5 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -11,63 +11,44 @@ namespace storage { class IceDiskNodeTable; -struct IceDiskNodeTableScanState : public TableScanState { - std::unique_ptr parquetReader; - std::unique_ptr parquetScanState; - std::vector columnSkips; - bool scanCompleted = false; - uint64_t currentStartRow = 0; - uint64_t currentNumRows = 0; - uint64_t currentRowOffset = 0; - - IceDiskNodeTableScanState(common::ValueVector* nodeIDVector, - std::vector outputVectors, - std::shared_ptr outChunkState) - : TableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { - parquetScanState = std::make_unique(); - } - - void setToTable(const transaction::Transaction* transaction, Table* table_, - std::vector columnIDs_, - std::vector columnPredicateSets_ = {}, - common::RelDataDirection direction = common::RelDataDirection::FWD) override; -}; - struct 
IceDiskNodeTableScanSharedState { private: std::mutex mtx; - std::vector rowGroupRows; - std::vector rowGroupStartRows; - common::node_group_idx_t currentMorselIdx = 0; + common::node_group_idx_t currentRowGroupIdx = 0; + common::node_group_idx_t numRowGroups = 0; public: - IceDiskNodeTableScanSharedState() {} - - void reset(std::vector rows, std::vector startRows) { + void reset(common::node_group_idx_t totalRowGroups) { std::lock_guard lock(mtx); - this->rowGroupRows = std::move(rows); - this->rowGroupStartRows = std::move(startRows); - this->currentMorselIdx = 0; + currentRowGroupIdx = 0; + numRowGroups = totalRowGroups; } + bool getNextMorsel(IceDiskNodeTableScanState* scanState) { std::lock_guard lock(mtx); - if (currentMorselIdx < rowGroupRows.size()) { - scanState->nodeGroupIdx = currentMorselIdx; - scanState->currentNumRows = rowGroupRows[currentMorselIdx]; - scanState->currentStartRow = rowGroupStartRows[currentMorselIdx]; - currentMorselIdx++; + if (currentRowGroupIdx < numRowGroups) { + scanState->currentRowGroupIdx = currentRowGroupIdx++; return true; } return false; - } + } +}; + +struct IceDiskNodeTableScanState : public TableScanState { + std::unique_ptr parquetReader; + std::unique_ptr parquetScanState; + std::vector columnSkips; + bool scanCompleted = false; + common::node_group_idx_t currentRowGroupIdx = 0; - void resetMorsel() { - std::lock_guard lock(mtx); - currentMorselIdx = 0; - } - size_t getNumMorsels() const { return rowGroupRows.size(); } + IceDiskNodeTableScanState(common::ValueVector* nodeIDVector, + std::vector outputVectors, + std::shared_ptr outChunkState) + : TableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { + parquetScanState = std::make_unique(); + } }; class IceDiskNodeTable final : public NodeTable { @@ -98,6 +79,11 @@ class IceDiskNodeTable final : public NodeTable { const catalog::NodeTableCatalogEntry* getCatalogEntry() const { return nodeTableCatalogEntry; } IceDiskNodeTableScanSharedState* 
getTableScanSharedState() const { return tableScanSharedState.get(); } + size_t getNumScanMorsels(const transaction::Transaction* transaction) const; + +private: + size_t getNumRowGroups(const transaction::Transaction* transaction) const; + private: std::string parquetFilePath; const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry; diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp index 62c8fc2fce..22114afc1e 100644 --- a/src/processor/operator/scan/scan_node_table.cpp +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -71,17 +71,7 @@ void ScanNodeTableSharedState::initialize(const transaction::Transaction* transa this->numCommittedNodeGroups = 1; } } else if (const auto iceDiskTable = dynamic_cast(table)) { - std::vector columnSkips; - try { - auto context = transaction->getClientContext(); - auto resolvedPath = - common::VirtualFileSystem::resolvePath(context, iceDiskTable->getParquetFilePath()); - auto tempReader = - std::make_unique(resolvedPath, columnSkips, context); - this->numCommittedNodeGroups = tempReader->getNumRowsGroups(); - } catch (const std::exception& e) { - this->numCommittedNodeGroups = 1; - } + this->numCommittedNodeGroups = iceDiskTable->getNumScanMorsels(transaction); } else if (const auto arrowTable = dynamic_cast(table)) { // For Arrow tables, set numCommittedNodeGroups to number of morsels this->numCommittedNodeGroups = @@ -103,7 +93,8 @@ void ScanNodeTableSharedState::nextMorsel(TableScanState& scanState, ScanNodeTableProgressSharedState& progressSharedState) { std::unique_lock lck{mtx}; - // ColumnarNodeTables handle morsel assignment internally + // Columnar/Icebug NodeTables handle morsel assignment internally + // TODO: parquet tables https://github.com/LadybugDB/ladybug/issues/245 if (const auto arrowTable = dynamic_cast(this->table)) { const auto tableSharedState = arrowTable->getTableScanSharedState(); diff --git a/src/storage/table/ice_disk_node_table.cpp 
b/src/storage/table/ice_disk_node_table.cpp index bacb251775..50e38d4262 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -22,43 +22,6 @@ using namespace lbug::transaction; namespace lbug { namespace storage { -void IceDiskNodeTableScanState::setToTable(const Transaction* transaction, Table* table_, - std::vector columnIDs_, std::vector columnPredicateSets_, - RelDataDirection /*direction*/) { - table = table_; - columnIDs = std::move(columnIDs_); - columnPredicateSets = std::move(columnPredicateSets_); - - auto& iceDiskTable = table->cast(); - auto context = transaction->getClientContext(); - auto resolvedPath = VirtualFileSystem::resolvePath(context, iceDiskTable.getParquetFilePath()); - std::vector dummySkips; - auto tempReader = std::make_unique(resolvedPath, dummySkips, context); - processor::ParquetReaderScanState tempState; - std::vector dummyGroups; - tempReader->initializeScan(tempState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); - columnSkips.assign(tempReader->getNumColumns(), true); - - auto entry = iceDiskTable.getCatalogEntry(); - for (auto columnID : columnIDs) { - if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) { - continue; - } - - auto propertyName = entry->getProperty(columnID).getName(); - for (uint32_t j = 0; j < tempReader->getNumColumns(); ++j) { - if (tempReader->getColumnName(j) == propertyName) { - columnSkips[j] = false; - break; - } - } - } - - parquetReader = std::make_unique(resolvedPath, columnSkips, context); - processor::ParquetReaderScanState scanState; - parquetReader->initializeScan(scanState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); -} - IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, const NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager) : NodeTable{storageManager, nodeTableEntry, memoryManager}, @@ -72,24 +35,10 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* 
storageManager, } void IceDiskNodeTable::initializeScanCoordination(const Transaction* transaction) { - std::vector rowGroupRows; - std::vector rowGroupStartRows; - auto context = transaction->getClientContext(); - if (context) { - std::vector dummySkips; - auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); - auto tempReader = std::make_unique(resolvedPath, dummySkips, context); - auto metadata = tempReader->getMetadata(); - if (metadata) { - uint64_t currentStartRow = 0; - for (size_t i = 0; i < metadata->row_groups.size(); ++i) { - rowGroupRows.push_back(metadata->row_groups[i].num_rows); - rowGroupStartRows.push_back(currentStartRow); - currentStartRow += metadata->row_groups[i].num_rows; - } - } - } - tableScanSharedState->reset(std::move(rowGroupRows), std::move(rowGroupStartRows)); + auto iceDiskScanSharedState = + static_cast(tableScanSharedState.get()); + auto numRowGroups = getNumRowGroups(transaction); + iceDiskScanSharedState->reset(numRowGroups); } void IceDiskNodeTable::initScanState(Transaction* /*transaction*/, TableScanState& scanState, @@ -198,5 +147,28 @@ common::row_idx_t IceDiskNodeTable::getNumTotalRows(const Transaction* transacti } } +size_t IceDiskNodeTable::getNumRowGroups(const transaction::Transaction* transaction) const { + auto context = transaction->getClientContext(); + + if (!context) { + return 0; + } + + std::vector columnSkips; + + try { + auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); + auto tempReader = std::make_unique(resolvedPath, columnSkips, context); + return tempReader ? 
tempReader->getNumRowGroups() : 0; + } catch (const std::exception& e) { + // If parquet file is corrupted or invalid, return 0 instead of crashing + return 0; + } +} + +size_t IceDiskNodeTable::getNumScanMorsels(const transaction::Transaction* transaction) const { + return getNumRowGroups(transaction); +} + } // namespace storage } // namespace lbug From 2a522e8c5b8ff83dd7a713a0966db2c5d3c45daf Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Mon, 4 May 2026 12:57:36 +0530 Subject: [PATCH 06/17] fix ice disk node table scan --- .../reader/parquet/parquet_reader.h | 2 +- .../storage/table/ice_disk_node_table.h | 47 +-- .../storage/table/ice_disk_rel_table.h | 2 +- .../reader/parquet/parquet_reader.cpp | 6 +- .../operator/scan/scan_node_table.cpp | 2 +- src/storage/table/ice_disk_node_table.cpp | 279 +++++++++++++----- src/storage/table/parquet_node_table.cpp | 2 +- src/storage/table/parquet_rel_table.cpp | 4 +- 8 files changed, 241 insertions(+), 103 deletions(-) diff --git a/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h b/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h index 2da8342a90..c0a2cedd5f 100644 --- a/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h +++ b/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h @@ -48,7 +48,7 @@ class ParquetReader { common::VirtualFileSystem* vfs); bool scanInternal(ParquetReaderScanState& state, common::DataChunk& result); void scan(ParquetReaderScanState& state, common::DataChunk& result); - uint64_t getNumRowsGroups() { return metadata->row_groups.size(); } + uint64_t getNumRowGroups() { return metadata->row_groups.size(); } uint32_t getNumColumns() const { return columnNames.size(); } std::string getColumnName(uint32_t idx) const { return columnNames[idx]; } diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index 3ca89adba5..8652fdf0ea 100644 --- 
a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -11,11 +11,30 @@ namespace storage { class IceDiskNodeTable; +struct IceDiskNodeTableScanState : public TableScanState { + bool initialized = false; + std::unique_ptr parquetReader; + std::unique_ptr parquetScanState; + bool scanCompleted = false; + std::size_t currentRowGroupIdx = static_cast(common::INVALID_NODE_GROUP_IDX); + bool dataReadCompleted = false; + std::vector>> data; // data[rowGroup][column] + std::size_t currentRowGroupBatchOffset = 0; // offset of current rowGroupBatch + + + IceDiskNodeTableScanState(common::ValueVector* nodeIDVector, + std::vector outputVectors, + std::shared_ptr outChunkState) + : TableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { + parquetScanState = std::make_unique(); + } +}; + struct IceDiskNodeTableScanSharedState { private: std::mutex mtx; - common::node_group_idx_t currentRowGroupIdx = 0; - common::node_group_idx_t numRowGroups = 0; + std::size_t currentRowGroupIdx = 0; + std::size_t numRowGroups = 0; public: void reset(common::node_group_idx_t totalRowGroups) { @@ -35,22 +54,6 @@ struct IceDiskNodeTableScanSharedState { } }; -struct IceDiskNodeTableScanState : public TableScanState { - std::unique_ptr parquetReader; - std::unique_ptr parquetScanState; - std::vector columnSkips; - bool scanCompleted = false; - common::node_group_idx_t currentRowGroupIdx = 0; - - - IceDiskNodeTableScanState(common::ValueVector* nodeIDVector, - std::vector outputVectors, - std::shared_ptr outChunkState) - : TableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { - parquetScanState = std::make_unique(); - } -}; - class IceDiskNodeTable final : public NodeTable { public: IceDiskNodeTable(const StorageManager* storageManager, @@ -79,15 +82,19 @@ class IceDiskNodeTable final : public NodeTable { const catalog::NodeTableCatalogEntry* getCatalogEntry() const { return 
nodeTableCatalogEntry; } IceDiskNodeTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } - size_t getNumScanMorsels(const transaction::Transaction* transaction) const; + std::size_t getNumScanMorsels(const transaction::Transaction* transaction) const; private: - size_t getNumRowGroups(const transaction::Transaction* transaction) const; + std::size_t getNumRowGroups(const transaction::Transaction* transaction) const; + void initIceDiskScanForRowGroup(transaction::Transaction* transaction, IceDiskNodeTableScanState& scanState) const; + void readParquetData(transaction::Transaction* transaction, TableScanState& scanState) const; private: std::string parquetFilePath; const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry; std::unique_ptr tableScanSharedState; + std::vector rowGroupStartOffsets; // Starting row offset for each row group in the parquet file + constexpr static std::size_t scanRowGroupBatchSize = 2048; // Default batch size }; } // namespace storage diff --git a/src/include/storage/table/ice_disk_rel_table.h b/src/include/storage/table/ice_disk_rel_table.h index 636010339c..43630a5d5d 100644 --- a/src/include/storage/table/ice_disk_rel_table.h +++ b/src/include/storage/table/ice_disk_rel_table.h @@ -98,7 +98,7 @@ class IceDiskRelTable final : public RelTable { common::table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager); - void initializeScanCoordination(const transaction::Transaction* transaction) override; + void initializeScanCoordination(const transaction::Transaction* transaction); void initScanState(transaction::Transaction* transaction, TableScanState& scanState, bool resetCachedBoundNodeSelVec = true) const override; diff --git a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp index 57939193ea..0972426319 100644 --- 
a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +++ b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp @@ -596,7 +596,7 @@ ParquetScanSharedState::ParquetScanSharedState(FileScanInfo fileScanInfo, uint64 for (auto i = fileIdx.load(); i < this->fileScanInfo.getNumFiles(); i++) { auto reader = std::make_unique(this->fileScanInfo.filePaths[i], columnSkips, context); - totalRowsGroups += reader->getNumRowsGroups(); + totalRowsGroups += reader->getNumRowGroups(); } numBlocksReadByFiles = 0; } @@ -608,7 +608,7 @@ static bool parquetSharedStateNext(ParquetScanLocalState& localState, if (sharedState.fileIdx >= sharedState.fileScanInfo.getNumFiles()) { return false; } - if (sharedState.blockIdx < sharedState.readers[sharedState.fileIdx]->getNumRowsGroups()) { + if (sharedState.blockIdx < sharedState.readers[sharedState.fileIdx]->getNumRowGroups()) { localState.reader = sharedState.readers[sharedState.fileIdx].get(); localState.reader->initializeScan(*localState.state, {sharedState.blockIdx}, VirtualFileSystem::GetUnsafe(*sharedState.context)); @@ -616,7 +616,7 @@ static bool parquetSharedStateNext(ParquetScanLocalState& localState, return true; } else { sharedState.numBlocksReadByFiles += - sharedState.readers[sharedState.fileIdx]->getNumRowsGroups(); + sharedState.readers[sharedState.fileIdx]->getNumRowGroups(); sharedState.blockIdx = 0; sharedState.fileIdx++; if (sharedState.fileIdx >= sharedState.fileScanInfo.getNumFiles()) { diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp index 22114afc1e..fe70d6deef 100644 --- a/src/processor/operator/scan/scan_node_table.cpp +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -66,7 +66,7 @@ void ScanNodeTableSharedState::initialize(const transaction::Transaction* transa common::VirtualFileSystem::resolvePath(context, parquetTable->getParquetFilePath()); auto tempReader = std::make_unique(resolvedPath, columnSkips, context); - 
this->numCommittedNodeGroups = tempReader->getNumRowsGroups(); + this->numCommittedNodeGroups = tempReader->getNumRowGroups(); } catch (const std::exception& e) { this->numCommittedNodeGroups = 1; } diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index 50e38d4262..cb9f05aefe 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -35,138 +35,269 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, } void IceDiskNodeTable::initializeScanCoordination(const Transaction* transaction) { - auto iceDiskScanSharedState = - static_cast(tableScanSharedState.get()); - auto numRowGroups = getNumRowGroups(transaction); - iceDiskScanSharedState->reset(numRowGroups); + rowGroupStartOffsets.clear(); + + auto context = transaction->getClientContext(); + if (context) { + auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); + auto tempReader = std::make_unique(resolvedPath, std::vector(), context); + + tableScanSharedState->reset(tempReader->getNumRowGroups()); + + auto metadata = tempReader->getMetadata(); + uint64_t currentStartOffset = 0; + + for (std::size_t i = 0; i < metadata->row_groups.size(); ++i) { + rowGroupStartOffsets.push_back(currentStartOffset); + currentStartOffset += metadata->row_groups[i].num_rows; + } + } } -void IceDiskNodeTable::initScanState(Transaction* /*transaction*/, TableScanState& scanState, +void IceDiskNodeTable::initScanState(Transaction* transaction, TableScanState& scanState, bool /*resetCachedBoundNodeSelVec*/) const { auto& iceDiskNodeScanState = static_cast(scanState); - iceDiskNodeScanState.source = TableScanSource::COMMITTED; + + if(iceDiskNodeScanState.currentRowGroupIdx != static_cast(common::INVALID_NODE_GROUP_IDX)) { + iceDiskNodeScanState.scanCompleted = true; + return; + } + iceDiskNodeScanState.scanCompleted = false; - iceDiskNodeScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; - 
iceDiskNodeScanState.currentRowOffset = 0; + iceDiskNodeScanState.dataReadCompleted = false; + iceDiskNodeScanState.data.clear(); + iceDiskNodeScanState.currentRowGroupBatchOffset = 0; + + // Each scan state gets its own parquet reader for thread safety and initialized only once + if (!iceDiskNodeScanState.initialized) { + auto context = transaction->getClientContext(); + if (!context) { + throw RuntimeException("Invalid client context for IceDisk scan state initialization"); + } + + try { + auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); + iceDiskNodeScanState.parquetReader = + std::make_unique(resolvedPath, std::vector(), context); + iceDiskNodeScanState.initialized = true; + } catch (const std::exception& e) { + throw RuntimeException("Failed to initialize parquet reader for file '" + + parquetFilePath + "': " + e.what()); + } + } + + // Initialize scan state for the current row group (assigned via shared state) + initIceDiskScanForRowGroup(transaction, iceDiskNodeScanState); } +void IceDiskNodeTable::initIceDiskScanForRowGroup(Transaction* transaction, + IceDiskNodeTableScanState& scanState) const { + auto context = transaction->getClientContext(); + if (!context) { + return; + } + + auto vfs = VirtualFileSystem::GetUnsafe(*context); + if (!vfs) { + return; + } + + // Defensive check: ensure parquet reader exists + if (!scanState.parquetReader) { + return; + } + + // Defensive check: ensure parquet scan state exists + if (!scanState.parquetScanState) { + return; + } + + // Re-initialize scan for the specific row groups + // Note: initializeScan can be called multiple times; the first call populates column metadata + scanState.parquetReader->initializeScan(*scanState.parquetScanState, {scanState.currentRowGroupIdx}, vfs); +} + + +// First run always fails due to iceDiskNodeScanState.scanCompleted == true as +// scanState.currentRowGroupIdx = INVALID_NODE_GROUP_IDX on the first +// run(look at initScanState function) 
tableScanSharedState.nextMorsel will drive the morsel assignment bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& scanState) { auto& iceDiskNodeScanState = static_cast(scanState); if (iceDiskNodeScanState.scanCompleted) { return false; } - auto vfs = VirtualFileSystem::GetUnsafe(*transaction->getClientContext()); - DataChunk dataChunk(iceDiskNodeScanState.parquetReader->getNumColumns()); - for (uint32_t i = 0; i < iceDiskNodeScanState.parquetReader->getNumColumns(); ++i) { - dataChunk.insert(i, std::make_shared( - iceDiskNodeScanState.parquetReader->getColumnType(i).copy(), - MemoryManager::Get(*transaction->getClientContext()))); + scanState.resetOutVectors(); + + // Read all data once into scan state + if (!iceDiskNodeScanState.dataReadCompleted) { + readParquetData(transaction, scanState); } - while (true) { - if (iceDiskNodeScanState.nodeGroupIdx == INVALID_NODE_GROUP_IDX) { - if (!tableScanSharedState->getNextMorsel(&iceDiskNodeScanState)) { - iceDiskNodeScanState.scanCompleted = true; - return false; + if (iceDiskNodeScanState.currentRowGroupBatchOffset >= iceDiskNodeScanState.data.size()) { + iceDiskNodeScanState.scanCompleted = true; + return false; + } + + auto outputSize = std::min(scanRowGroupBatchSize, iceDiskNodeScanState.data.size() - iceDiskNodeScanState.currentRowGroupBatchOffset); + auto numColumns = + std::min(scanState.outputVectors.size(), iceDiskNodeScanState.data[iceDiskNodeScanState.currentRowGroupBatchOffset].size()); + + for (std::size_t col = 0; col < numColumns; ++col) { + auto& dstVector = *scanState.outputVectors[col]; + + for (std::size_t i = 0; i < outputSize; ++i) { + auto& value = *iceDiskNodeScanState.data[iceDiskNodeScanState.currentRowGroupBatchOffset + i][col]; + if (value.isNull()) { + dstVector.setNull(i, true); + } else { + dstVector.copyFromValue(i, value); } - iceDiskNodeScanState.currentRowOffset = 0; - std::vector groupsToRead = {iceDiskNodeScanState.nodeGroupIdx}; - 
iceDiskNodeScanState.parquetReader->initializeScan(*iceDiskNodeScanState.parquetScanState, - groupsToRead, vfs); } + } - dataChunk.state->getSelVectorUnsafe().setSelSize(0); - iceDiskNodeScanState.parquetReader->scan(*iceDiskNodeScanState.parquetScanState, dataChunk); - if (dataChunk.state->getSelVector().getSelSize() == 0) { - iceDiskNodeScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; - iceDiskNodeScanState.currentRowOffset = 0; - continue; - } + auto tableID = this->getTableID(); + for (std::size_t i = 0; i < outputSize; ++i) { + auto& nodeID = scanState.nodeIDVector->getValue(i); + nodeID.tableID = tableID; + // assign parquet rowIndex + nodeID.offset = rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; + } + + iceDiskNodeScanState.currentRowGroupBatchOffset += outputSize; + scanState.outState->getSelVectorUnsafe().setSelSize(outputSize); + return true; +} + +void IceDiskNodeTable::readParquetData(Transaction* transaction, TableScanState& scanState) const { + auto& iceDiskNodeScanState = static_cast(scanState); + auto numColumns = iceDiskNodeScanState.parquetReader->getNumColumns(); + + // Defensive check: ensure parquet file has at least one column + if (numColumns == 0) { + throw RuntimeException("Parquet file '" + parquetFilePath + "' has no columns"); + } + + // Create vectors with parquet types + // Always create the data chunk to match the exact number of parquet columns + // to prevent crashes in the parquet reader when accessing result vectors + DataChunk parquetDataChunk(numColumns, scanState.outState); + + for (uint32_t i = 0; i < numColumns; ++i) { + const auto& parquetColumnType = iceDiskNodeScanState.parquetReader->getColumnType(i); + auto columnType = parquetColumnType.copy(); + auto vector = std::make_shared(std::move(columnType), + MemoryManager::Get(*transaction->getClientContext()), scanState.outState); + parquetDataChunk.insert(i, vector); + } + + // Read from parquet + 
iceDiskNodeScanState.parquetReader->scan(*iceDiskNodeScanState.parquetScanState, parquetDataChunk); - scanState.resetOutVectors(); - auto selSize = dataChunk.state->getSelVector().getSelSize(); - for (uint32_t i = 0; i < iceDiskNodeScanState.columnIDs.size(); ++i) { - auto columnID = iceDiskNodeScanState.columnIDs[i]; - if (columnID == ROW_IDX_COLUMN_ID) { - for (size_t j = 0; j < selSize; ++j) { - ((row_idx_t*)iceDiskNodeScanState.outputVectors[i]->getData())[j] = - iceDiskNodeScanState.currentStartRow + iceDiskNodeScanState.currentRowOffset + j; + auto selSize = parquetDataChunk.state->getSelVector().getSelSize(); + if (selSize > 0) { + iceDiskNodeScanState.data.resize(selSize); + for (std::size_t row = 0; row < selSize; ++row) { + iceDiskNodeScanState.data[row].resize( + scanState.outputVectors + .size()); // Use output vector count, not parquet column count + + // Map parquet columns to correct output vector positions by name + // Defensive check: ensure we don't access more columns than available in the chunk + auto maxParquetCol = std::min(static_cast(numColumns), + static_cast(parquetDataChunk.getNumValueVectors())); + + for (std::size_t parquetCol = 0; parquetCol < maxParquetCol; ++parquetCol) { + // Defensive check: ensure the column index is valid for the data chunk + if (parquetCol >= parquetDataChunk.getNumValueVectors()) { + continue; } - } else if (columnID != INVALID_COLUMN_ID) { - uint32_t parquetColIdx = 0; - auto propertyName = nodeTableCatalogEntry->getProperty(columnID).getName(); - for (uint32_t j = 0; j < iceDiskNodeScanState.parquetReader->getNumColumns(); ++j) { - if (iceDiskNodeScanState.parquetReader->getColumnName(j) == propertyName) { - parquetColIdx = j; + + auto& srcVector = parquetDataChunk.getValueVectorMutable(parquetCol); + + // Get parquet column name and find its corresponding column ID + std::string parquetColumnName = + iceDiskNodeScanState.parquetReader->getColumnName(parquetCol); + auto nodeTableEntry = 
this->nodeTableCatalogEntry; + + // Check if the column exists first before calling getColumnID + if (!nodeTableEntry->containsProperty(parquetColumnName)) { + // Column doesn't exist in table schema, skip it + continue; + } + + // Find the column ID for this property name + column_id_t parquetColumnID = nodeTableEntry->getColumnID(parquetColumnName); + + // Find which output vector position corresponds to this column ID + std::size_t outputCol = INVALID_COLUMN_ID; + for (std::size_t outCol = 0; outCol < scanState.columnIDs.size(); ++outCol) { + if (scanState.columnIDs[outCol] == parquetColumnID) { + outputCol = outCol; break; } } - auto& srcVector = dataChunk.getValueVectorMutable(parquetColIdx); - auto& dstVector = *iceDiskNodeScanState.outputVectors[i]; - for (size_t j = 0; j < selSize; ++j) { - dstVector.copyFromVectorData(j, &srcVector, dataChunk.state->getSelVector()[j]); + + // Only copy data if we found a matching output position + if (outputCol != INVALID_COLUMN_ID && + outputCol < iceDiskNodeScanState.data[row].size()) { + // Defensive check: ensure the row index is valid for the source vector + if (row >= srcVector.state->getSelVector().getSelSize()) { + continue; + } + + if (srcVector.isNull(row)) { + iceDiskNodeScanState.data[row][outputCol] = + std::make_unique(Value::createNullValue()); + } else { + iceDiskNodeScanState.data[row][outputCol] = + std::make_unique(*srcVector.getAsValue(row)); + } } } } - - for (size_t i = 0; i < selSize; ++i) { - ((nodeID_t*)iceDiskNodeScanState.nodeIDVector->getData())[i] = nodeID_t{ - iceDiskNodeScanState.currentStartRow + iceDiskNodeScanState.currentRowOffset + i, - nodeTableCatalogEntry->getTableID()}; - } - iceDiskNodeScanState.currentRowOffset += selSize; - iceDiskNodeScanState.outState->getSelVectorUnsafe().setSelSize(selSize); - iceDiskNodeScanState.outState->getSelVectorUnsafe().setToUnfiltered(); - return selSize > 0; } + + iceDiskNodeScanState.dataReadCompleted = true; } -common::row_idx_t 
IceDiskNodeTable::getNumTotalRows(const Transaction* transaction) { +std::size_t IceDiskNodeTable::getNumTotalRows(const Transaction* transaction) { auto context = transaction->getClientContext(); if (!context) { return 0; } - std::vector columnSkips; - try { auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); - auto tempReader = std::make_unique(resolvedPath, columnSkips, context); - - if (!tempReader) { - return 0; - } + auto tempReader = std::make_unique(resolvedPath, std::vector(), context); - auto metadata = tempReader->getMetadata(); - return metadata ? metadata->num_rows : 0; + return tempReader->getMetadata()->num_rows; } catch (const std::exception& e) { // If parquet file is corrupted or invalid, return 0 instead of crashing return 0; } } -size_t IceDiskNodeTable::getNumRowGroups(const transaction::Transaction* transaction) const { +std::size_t IceDiskNodeTable::getNumRowGroups(const transaction::Transaction* transaction) const { auto context = transaction->getClientContext(); if (!context) { return 0; } - std::vector columnSkips; - try { auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); - auto tempReader = std::make_unique(resolvedPath, columnSkips, context); - return tempReader ? 
tempReader->getNumRowGroups() : 0; + auto tempReader = std::make_unique(resolvedPath, std::vector(), context); + + return tempReader->getNumRowGroups(); } catch (const std::exception& e) { // If parquet file is corrupted or invalid, return 0 instead of crashing return 0; } } -size_t IceDiskNodeTable::getNumScanMorsels(const transaction::Transaction* transaction) const { +std::size_t IceDiskNodeTable::getNumScanMorsels(const transaction::Transaction* transaction) const { return getNumRowGroups(transaction); } diff --git a/src/storage/table/parquet_node_table.cpp b/src/storage/table/parquet_node_table.cpp index bd86f4e0df..f632e1ca32 100644 --- a/src/storage/table/parquet_node_table.cpp +++ b/src/storage/table/parquet_node_table.cpp @@ -98,7 +98,7 @@ common::node_group_idx_t ParquetNodeTable::getNumBatches(const Transaction* tran try { auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); auto tempReader = std::make_unique(resolvedPath, columnSkips, context); - return tempReader->getNumRowsGroups(); + return tempReader->getNumRowGroups(); } catch (const std::exception& e) { return 1; // Fallback } diff --git a/src/storage/table/parquet_rel_table.cpp b/src/storage/table/parquet_rel_table.cpp index d9e01b0087..a0bf775c9f 100644 --- a/src/storage/table/parquet_rel_table.cpp +++ b/src/storage/table/parquet_rel_table.cpp @@ -107,7 +107,7 @@ void ParquetRelTable::initScanState(Transaction* transaction, TableScanState& sc // For now, assign all row groups to this scan state (will be partitioned by the scan operator) parquetRelScanState.startRowGroup = 0; parquetRelScanState.endRowGroup = parquetRelScanState.indicesReader ? 
- parquetRelScanState.indicesReader->getNumRowsGroups() : + parquetRelScanState.indicesReader->getNumRowGroups() : 0; parquetRelScanState.currentRowGroup = parquetRelScanState.startRowGroup; parquetRelScanState.nextRowToProcess = 0; @@ -149,7 +149,7 @@ void ParquetRelTable::loadIndptrData(Transaction* transaction) const { auto context = transaction->getClientContext(); auto vfs = VirtualFileSystem::GetUnsafe(*context); std::vector groupsToRead; - for (uint64_t i = 0; i < indptrReader->getNumRowsGroups(); ++i) { + for (uint64_t i = 0; i < indptrReader->getNumRowGroups(); ++i) { groupsToRead.push_back(i); } From 5a98715e0fdda130cfa58de1c8e38a41b440d101 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Wed, 6 May 2026 15:28:17 +0530 Subject: [PATCH 07/17] move rowGroupStartOffsets into ice disk node table shared state --- src/include/storage/table/ice_disk_node_table.h | 7 ++++--- src/storage/table/ice_disk_node_table.cpp | 10 ++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index 8652fdf0ea..d7964fc3e5 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -35,12 +35,14 @@ struct IceDiskNodeTableScanSharedState { std::mutex mtx; std::size_t currentRowGroupIdx = 0; std::size_t numRowGroups = 0; + std::vector rowGroupStartOffsets; // Starting row offset for each row group in the parquet file public: - void reset(common::node_group_idx_t totalRowGroups) { + void reset(common::node_group_idx_t totalRowGroups, std::vector rowGroupStartOffsets) { std::lock_guard lock(mtx); currentRowGroupIdx = 0; numRowGroups = totalRowGroups; + this->rowGroupStartOffsets = std::move(rowGroupStartOffsets); } @@ -92,8 +94,7 @@ class IceDiskNodeTable final : public NodeTable { private: std::string parquetFilePath; const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry; - std::unique_ptr tableScanSharedState; 
- std::vector rowGroupStartOffsets; // Starting row offset for each row group in the parquet file + mutable std::unique_ptr tableScanSharedState; constexpr static std::size_t scanRowGroupBatchSize = 2048; // Default batch size }; diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index cb9f05aefe..23cbad6b11 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -35,22 +35,20 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, } void IceDiskNodeTable::initializeScanCoordination(const Transaction* transaction) { - rowGroupStartOffsets.clear(); - auto context = transaction->getClientContext(); if (context) { auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); auto tempReader = std::make_unique(resolvedPath, std::vector(), context); - - tableScanSharedState->reset(tempReader->getNumRowGroups()); - auto metadata = tempReader->getMetadata(); uint64_t currentStartOffset = 0; + std::vector rowGroupStartOffsets; for (std::size_t i = 0; i < metadata->row_groups.size(); ++i) { rowGroupStartOffsets.push_back(currentStartOffset); currentStartOffset += metadata->row_groups[i].num_rows; } + + tableScanSharedState->reset(tempReader->getNumRowGroups(), std::move(rowGroupStartOffsets)); } } @@ -161,7 +159,7 @@ bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& sc auto& nodeID = scanState.nodeIDVector->getValue(i); nodeID.tableID = tableID; // assign parquet rowIndex - nodeID.offset = rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; + nodeID.offset = this->tableScanSharedState->rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; } iceDiskNodeScanState.currentRowGroupBatchOffset += outputSize; From d2b5c4e8573a4b0219d336846f70da1cc30d39fb Mon Sep 17 00:00:00 2001 From: Ally Heev 
Date: Wed, 6 May 2026 15:54:12 +0530 Subject: [PATCH 08/17] remove this in ice_disk_node_table.cpp --- src/storage/table/ice_disk_node_table.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index 23cbad6b11..afcb6a73b1 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -154,12 +154,11 @@ bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& sc } } - auto tableID = this->getTableID(); for (std::size_t i = 0; i < outputSize; ++i) { auto& nodeID = scanState.nodeIDVector->getValue(i); nodeID.tableID = tableID; // assign parquet rowIndex - nodeID.offset = this->tableScanSharedState->rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; + nodeID.offset = tableScanSharedState->rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; } iceDiskNodeScanState.currentRowGroupBatchOffset += outputSize; @@ -216,16 +215,15 @@ void IceDiskNodeTable::readParquetData(Transaction* transaction, TableScanState& // Get parquet column name and find its corresponding column ID std::string parquetColumnName = iceDiskNodeScanState.parquetReader->getColumnName(parquetCol); - auto nodeTableEntry = this->nodeTableCatalogEntry; // Check if the column exists first before calling getColumnID - if (!nodeTableEntry->containsProperty(parquetColumnName)) { + if (!nodeTableCatalogEntry->containsProperty(parquetColumnName)) { // Column doesn't exist in table schema, skip it continue; } // Find the column ID for this property name - column_id_t parquetColumnID = nodeTableEntry->getColumnID(parquetColumnName); + column_id_t parquetColumnID = nodeTableCatalogEntry->getColumnID(parquetColumnName); // Find which output vector position corresponds to this column ID std::size_t outputCol = 
INVALID_COLUMN_ID; From f426ddf85aac60271277f9f7a50efcd0902664e1 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Thu, 7 May 2026 10:49:45 +0530 Subject: [PATCH 09/17] fix ice-disk rel scan --- .../operator/scan/scan_multi_rel_tables.h | 1 + .../processor/operator/scan/scan_rel_table.h | 1 + .../storage/table/ice_disk_node_table.h | 4 + .../storage/table/ice_disk_rel_table.h | 84 +-- src/include/storage/table/table.h | 2 +- .../operator/scan/scan_multi_rel_tables.cpp | 19 + .../operator/scan/scan_rel_table.cpp | 8 +- src/storage/table/ice_disk_node_table.cpp | 2 +- src/storage/table/ice_disk_rel_table.cpp | 499 ++++++++---------- 9 files changed, 277 insertions(+), 343 deletions(-) diff --git a/src/include/processor/operator/scan/scan_multi_rel_tables.h b/src/include/processor/operator/scan/scan_multi_rel_tables.h index fb31640c64..e6b8142a99 100644 --- a/src/include/processor/operator/scan/scan_multi_rel_tables.h +++ b/src/include/processor/operator/scan/scan_multi_rel_tables.h @@ -67,6 +67,7 @@ class ScanMultiRelTable final : public ScanTable { directionInfo{std::move(directionInfo)}, scanState{nullptr}, boundNodeIDVector{nullptr}, scanners{std::move(scanners)}, currentScanner{nullptr} {} + void initGlobalStateInternal(ExecutionContext* context) override; void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; bool getNextTuplesInternal(ExecutionContext* context) override; diff --git a/src/include/processor/operator/scan/scan_rel_table.h b/src/include/processor/operator/scan/scan_rel_table.h index ee94146137..a344f70dbd 100644 --- a/src/include/processor/operator/scan/scan_rel_table.h +++ b/src/include/processor/operator/scan/scan_rel_table.h @@ -80,6 +80,7 @@ class ScanRelTable final : public ScanTable { bool isSource() const override { return sourceMode; } bool isParallel() const override { return !sourceMode; } + void initGlobalStateInternal(ExecutionContext* context) override; void initLocalStateInternal(ResultSet* resultSet, 
ExecutionContext* context) override; bool getNextTuplesInternal(ExecutionContext* context) override; diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index d7964fc3e5..6761942003 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -54,6 +54,10 @@ struct IceDiskNodeTableScanSharedState { } return false; } + + const std::vector& getRowGroupStartOffsets() const { + return rowGroupStartOffsets; + } }; class IceDiskNodeTable final : public NodeTable { diff --git a/src/include/storage/table/ice_disk_rel_table.h b/src/include/storage/table/ice_disk_rel_table.h index 43630a5d5d..e2cd943d3c 100644 --- a/src/include/storage/table/ice_disk_rel_table.h +++ b/src/include/storage/table/ice_disk_rel_table.h @@ -13,29 +13,13 @@ namespace storage { class IceDiskRelTable; -struct PendingIceDiskRelRow { - common::sel_t boundNodeSelPos = 0; - common::nodeID_t nbrNodeID; - common::internalID_t relID; - std::vector> propertyValues; - - PendingIceDiskRelRow() = default; - PendingIceDiskRelRow(PendingIceDiskRelRow&&) = default; - PendingIceDiskRelRow& operator=(PendingIceDiskRelRow&&) = default; -}; - struct IceDiskRelTableScanState : public RelTableScanState { +private: + std::mutex mtx; +public: std::unique_ptr indicesReader; std::unique_ptr parquetScanState; - std::vector outputColumnIdx; - std::vector columnSkips; - bool scanCompleted = false; - uint64_t currentStartRow = 0; - uint64_t currentNumRows = 0; - uint64_t currentGlobalRowIdx = 0; - uint64_t nextRowGroupIdx = 0; - std::vector pendingRows; - uint64_t nextPendingRowIdx = 0; + std::size_t currentRowGroupIdx = 0; IceDiskRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector, std::vector outputVectors, @@ -48,48 +32,32 @@ struct IceDiskRelTableScanState : public RelTableScanState { std::vector columnIDs_, std::vector columnPredicateSets_ = {}, common::RelDataDirection direction_ = 
common::RelDataDirection::FWD) override; + + void initializeIndicesReader(transaction::Transaction* transaction); }; struct IceDiskRelTableScanSharedState { private: std::mutex mtx; - std::vector rowGroupStartRows; - std::vector rowGroupNumRows; - common::node_group_idx_t currentRowGroupIdx = 0; + std::vector indicesRowGroupStartOffsets; // Starting row offset for each row group in the parquet file + std::vector indptrData; // Cached indptr data shared across morsels to avoid redundant I/O public: IceDiskRelTableScanSharedState() {} - void reset(std::vector startRows, std::vector numRows) { + void reset(std::vector indicesRowGroupStartOffsets, std::vector indptrData) { std::lock_guard lock(mtx); - this->rowGroupStartRows = std::move(startRows); - this->rowGroupNumRows = std::move(numRows); - this->currentRowGroupIdx = 0; + this->indicesRowGroupStartOffsets = std::move(indicesRowGroupStartOffsets); + this->indptrData = std::move(indptrData); } - bool getNextMorsel(IceDiskRelTableScanState* scanState, uint64_t& startRow, uint64_t& numRows) { - std::lock_guard lock(mtx); - if (currentRowGroupIdx < rowGroupStartRows.size()) { - scanState->nodeGroupIdx = currentRowGroupIdx; - startRow = rowGroupStartRows[currentRowGroupIdx]; - numRows = rowGroupNumRows[currentRowGroupIdx]; - currentRowGroupIdx++; - return true; - } - return false; + const std::vector& getIndicesRowGroupStartOffsets() const { + return indicesRowGroupStartOffsets; } - bool getMorsel(common::node_group_idx_t morselIdx, uint64_t& startRow, uint64_t& numRows) { - std::lock_guard lock(mtx); - if (morselIdx >= rowGroupStartRows.size()) { - return false; - } - startRow = rowGroupStartRows[morselIdx]; - numRows = rowGroupNumRows[morselIdx]; - return true; + const std::vector& getIndptrData() const { + return indptrData; } - - common::node_group_idx_t getNumMorsels() const { return rowGroupStartRows.size(); } }; class IceDiskRelTable final : public RelTable { @@ -97,9 +65,8 @@ class IceDiskRelTable final : public 
RelTable { IceDiskRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, common::table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager); - - void initializeScanCoordination(const transaction::Transaction* transaction); - + + void initializeScanCoordination(transaction::Transaction* transaction); void initScanState(transaction::Transaction* transaction, TableScanState& scanState, bool resetCachedBoundNodeSelVec = true) const override; @@ -122,16 +89,21 @@ class IceDiskRelTable final : public RelTable { const catalog::RelGroupCatalogEntry* getRelGroupCatalogEntry() const { return relGroupCatalogEntry; } IceDiskRelTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } +private: + std::vector getIndicesRowGroupStartOffsets(const transaction::Transaction* transaction) const; + std::vector readIndptrData(transaction::Transaction* transaction) const; + void copyCachedBoundNodeSelVector(RelTableScanState& relScanState) const; + bool scanRowGroupForBoundNodes(transaction::Transaction* transaction, + IceDiskRelTableScanState& iceDiskScanState, const std::vector& rowGroupsToProcess, + const std::unordered_set& boundNodeOffsets); + std::size_t findSourceNodeForRow(std::size_t globalRowIdx) const; + private: std::string indicesFilePath; std::string indptrFilePath; const catalog::RelGroupCatalogEntry* relGroupCatalogEntry; - mutable std::unique_ptr tableScanSharedState; - mutable std::mutex indptrDataMutex; - mutable std::vector indptrData; - - void loadIndptrData(transaction::Transaction* transaction) const; - common::offset_t findSourceNodeForRow(common::offset_t globalRowIdx) const; + std::unique_ptr tableScanSharedState; + constexpr static std::size_t scanRowGroupBatchSize = 2048; // Default batch size }; } // namespace storage diff --git a/src/include/storage/table/table.h b/src/include/storage/table/table.h index fc14653c61..c0dd60615e 100644 --- 
a/src/include/storage/table/table.h +++ b/src/include/storage/table/table.h @@ -60,7 +60,7 @@ struct LBUG_API TableScanState { virtual void setToTable(const transaction::Transaction* transaction, Table* table_, std::vector columnIDs_, - std::vector columnPredicateSets_, + std::vector columnPredicateSets_ = {}, common::RelDataDirection direction = common::RelDataDirection::INVALID); // Note that `resetCachedBoundNodeSelVec` is only applicable to RelTable for now. diff --git a/src/processor/operator/scan/scan_multi_rel_tables.cpp b/src/processor/operator/scan/scan_multi_rel_tables.cpp index 52642a62be..e6fa3f0d56 100644 --- a/src/processor/operator/scan/scan_multi_rel_tables.cpp +++ b/src/processor/operator/scan/scan_multi_rel_tables.cpp @@ -61,6 +61,25 @@ bool RelTableCollectionScanner::scan(main::ClientContext* context, RelTableScanS } } +// only for icebug disk table for now as they have shared state +void ScanMultiRelTable::initGlobalStateInternal(ExecutionContext* context) { + auto transaction = Transaction::Get(*context->clientContext); + for (auto& [_, scanner] : scanners) { + bool hasIceDiskTable = false; + for (auto& relInfo : scanner.relInfos) { + if (const auto iceDiskRelTable = dynamic_cast(relInfo.table)) { + iceDiskRelTable->initializeScanCoordination(transaction); + hasIceDiskTable = true; + break; + } + } + + if (hasIceDiskTable) { + break; + } + } +} + void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { ScanTable::initLocalStateInternal(resultSet, context); auto clientContext = context->clientContext; diff --git a/src/processor/operator/scan/scan_rel_table.cpp b/src/processor/operator/scan/scan_rel_table.cpp index f736d463d2..c1d97a6ddb 100644 --- a/src/processor/operator/scan/scan_rel_table.cpp +++ b/src/processor/operator/scan/scan_rel_table.cpp @@ -70,6 +70,12 @@ void ScanRelTableInfo::initScanState(TableScanState& scanState, initScanStateVectors(scanState, outVectors, MemoryManager::Get(*context)); } 
+void ScanRelTable::initGlobalStateInternal(ExecutionContext* context) { + if (const auto iceDiskRelTable = dynamic_cast(tableInfo.table)) { + iceDiskRelTable->initializeScanCoordination(transaction::Transaction::Get(*context->clientContext)); + } +} + void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { ScanTable::initLocalStateInternal(resultSet, context); auto clientContext = context->clientContext; @@ -80,6 +86,7 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext auto* parquetTable = dynamic_cast(tableInfo.table); auto* foreignTable = dynamic_cast(tableInfo.table); auto* iceDiskTable = dynamic_cast(tableInfo.table); + if (arrowTable) { scanState = std::make_unique(*MemoryManager::Get(*clientContext), @@ -99,7 +106,6 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext scanState = std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); } - tableInfo.table->initializeScanCoordination(transaction::Transaction::Get(*clientContext)); tableInfo.initScanState(*scanState, outVectors, clientContext); if (sourceMode) { currentSourceTableIdx = 0; diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index afcb6a73b1..b1f412dea8 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -158,7 +158,7 @@ bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& sc auto& nodeID = scanState.nodeIDVector->getValue(i); nodeID.tableID = tableID; // assign parquet rowIndex - nodeID.offset = tableScanSharedState->rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; + nodeID.offset = tableScanSharedState->getRowGroupStartOffsets()[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; } 
iceDiskNodeScanState.currentRowGroupBatchOffset += outputSize; diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index 5b33e1620e..6600e5db2c 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -17,124 +17,25 @@ using namespace lbug::catalog; namespace lbug { namespace storage { -namespace { - -constexpr int64_t REL_ID_OUTPUT_COLUMN = -2; - -std::string getRelPropertyNameForColumnID(const RelGroupCatalogEntry& entry, column_id_t columnID) { - for (const auto& property : entry.getProperties()) { - if (entry.getColumnID(property.getName()) == columnID) { - return property.getName(); - } - } - throw RuntimeException("Column ID " + std::to_string(columnID) + - " does not map to an icebug-disk rel property."); -} - -void copyCachedBoundNodeSelVector(RelTableScanState& relScanState) { - if (relScanState.nodeIDVector->state->getSelVector().isUnfiltered()) { - relScanState.cachedBoundNodeSelVector.setToUnfiltered(); - } else { - relScanState.cachedBoundNodeSelVector.setToFiltered(); - memcpy(relScanState.cachedBoundNodeSelVector.getMutableBuffer().data(), - relScanState.nodeIDVector->state->getSelVector().getMutableBuffer().data(), - relScanState.nodeIDVector->state->getSelVector().getSelSize() * sizeof(sel_t)); - } - relScanState.cachedBoundNodeSelVector.setSelSize( - relScanState.nodeIDVector->state->getSelVector().getSelSize()); -} - -void emitPendingRow(IceDiskRelTableScanState& scanState) { - auto& row = scanState.pendingRows[scanState.nextPendingRowIdx++]; - scanState.setNodeIDVectorToFlat(row.boundNodeSelPos); - for (size_t outCol = 0; outCol < scanState.columnIDs.size(); ++outCol) { - auto columnID = scanState.columnIDs[outCol]; - if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) { - continue; - } - if (columnID == NBR_ID_COLUMN_ID) { - scanState.outputVectors[outCol]->setValue(0, row.nbrNodeID); - } else if (columnID == REL_ID_COLUMN_ID) { - 
scanState.outputVectors[outCol]->setValue(0, row.relID); - } else if (outCol < row.propertyValues.size() && row.propertyValues[outCol]) { - scanState.outputVectors[outCol]->copyFromValue(0, *row.propertyValues[outCol]); - } - } - scanState.outState->getSelVectorUnsafe().setToUnfiltered(1); - if (scanState.nextPendingRowIdx >= scanState.pendingRows.size()) { - scanState.pendingRows.clear(); - scanState.nextPendingRowIdx = 0; - } -} - -} // namespace - void IceDiskRelTableScanState::setToTable(const Transaction* transaction, Table* table_, std::vector columnIDs_, std::vector columnPredicateSets_, common::RelDataDirection direction_) { - table = table_; - columnIDs = std::move(columnIDs_); - columnPredicateSets = std::move(columnPredicateSets_); + // Call base class implementation but skip local table setup + TableScanState::setToTable(transaction, table_, std::move(columnIDs_), + std::move(columnPredicateSets_)); direction = direction_; +} - auto& iceDiskRelTable = table_->cast(); - auto context = transaction->getClientContext(); - auto resolvedPath = VirtualFileSystem::resolvePath(context, iceDiskRelTable.getIndicesFilePath()); - - std::vector dummySkips; - indicesReader = std::make_unique(resolvedPath, dummySkips, context); - auto tempState = std::make_unique(); - std::vector dummyGroups; - indicesReader->initializeScan(*tempState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); - - auto entry = iceDiskRelTable.getRelGroupCatalogEntry(); - outputColumnIdx.assign(columnIDs.size(), INVALID_COLUMN_ID); - columnSkips.assign(indicesReader->getNumColumns(), true); - - for (size_t outputCol = 0; outputCol < columnIDs.size(); ++outputCol) { - auto columnID = columnIDs[outputCol]; - if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) { - continue; - } - if (columnID == NBR_ID_COLUMN_ID) { - bool found = false; - for (uint32_t i = 0; i < indicesReader->getNumColumns(); i++) { - if (indicesReader->getColumnName(i) == "nbr_id" || i == 0) { - 
outputColumnIdx[outputCol] = static_cast(i); - columnSkips[i] = false; - found = true; - break; - } - } - if (!found) { - throw RuntimeException("nbr_id column not found in indices parquet"); - } - continue; - } - if (columnID == REL_ID_COLUMN_ID) { - outputColumnIdx[outputCol] = REL_ID_OUTPUT_COLUMN; - continue; - } +void IceDiskRelTableScanState::initializeIndicesReader(Transaction* transaction) { + if (!indicesReader) { + std::lock_guard lock(mtx); - auto propertyName = getRelPropertyNameForColumnID(*entry, columnID); - bool found = false; - for (uint32_t i = 0; i < indicesReader->getNumColumns(); i++) { - if (indicesReader->getColumnName(i) == propertyName) { - outputColumnIdx[outputCol] = static_cast(i); - columnSkips[i] = false; - found = true; - break; - } - } - if (!found) { - throw RuntimeException("Property " + propertyName + " not found in parquet file"); + if(!indicesReader) { // Double-checked locking to avoid redundant initialization + auto* iceDiskRelTable = static_cast(table); + indicesReader = std::make_unique(iceDiskRelTable->getIndicesFilePath(), std::vector(), transaction->getClientContext()); } } - - indicesReader = std::make_unique(resolvedPath, columnSkips, context); - processor::ParquetReaderScanState initializedState; - indicesReader->initializeScan(initializedState, dummyGroups, VirtualFileSystem::GetUnsafe(*context)); } IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, @@ -155,218 +56,153 @@ IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::ta tableScanSharedState = std::make_unique(); } -void IceDiskRelTable::initializeScanCoordination(const Transaction* transaction) { - auto context = transaction->getClientContext(); - auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); - std::vector dummySkips; - processor::ParquetReader reader(resolvedPath, dummySkips, context); - - auto metadata = reader.getMetadata(); - std::vector 
rowGroupStartRows; - std::vector rowGroupNumRows; - size_t currentOffset = 0; - - for (auto i = 0u; i < metadata->row_groups.size(); ++i) { - rowGroupStartRows.push_back(currentOffset); - rowGroupNumRows.push_back(metadata->row_groups[i].num_rows); - currentOffset += metadata->row_groups[i].num_rows; - } +void IceDiskRelTable::initializeScanCoordination(Transaction* transaction) { + auto indicesRowGroupStartOffsets = getIndicesRowGroupStartOffsets(transaction); + auto indptrData = readIndptrData(transaction); - tableScanSharedState->reset(rowGroupStartRows, rowGroupNumRows); + tableScanSharedState->reset(indicesRowGroupStartOffsets, indptrData); } -void IceDiskRelTable::initScanState(Transaction* /*transaction*/, TableScanState& scanState, +void IceDiskRelTable::initScanState(Transaction* transaction, TableScanState& scanState, bool resetCachedBoundNodeSelVec) const { auto& relScanState = scanState.cast(); - relScanState.source = TableScanSource::COMMITTED; - relScanState.nodeGroup = nullptr; - relScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; + + // For morsel-driven parallelism, each scan state maintains its own bound node processing state. + // No shared state is needed between threads. if (resetCachedBoundNodeSelVec) { + // Copy the cached bound node selection vector from the scan state copyCachedBoundNodeSelVector(relScanState); } auto& iceDiskScanState = static_cast(relScanState); - iceDiskScanState.scanCompleted = false; - iceDiskScanState.currentStartRow = 0; - iceDiskScanState.currentNumRows = 0; - iceDiskScanState.currentGlobalRowIdx = 0; - iceDiskScanState.nextRowGroupIdx = 0; - iceDiskScanState.pendingRows.clear(); - iceDiskScanState.nextPendingRowIdx = 0; + iceDiskScanState.initializeIndicesReader(transaction); + iceDiskScanState.currentRowGroupIdx = 0; } -void IceDiskRelTable::loadIndptrData(Transaction* transaction) const { - std::lock_guard lock(indptrDataMutex); - if (!indptrData.empty()) { - return; - } + +bool
IceDiskRelTable::scanInternal(Transaction* transaction, TableScanState& scanState) { + auto& iceDiskScanState = static_cast(scanState); - auto context = transaction->getClientContext(); - auto vfs = VirtualFileSystem::GetUnsafe(*context); - auto resolvedPath = VirtualFileSystem::resolvePath(context, indptrFilePath); - std::vector dummySkips; - auto indptrReader = std::make_unique(resolvedPath, dummySkips, context); - - auto scanState = std::make_unique(); - std::vector groupsToRead; - for (uint64_t i = 0; i < indptrReader->getMetadata()->row_groups.size(); ++i) { - groupsToRead.push_back(i); - } - indptrReader->initializeScan(*scanState, groupsToRead, vfs); - - DataChunk dataChunk(1); - dataChunk.insert(0, std::make_shared(LogicalType::UINT64(), MemoryManager::Get(*context))); - - while (indptrReader->scanInternal(*scanState, dataChunk)) { - auto selSize = dataChunk.state->getSelVector().getSelSize(); - auto& vector = dataChunk.getValueVectorMutable(0); - for (size_t i = 0; i < selSize; ++i) { - indptrData.push_back(((uint64_t*)vector.getData())[dataChunk.state->getSelVector()[i]]); - } + scanState.resetOutVectors(); + + // Check if we have any row groups left to process + if (iceDiskScanState.currentRowGroupIdx >= tableScanSharedState->getIndicesRowGroupStartOffsets().size()) { + // No more row groups to process + auto newSelVector = std::make_shared(0); + iceDiskScanState.outState->setSelVector(newSelVector); + return false; } -} -common::offset_t IceDiskRelTable::findSourceNodeForRow(common::offset_t globalRowIdx) const { - auto it = std::upper_bound(indptrData.cbegin(), indptrData.cend(), (common::offset_t)globalRowIdx); - if (it == indptrData.cbegin()) { - return INVALID_OFFSET; + // Process the current row group + std::vector rowGroupsToProcess = {iceDiskScanState.currentRowGroupIdx}; + + // Create a set of bound node IDs for fast lookup + std::unordered_set boundNodeOffsets; + for (size_t i = 0; i < iceDiskScanState.cachedBoundNodeSelVector.getSelSize(); 
++i) { + common::sel_t boundNodeIdx = iceDiskScanState.cachedBoundNodeSelVector[i]; + const auto boundNodeID = iceDiskScanState.nodeIDVector->getValue(boundNodeIdx); + boundNodeOffsets.insert(boundNodeID.offset); } - return std::distance(indptrData.cbegin(), it) - 1; + + // Scan the current row group and collect relationships for bound nodes + bool hasData = scanRowGroupForBoundNodes(transaction, iceDiskScanState, rowGroupsToProcess, + boundNodeOffsets); + + // Move to next row group for next call + iceDiskScanState.currentRowGroupIdx++; + + return hasData; } -bool IceDiskRelTable::scanInternal(Transaction* transaction, TableScanState& scanState) { - auto& iceDiskScanState = static_cast(scanState); - if (iceDiskScanState.scanCompleted) { +bool IceDiskRelTable::scanRowGroupForBoundNodes(Transaction* transaction, + IceDiskRelTableScanState& iceDiskScanState, const std::vector& rowGroupsToProcess, + const std::unordered_set& boundNodeOffsets) { + if (!iceDiskScanState.indicesReader) { return false; } - if (iceDiskScanState.nextPendingRowIdx < iceDiskScanState.pendingRows.size()) { - emitPendingRow(iceDiskScanState); - return true; + // Initialize scan state for the assigned row groups + auto context = transaction->getClientContext(); + auto vfs = VirtualFileSystem::GetUnsafe(*context); + iceDiskScanState.indicesReader->initializeScan(*iceDiskScanState.parquetScanState, + rowGroupsToProcess, vfs); + + // Create DataChunk matching the indices parquet file schema + auto numIndicesColumns = iceDiskScanState.indicesReader->getNumColumns(); + DataChunk indicesChunk(numIndicesColumns); + + // Insert value vectors for all columns in the parquet file + auto memoryManager = MemoryManager::Get(*context); + for (uint32_t colIdx = 0; colIdx < numIndicesColumns; ++colIdx) { + const auto& columnTypeRef = iceDiskScanState.indicesReader->getColumnType(colIdx); + auto columnType = columnTypeRef.copy(); + auto vector = std::make_shared(std::move(columnType), memoryManager); + 
indicesChunk.insert(colIdx, vector); } - loadIndptrData(transaction); - scanState.resetOutVectors(); + // Scan the row groups and collect relationships for bound nodes. + const auto isFwd = iceDiskScanState.direction != RelDataDirection::BWD; + uint64_t totalRowsCollected = 0; + uint64_t currentGlobalRowIdx = 0; - std::unordered_map boundNodeSelPosByOffset; - boundNodeSelPosByOffset.reserve(iceDiskScanState.cachedBoundNodeSelVector.getSelSize()); - for (size_t i = 0; i < iceDiskScanState.cachedBoundNodeSelVector.getSelSize(); ++i) { - auto pos = iceDiskScanState.cachedBoundNodeSelVector[i]; - boundNodeSelPosByOffset.emplace( - ((nodeID_t*)iceDiskScanState.nodeIDVector->getData())[pos].offset, pos); + // Calculate the starting global row index for the first row group + if (!rowGroupsToProcess.empty()) { + currentGlobalRowIdx = tableScanSharedState->getIndicesRowGroupStartOffsets()[rowGroupsToProcess[0]]; } - auto context = transaction->getClientContext(); - auto vfs = VirtualFileSystem::GetUnsafe(*context); - auto numColumns = iceDiskScanState.indicesReader->getNumColumns(); - DataChunk indicesChunk(numColumns); - uint32_t nbrColumnIdx = 0; - for (uint32_t i = 0; i < numColumns; ++i) { - if (iceDiskScanState.indicesReader->getColumnName(i) == "nbr_id") { - nbrColumnIdx = i; - break; - } - } - for (uint32_t i = 0; i < numColumns; ++i) { - indicesChunk.insert(i, std::make_shared( - iceDiskScanState.indicesReader->getColumnType(i).copy(), - MemoryManager::Get(*context))); - } - const auto nbrTableID = - iceDiskScanState.direction == RelDataDirection::BWD ? 
getFromNodeTableID() : getToNodeTableID(); - - while (true) { - if (iceDiskScanState.nodeGroupIdx == INVALID_NODE_GROUP_IDX) { - uint64_t startRow = 0; - uint64_t numRows = 0; - if (!tableScanSharedState->getMorsel( - static_cast(iceDiskScanState.nextRowGroupIdx), - startRow, numRows)) { - iceDiskScanState.scanCompleted = true; - return false; - } - iceDiskScanState.nodeGroupIdx = - static_cast(iceDiskScanState.nextRowGroupIdx++); - bool overlap = iceDiskScanState.direction == RelDataDirection::BWD; - if (!overlap) { - auto startNode = findSourceNodeForRow(startRow); - auto endNode = findSourceNodeForRow(startRow + numRows - 1); - for (const auto& [boundOffset, _] : boundNodeSelPosByOffset) { - if (boundOffset >= startNode && - (startNode == endNode || - (endNode != INVALID_OFFSET && boundOffset <= endNode))) { - overlap = true; - break; - } - } - } + while (totalRowsCollected < IceDiskRelTable::scanRowGroupBatchSize && + iceDiskScanState.indicesReader->scanInternal(*iceDiskScanState.parquetScanState, + indicesChunk)) { + + auto selSize = indicesChunk.state->getSelVector().getSelSize(); - if (!overlap) { - iceDiskScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; + for (size_t i = 0; i < selSize && totalRowsCollected < IceDiskRelTable::scanRowGroupBatchSize; + ++i, ++currentGlobalRowIdx) { + // Find which source node this row belongs to. + const auto sourceNodeOffset = findSourceNodeForRow(currentGlobalRowIdx); + + // Column 0 in indices file is the destination node offset. + const auto dstOffset = indicesChunk.getValueVector(0).getValue(i); + const auto boundOffset = isFwd ? 
sourceNodeOffset : dstOffset; + + // not a bound node, skip + if (boundNodeOffsets.find(boundOffset) == boundNodeOffsets.end()) { continue; } - iceDiskScanState.currentStartRow = startRow; - iceDiskScanState.currentNumRows = numRows; - iceDiskScanState.currentGlobalRowIdx = startRow; - std::vector groupsToRead = {iceDiskScanState.nodeGroupIdx}; - iceDiskScanState.indicesReader->initializeScan(*iceDiskScanState.parquetScanState, - groupsToRead, vfs); - } + const auto nbrOffset = isFwd ? dstOffset : sourceNodeOffset; + const auto nbrTableID = isFwd ? getToNodeTableID() : getFromNodeTableID(); + auto nbrNodeID = internalID_t(nbrOffset, nbrTableID); - indicesChunk.state->getSelVectorUnsafe().setSelSize(0); - iceDiskScanState.indicesReader->scan(*iceDiskScanState.parquetScanState, indicesChunk); - if (indicesChunk.state->getSelVector().getSelSize() == 0) { - iceDiskScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX; - continue; - } - - auto selSize = indicesChunk.state->getSelVector().getSelSize(); - for (size_t i = 0; i < selSize; ++i) { - auto pos = indicesChunk.state->getSelVector()[i]; - auto globalRowIdx = iceDiskScanState.currentGlobalRowIdx + i; - auto srcOffset = findSourceNodeForRow(globalRowIdx); - auto& nbrVec = indicesChunk.getValueVectorMutable(nbrColumnIdx); - auto dstOffset = nbrVec.getValue(pos); - const auto boundOffset = - iceDiskScanState.direction == RelDataDirection::BWD ? dstOffset : srcOffset; - if (!boundNodeSelPosByOffset.contains(boundOffset)) { - continue; + // outputVectors[0] is the neighbor node ID, if requested. 
+ if (!iceDiskScanState.outputVectors.empty()) { + iceDiskScanState.outputVectors[0]->setValue(totalRowsCollected, nbrNodeID); } - PendingIceDiskRelRow row; - row.boundNodeSelPos = boundNodeSelPosByOffset.at(boundOffset); - row.relID = internalID_t{globalRowIdx, getTableID()}; - row.propertyValues.resize(iceDiskScanState.columnIDs.size()); - for (size_t outCol = 0; outCol < iceDiskScanState.columnIDs.size(); ++outCol) { - auto columnID = iceDiskScanState.columnIDs[outCol]; - if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID || - columnID == REL_ID_COLUMN_ID) { - continue; - } - auto parquetColIdx = iceDiskScanState.outputColumnIdx[outCol]; - if (parquetColIdx < 0) { - continue; - } - auto& vec = indicesChunk.getValueVectorMutable(static_cast(parquetColIdx)); - if (columnID == NBR_ID_COLUMN_ID) { - auto nbrOffset = - iceDiskScanState.direction == RelDataDirection::BWD ? srcOffset : dstOffset; - row.nbrNodeID = internalID_t{nbrOffset, nbrTableID}; - } else { - row.propertyValues[outCol] = vec.getAsValue(pos); - } + // If there are additional columns (e.g., weight), copy them to subsequent output + // vectors. These are property columns and should have matching types. + for (uint32_t colIdx = 1; + colIdx < numIndicesColumns && colIdx < iceDiskScanState.outputVectors.size(); + ++colIdx) { + iceDiskScanState.outputVectors[colIdx]->copyFromVectorData(totalRowsCollected, + &indicesChunk.getValueVector(colIdx), i); } - iceDiskScanState.pendingRows.push_back(std::move(row)); - } - iceDiskScanState.currentGlobalRowIdx += selSize; - if (iceDiskScanState.pendingRows.empty()) { - continue; + totalRowsCollected++; } - } - emitPendingRow(iceDiskScanState); - return true; } + + // No data found + if (totalRowsCollected <= 0) { + auto selVector = std::make_shared(0); + iceDiskScanState.outState->setSelVector(selVector); + return false; + } + + auto selVector = std::make_shared(totalRowsCollected); + selVector->setToUnfiltered(totalRowsCollected); +
iceDiskScanState.outState->setSelVector(selVector); + + return true; } common::row_idx_t IceDiskRelTable::getNumTotalRows(const Transaction* transaction) { @@ -377,5 +213,100 @@ common::row_idx_t IceDiskRelTable::getNumTotalRows(const Transaction* transactio return reader.getMetadata()->num_rows; } +std::vector IceDiskRelTable::getIndicesRowGroupStartOffsets(const transaction::Transaction* transaction) const { + auto context = transaction->getClientContext(); + auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); + processor::ParquetReader reader(resolvedPath, std::vector(), context); + + auto metadata = reader.getMetadata(); + std::vector startOffsets; + std::size_t currentOffset = 0; + + for (auto i = 0u; i < metadata->row_groups.size(); ++i) { + startOffsets.push_back(currentOffset); + currentOffset += metadata->row_groups[i].num_rows; + } + + return startOffsets; +} + +std::vector IceDiskRelTable::readIndptrData(Transaction* transaction) const { + auto context = transaction->getClientContext(); + auto vfs = VirtualFileSystem::GetUnsafe(*context); + auto resolvedPath = VirtualFileSystem::resolvePath(context, indptrFilePath); + auto indptrReader = std::make_unique(resolvedPath, std::vector(), context); + processor::ParquetReaderScanState scanState; + std::vector groupsToRead; + std::vector indptrData; + + for (uint64_t i = 0; i < indptrReader->getMetadata()->row_groups.size(); ++i) { + groupsToRead.push_back(i); + } + + indptrReader->initializeScan(scanState, groupsToRead, vfs); + + // Check if the indptr file has any columns after scan initialization + auto numColumns = indptrReader->getNumColumns(); + if (numColumns == 0) { + throw RuntimeException("Indptr parquet file has no columns"); + } + + // Validate column type for indptr + const auto& indptrType = indptrReader->getColumnType(0); + if (!LogicalTypeUtils::isIntegral(indptrType.getLogicalTypeID())) { + throw RuntimeException( + "Indptr parquet file column must be integer type 
(column 0)"); + } + + DataChunk dataChunk(1); + const auto& columnTypeRef = indptrReader->getColumnType(0); + auto columnType = columnTypeRef.copy(); + auto vector = std::make_shared(std::move(columnType), MemoryManager::Get(*context)); + dataChunk.insert(0, vector); + + while (indptrReader->scanInternal(scanState, dataChunk)) { + auto selVector = dataChunk.state->getSelVectorShared(); + auto selSize = selVector->getSelSize(); + auto& valVector = dataChunk.getValueVector(0); + + for (std::size_t i = 0; i < selSize; ++i) { + auto value = valVector.getValue((*selVector)[i]); + indptrData.push_back(value); + } + } + + return indptrData; +} + +void IceDiskRelTable::copyCachedBoundNodeSelVector(RelTableScanState& relScanState) const { + if (relScanState.nodeIDVector->state->getSelVector().isUnfiltered()) { + relScanState.cachedBoundNodeSelVector.setToUnfiltered(); + } else { + relScanState.cachedBoundNodeSelVector.setToFiltered(); + memcpy(relScanState.cachedBoundNodeSelVector.getMutableBuffer().data(), + relScanState.nodeIDVector->state->getSelVector().getMutableBuffer().data(), + relScanState.nodeIDVector->state->getSelVector().getSelSize() * sizeof(sel_t)); + } + relScanState.cachedBoundNodeSelVector.setSelSize( + relScanState.nodeIDVector->state->getSelVector().getSelSize()); +} + +std::size_t IceDiskRelTable::findSourceNodeForRow(std::size_t globalRowIdx) const { + const auto& indptrData = tableScanSharedState->getIndptrData(); + if (indptrData.empty()) { + throw RuntimeException("Indptr data not loaded for CSR format"); + } + + // Binary search to find the source node + // indptrData[i] contains the start row index for source node i + // Find the largest i where indptrData[i] <= globalRowIdx + auto it = std::upper_bound(indptrData.begin(), indptrData.end(), globalRowIdx); + if (it == indptrData.begin()) { + throw RuntimeException("Invalid global row index: " + std::to_string(globalRowIdx)); + } + --it; + return static_cast(std::distance(indptrData.begin(), it)); 
+} + } // namespace storage } // namespace lbug From b49b49ec4fb4fd0a234073ec589922e65ae0350e Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Thu, 7 May 2026 11:36:33 +0530 Subject: [PATCH 10/17] move const data out of ice-disk shared states --- .../storage/table/ice_disk_node_table.h | 25 ++++++---------- .../storage/table/ice_disk_rel_table.h | 30 +++---------------- src/storage/table/ice_disk_node_table.cpp | 6 ++-- src/storage/table/ice_disk_rel_table.cpp | 12 +++----- 4 files changed, 20 insertions(+), 53 deletions(-) diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index 6761942003..c22285be5a 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include "storage/table/node_table.h" @@ -30,33 +30,25 @@ struct IceDiskNodeTableScanState : public TableScanState { } }; +// Shared state for morsel assignment across parallel scan threads struct IceDiskNodeTableScanSharedState { private: - std::mutex mtx; - std::size_t currentRowGroupIdx = 0; std::size_t numRowGroups = 0; - std::vector rowGroupStartOffsets; // Starting row offset for each row group in the parquet file + std::atomic currentRowGroupIdx{0}; public: - void reset(common::node_group_idx_t totalRowGroups, std::vector rowGroupStartOffsets) { - std::lock_guard lock(mtx); - currentRowGroupIdx = 0; + void reset(std::size_t totalRowGroups) { numRowGroups = totalRowGroups; - this->rowGroupStartOffsets = std::move(rowGroupStartOffsets); + currentRowGroupIdx.store(0, std::memory_order_relaxed); } - bool getNextMorsel(IceDiskNodeTableScanState* scanState) { - std::lock_guard lock(mtx); - if (currentRowGroupIdx < numRowGroups) { - scanState->currentRowGroupIdx = currentRowGroupIdx++; + auto idx = currentRowGroupIdx.fetch_add(1, std::memory_order_relaxed); + if (idx < numRowGroups) { + scanState->currentRowGroupIdx = idx; return true; } 
return false; - } - - const std::vector& getRowGroupStartOffsets() const { - return rowGroupStartOffsets; } }; @@ -98,6 +90,7 @@ class IceDiskNodeTable final : public NodeTable { private: std::string parquetFilePath; const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry; + std::vector rowGroupStartOffsets; mutable std::unique_ptr tableScanSharedState; constexpr static std::size_t scanRowGroupBatchSize = 2048; // Default batch size }; diff --git a/src/include/storage/table/ice_disk_rel_table.h b/src/include/storage/table/ice_disk_rel_table.h index e2cd943d3c..a89539d535 100644 --- a/src/include/storage/table/ice_disk_rel_table.h +++ b/src/include/storage/table/ice_disk_rel_table.h @@ -36,30 +36,6 @@ struct IceDiskRelTableScanState : public RelTableScanState { void initializeIndicesReader(transaction::Transaction* transaction); }; -struct IceDiskRelTableScanSharedState { -private: - std::mutex mtx; - std::vector indicesRowGroupStartOffsets; // Starting row offset for each row group in the parquet file - std::vector indptrData; // Cached indptr data shared across morsels to avoid redundant I/O - -public: - IceDiskRelTableScanSharedState() {} - - void reset(std::vector indicesRowGroupStartOffsets, std::vector indptrData) { - std::lock_guard lock(mtx); - this->indicesRowGroupStartOffsets = std::move(indicesRowGroupStartOffsets); - this->indptrData = std::move(indptrData); - } - - const std::vector& getIndicesRowGroupStartOffsets() const { - return indicesRowGroupStartOffsets; - } - - const std::vector& getIndptrData() const { - return indptrData; - } -}; - class IceDiskRelTable final : public RelTable { public: IceDiskRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, @@ -87,7 +63,6 @@ class IceDiskRelTable final : public RelTable { const std::string& getIndicesFilePath() const { return indicesFilePath; } const std::string& getIndptrFilePath() const { return indptrFilePath; } const catalog::RelGroupCatalogEntry* 
getRelGroupCatalogEntry() const { return relGroupCatalogEntry; } - IceDiskRelTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } private: std::vector getIndicesRowGroupStartOffsets(const transaction::Transaction* transaction) const; @@ -102,7 +77,10 @@ class IceDiskRelTable final : public RelTable { std::string indicesFilePath; std::string indptrFilePath; const catalog::RelGroupCatalogEntry* relGroupCatalogEntry; - std::unique_ptr tableScanSharedState; + // Row group start offsets derived from Parquet metadata; stable for the lifetime of the table. + std::vector indicesRowGroupStartOffsets; + // Full CSR indptr array loaded once from disk; stable for the lifetime of the table. + std::vector indptrData; constexpr static std::size_t scanRowGroupBatchSize = 2048; // Default batch size }; diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index b1f412dea8..3be1b1f82e 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -41,14 +41,14 @@ void IceDiskNodeTable::initializeScanCoordination(const Transaction* transaction auto tempReader = std::make_unique(resolvedPath, std::vector(), context); auto metadata = tempReader->getMetadata(); uint64_t currentStartOffset = 0; - std::vector rowGroupStartOffsets; + rowGroupStartOffsets.clear(); for (std::size_t i = 0; i < metadata->row_groups.size(); ++i) { rowGroupStartOffsets.push_back(currentStartOffset); currentStartOffset += metadata->row_groups[i].num_rows; } - tableScanSharedState->reset(tempReader->getNumRowGroups(), std::move(rowGroupStartOffsets)); + tableScanSharedState->reset(tempReader->getNumRowGroups()); } } @@ -158,7 +158,7 @@ bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& sc auto& nodeID = scanState.nodeIDVector->getValue(i); nodeID.tableID = tableID; // assign parquet rowIndex - nodeID.offset = 
tableScanSharedState->getRowGroupStartOffsets()[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; + nodeID.offset = rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; } iceDiskNodeScanState.currentRowGroupBatchOffset += outputSize; diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index 6600e5db2c..6d6bf978d9 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -53,14 +53,11 @@ IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::ta indicesFilePath = relGroupEntry->getIndicesPath(); indptrFilePath = relGroupEntry->getIndptrPath(); - tableScanSharedState = std::make_unique(); } void IceDiskRelTable::initializeScanCoordination(Transaction* transaction) { - auto indicesRowGroupStartOffsets = getIndicesRowGroupStartOffsets(transaction); - auto indptrData = readIndptrData(transaction); - - tableScanSharedState->reset(indicesRowGroupStartOffsets, indptrData); + indicesRowGroupStartOffsets = getIndicesRowGroupStartOffsets(transaction); + indptrData = readIndptrData(transaction); } void IceDiskRelTable::initScanState(Transaction* transaction, TableScanState& scanState, @@ -85,7 +82,7 @@ bool IceDiskRelTable::scanInternal(Transaction* transaction, TableScanState& sca scanState.resetOutVectors(); // Check if we have any row groups left to process - if (iceDiskScanState.currentRowGroupIdx >= tableScanSharedState->getIndicesRowGroupStartOffsets().size()) { + if (iceDiskScanState.currentRowGroupIdx >= indicesRowGroupStartOffsets.size()) { // No more row groups to process auto newSelVector = std::make_shared(0); iceDiskScanState.outState->setSelVector(newSelVector); @@ -146,7 +143,7 @@ bool IceDiskRelTable::scanRowGroupForBoundNodes(Transaction* transaction, // Calculate the starting global row index for the first row group if 
(!rowGroupsToProcess.empty()) { - currentGlobalRowIdx = tableScanSharedState->getIndicesRowGroupStartOffsets()[rowGroupsToProcess[0]]; + currentGlobalRowIdx = indicesRowGroupStartOffsets[rowGroupsToProcess[0]]; } while (totalRowsCollected < IceDiskRelTable::scanRowGroupBatchSize && @@ -292,7 +289,6 @@ void IceDiskRelTable::copyCachedBoundNodeSelVector(RelTableScanState& relScanSta } std::size_t IceDiskRelTable::findSourceNodeForRow(std::size_t globalRowIdx) const { - const auto& indptrData = tableScanSharedState->getIndptrData(); if (indptrData.empty()) { throw RuntimeException("Indptr data not loaded for CSR format"); } From 532d776e9b87bcd082394562bebb356155fd1090 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Thu, 7 May 2026 16:47:24 +0530 Subject: [PATCH 11/17] fix demo_db ice_disk storage paths --- src/binder/bind/bind_ddl.cpp | 2 +- test/storage/CMakeLists.txt | 1 - test/storage/ice_disk_test.cpp | 157 --------------------- test/test_files/graph/parquet_rel_bwd.test | 39 +++++ test/test_helper/test_helper.cpp | 13 +- 5 files changed, 49 insertions(+), 163 deletions(-) delete mode 100644 test/storage/ice_disk_test.cpp create mode 100644 test/test_files/graph/parquet_rel_bwd.test diff --git a/src/binder/bind/bind_ddl.cpp b/src/binder/bind/bind_ddl.cpp index 8e729208aa..d69f7519d5 100644 --- a/src/binder/bind/bind_ddl.cpp +++ b/src/binder/bind/bind_ddl.cpp @@ -245,7 +245,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* if (!storage.empty()) { auto dotPos = storage.find('.'); // Check if storage is database.table format by verifying the attached database exists - // Otherwise, treat as file path (e.g., "dataset/demo-db/icebug-disk/demo" or + // Otherwise, treat as file path (e.g., "dataset/demo-db/graph-std/demo" or // "data.parquet") if (dotPos != std::string::npos) { std::string dbName = storage.substr(0, dotPos); diff --git a/test/storage/CMakeLists.txt b/test/storage/CMakeLists.txt index 15b7ccb885..e63f929d90 100644 --- 
a/test/storage/CMakeLists.txt +++ b/test/storage/CMakeLists.txt @@ -4,7 +4,6 @@ add_lbug_test(column_chunk_metadata_test column_chunk_metadata_test.cpp) add_lbug_test(local_hash_index_test local_hash_index_test.cpp) add_lbug_test(buffer_manager_test buffer_manager_test.cpp) add_lbug_test(rel_tests rel_scan_test.cpp rel_delete_test.cpp) -add_lbug_test(ice_disk_test ice_disk_test.cpp) add_lbug_test(node_update_test node_update_test.cpp) add_lbug_test(detach_delete_test detach_delete_test.cpp) add_lbug_test(storage_utils_test storage_utils_test.cpp) diff --git a/test/storage/ice_disk_test.cpp b/test/storage/ice_disk_test.cpp deleted file mode 100644 index 9c89e66ac7..0000000000 --- a/test/storage/ice_disk_test.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include "graph_test/private_graph_test.h" -#include "storage/storage_manager.h" -#include "storage/table/ice_disk_node_table.h" -#include "storage/table/ice_disk_rel_table.h" -#include "main/client_context.h" -#include "transaction/transaction.h" -#include "catalog/catalog.h" -#include "catalog/catalog_entry/rel_group_catalog_entry.h" -#include "catalog/catalog_entry/node_table_catalog_entry.h" -#include "storage/table/csr_node_group.h" - -using namespace lbug::common; -using namespace lbug::storage; -using namespace lbug::transaction; -using namespace lbug::catalog; - -namespace lbug { -namespace testing { - -class IceDiskStorageTest : public DBTest { -public: - std::string getInputDir() override { return TestHelper::appendLbugRootPath("dataset/demo-db/icebug-disk/"); } - - void SetUp() override { - DBTest::SetUp(); - conn->query("BEGIN TRANSACTION"); - context = getClientContext(*conn); - storageManager = database->getStorageManager(); - } - - main::ClientContext* context; - StorageManager* storageManager; -}; - -TEST_F(IceDiskStorageTest, NodeTableScanTest) { - auto catalog = Catalog::Get(*context); - auto transaction = Transaction::Get(*context); - auto tableEntry = catalog->getTableCatalogEntry(transaction, "user"); - 
ASSERT_NE(tableEntry, nullptr); - auto tableID = tableEntry->getTableID(); - auto table = storageManager->getTable(tableID); - auto nodeTable = dynamic_cast(table); - - ASSERT_NE(nodeTable, nullptr); - EXPECT_EQ(nodeTable->getNumTotalRows(transaction), 4); - - auto nodeIDVector = std::make_unique(LogicalType::INTERNAL_ID(), database->getMemoryManager()); - auto nameVector = std::make_unique(LogicalType::STRING(), database->getMemoryManager()); - auto ageVector = std::make_unique(LogicalType::INT64(), database->getMemoryManager()); - - std::vector outputVectors = {nameVector.get(), ageVector.get()}; - auto outState = std::make_shared(); - IceDiskNodeTableScanState scanState(nodeIDVector.get(), outputVectors, outState); - - // name is column 1, age is column 2 - scanState.setToTable(transaction, nodeTable, {1, 2}); - nodeTable->initializeScanCoordination(transaction); - nodeTable->initScanState(transaction, scanState); - - int count = 0; - while (nodeTable->scanInternal(transaction, scanState)) { - auto selSize = outState->getSelVector().getSelSize(); - for (auto i = 0u; i < selSize; i++) { - auto pos = outState->getSelVector()[i]; - auto name = ((lbug::common::string_t*)nameVector->getData())[pos].getAsString(); - auto age = ((int64_t*)ageVector->getData())[pos]; - if (name == "Adam") { EXPECT_EQ(age, 30); } - else if (name == "Karissa") { EXPECT_EQ(age, 40); } - else if (name == "Zhang") { EXPECT_EQ(age, 50); } - else if (name == "Noura") { EXPECT_EQ(age, 25); } - count++; - } - } - EXPECT_EQ(count, 4); -} - -TEST_F(IceDiskStorageTest, RelTableScanTest) { - auto catalog = Catalog::Get(*context); - auto transaction = Transaction::Get(*context); - auto relGroupEntry = - dynamic_cast(catalog->getTableCatalogEntry(transaction, "follows")); - - ASSERT_NE(relGroupEntry, nullptr); - - auto relTableID = relGroupEntry->getSingleRelEntryInfo().oid; - auto table = storageManager->getTable(relTableID); - auto relTable = dynamic_cast(table); - - ASSERT_NE(relTable, nullptr); - 
- auto nodeIDVector = std::make_unique(LogicalType::INTERNAL_ID(), database->getMemoryManager()); - nodeIDVector->state = std::make_shared(); - auto nbrIDVector = std::make_unique(LogicalType::INTERNAL_ID(), database->getMemoryManager()); - auto sinceVector = std::make_unique(LogicalType::INT32(), database->getMemoryManager()); - - std::vector outputVectors = {nbrIDVector.get(), sinceVector.get()}; - auto outState = std::make_shared(); - - auto memManager = database->getMemoryManager(); - IceDiskRelTableScanState scanState(*memManager, nodeIDVector.get(), outputVectors, outState); - - auto userTableEntry = catalog->getTableCatalogEntry(transaction, "user"); - ASSERT_NE(userTableEntry, nullptr); - auto userTableID = userTableEntry->getTableID(); - - // In this dataset, Adam is at row offset 1 in nodes_user.parquet. - nodeIDVector->state->getSelVectorUnsafe().setSelSize(1); - nodeIDVector->state->getSelVectorUnsafe().setToUnfiltered(); - - nodeID_t srcNode; - srcNode.offset = 1; - srcNode.tableID = userTableID; - ((lbug::common::nodeID_t*)nodeIDVector->getData())[0] = srcNode; - - auto sinceColumnID = relGroupEntry->getColumnID("since"); - scanState.setToTable(transaction, relTable, {NBR_ID_COLUMN_ID, sinceColumnID}); - relTable->initializeScanCoordination(transaction); - relTable->initScanState(transaction, scanState); - - int count = 0; - while (relTable->scanInternal(transaction, scanState)) { - auto selSize = outState->getSelVector().getSelSize(); - for (auto i = 0u; i < selSize; i++) { - auto pos = outState->getSelVector()[i]; - auto nbr = ((lbug::common::nodeID_t*)nbrIDVector->getData())[pos]; - // Adam follows Karissa (2) and Zhang (3) in nodes_user.parquet row order. 
- EXPECT_TRUE(nbr.offset == 2 || nbr.offset == 3); - count++; - } - } - EXPECT_EQ(count, 2); - - nodeID_t dstNode; - dstNode.offset = 2; - dstNode.tableID = userTableID; - ((lbug::common::nodeID_t*)nodeIDVector->getData())[0] = dstNode; - - scanState.setToTable(transaction, relTable, {NBR_ID_COLUMN_ID, sinceColumnID}, {}, - RelDataDirection::BWD); - relTable->initializeScanCoordination(transaction); - relTable->initScanState(transaction, scanState); - - count = 0; - while (relTable->scanInternal(transaction, scanState)) { - auto selSize = outState->getSelVector().getSelSize(); - for (auto i = 0u; i < selSize; i++) { - auto pos = outState->getSelVector()[i]; - auto nbr = ((lbug::common::nodeID_t*)nbrIDVector->getData())[pos]; - EXPECT_EQ(nbr.offset, 1); - count++; - } - } - EXPECT_EQ(count, 1); -} - -} // namespace testing -} // namespace lbug diff --git a/test/test_files/graph/parquet_rel_bwd.test b/test/test_files/graph/parquet_rel_bwd.test new file mode 100644 index 0000000000..2559403179 --- /dev/null +++ b/test/test_files/graph/parquet_rel_bwd.test @@ -0,0 +1,39 @@ +-DATASET GRAPH-STD parquet-rel-bwd-test + +-- + +-CASE ParquetRelBwdScan +-SKIP +-LOG FwdScanAll +-STATEMENT MATCH (u:user)-[:wrote]->(p:post) RETURN u.name, p.title ORDER BY u.name, p.title +---- 5 +Adam|p0 +Adam|p1 +Karissa|p1 +Karissa|p2 +Zhang|p3 + +-LOG BwdScanAll +-STATEMENT MATCH (p:post)<-[:wrote]-(u:user) RETURN u.name, p.title ORDER BY u.name, p.title +---- 5 +Adam|p0 +Adam|p1 +Karissa|p1 +Karissa|p2 +Zhang|p3 + +-LOG BwdScanBoundOnPost +-STATEMENT MATCH (p:post {title: 'p0'})<-[:wrote]-(u:user) RETURN u.name +---- 1 +Adam + +-LOG BwdScanBoundOnPostMultiple +-STATEMENT MATCH (p:post {title: 'p1'})<-[:wrote]-(u:user) RETURN u.name ORDER BY u.name +---- 2 +Adam +Karissa + +-LOG BwdScanBoundOnPostLast +-STATEMENT MATCH (p:post {title: 'p3'})<-[:wrote]-(u:user) RETURN u.name +---- 1 +Zhang diff --git a/test/test_helper/test_helper.cpp b/test/test_helper/test_helper.cpp index 
6155b1faed..36b111d84c 100644 --- a/test/test_helper/test_helper.cpp +++ b/test/test_helper/test_helper.cpp @@ -45,6 +45,8 @@ void TestHelper::executeScript(const std::string& cypherScript, Connection& conn } std::string line; while (getline(file, line)) { + // replace single quote with double + std::replace(line.begin(), line.end(), '\'', '"'); // If this is a COPY statement, we need to append the LBUG_ROOT_DIRECTORY to the csv // file path. There maybe multiple csv files in the line, so we need to find all of them. std::vector csvFilePaths; @@ -86,16 +88,15 @@ void TestHelper::executeScript(const std::string& cypherScript, Connection& conn fullPath = normalizePathForCypher(std::move(fullPath)); line.replace(line.find(csvFilePath), csvFilePath.length(), fullPath); } - // Also handle storage = 'path' for parquet tables std::vector storagePaths; size_t storageIndex = 0; while (true) { - size_t start = line.find("storage = '", storageIndex); + size_t start = line.find("storage = \"", storageIndex); if (start == std::string::npos) { break; } - start += 11; // length of "storage = '" - size_t end = line.find("'", start); + start += 11; // length of "storage = "" + size_t end = line.find('"', start); if (end == std::string::npos) { break; } @@ -104,6 +105,10 @@ void TestHelper::executeScript(const std::string& cypherScript, Connection& conn storageIndex = end + 1; } for (auto& storagePath : storagePaths) { + if(storagePath.find("icebug-disk") != std::string::npos) { + continue; + } + auto fullPath = storagePath; if (std::filesystem::path(storagePath).is_relative()) { if (std::filesystem::path(storagePath).parent_path().empty()) { From 723ef937399056e05b92134bcd64f94367bc3749 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Thu, 7 May 2026 16:47:50 +0530 Subject: [PATCH 12/17] fix ice-disk node table scan initScanState --- .../storage/table/ice_disk_node_table.h | 19 +++++++++++++----- .../storage/table/ice_disk_rel_table.h | 6 +++--- 
src/storage/table/ice_disk_node_table.cpp | 19 +++++++++++++----- src/storage/table/ice_disk_rel_table.cpp | 20 ++++++++----------- 4 files changed, 39 insertions(+), 25 deletions(-) diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index c22285be5a..1c98ab9294 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -28,26 +28,35 @@ struct IceDiskNodeTableScanState : public TableScanState { : TableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { parquetScanState = std::make_unique(); } + + void setToTable(const transaction::Transaction* transaction, Table* table_, + std::vector columnIDs_, + std::vector columnPredicateSets_ = {}, + common::RelDataDirection direction = common::RelDataDirection::INVALID) override; }; // Shared state for morsel assignment across parallel scan threads struct IceDiskNodeTableScanSharedState { private: + std::mutex mtx; std::size_t numRowGroups = 0; - std::atomic currentRowGroupIdx{0}; + std::size_t currentRowGroupIdx = 0; public: void reset(std::size_t totalRowGroups) { numRowGroups = totalRowGroups; - currentRowGroupIdx.store(0, std::memory_order_relaxed); + currentRowGroupIdx = 0; } bool getNextMorsel(IceDiskNodeTableScanState* scanState) { - auto idx = currentRowGroupIdx.fetch_add(1, std::memory_order_relaxed); - if (idx < numRowGroups) { - scanState->currentRowGroupIdx = idx; + std::lock_guard lock(mtx); + + if (currentRowGroupIdx < numRowGroups) { + scanState->currentRowGroupIdx = currentRowGroupIdx; + currentRowGroupIdx++; return true; } + return false; } }; diff --git a/src/include/storage/table/ice_disk_rel_table.h b/src/include/storage/table/ice_disk_rel_table.h index a89539d535..73dcef4277 100644 --- a/src/include/storage/table/ice_disk_rel_table.h +++ b/src/include/storage/table/ice_disk_rel_table.h @@ -65,12 +65,12 @@ class IceDiskRelTable final : public RelTable { const 
catalog::RelGroupCatalogEntry* getRelGroupCatalogEntry() const { return relGroupCatalogEntry; } private: - std::vector getIndicesRowGroupStartOffsets(const transaction::Transaction* transaction) const; - std::vector readIndptrData(transaction::Transaction* transaction) const; + void loadIndicesRowGroupStartOffsets(const transaction::Transaction* transaction); + void loadIndptrData(transaction::Transaction* transaction); void copyCachedBoundNodeSelVector(RelTableScanState& relScanState) const; bool scanRowGroupForBoundNodes(transaction::Transaction* transaction, IceDiskRelTableScanState& iceDiskScanState, const std::vector& rowGroupsToProcess, - const std::unordered_set& boundNodeOffsets); + const std::unordered_set& boundNodeOffsets) const; std::size_t findSourceNodeForRow(std::size_t globalRowIdx) const; private: diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index 3be1b1f82e..10d15b65aa 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -22,6 +22,18 @@ using namespace lbug::transaction; namespace lbug { namespace storage { +void IceDiskNodeTableScanState::setToTable(const transaction::Transaction* /*transaction*/, Table* table_, + std::vector columnIDs_, + std::vector columnPredicateSets_, + common::RelDataDirection /*direction*/) { + // TableScanState::setToTable(transaction, table_, columnIDs_, std::move(columnPredicateSets_)); + table = table_; + columnIDs = std::move(columnIDs_); + columnPredicateSets = std::move(columnPredicateSets_); + // IceDisk node tables don't use NodeGroup infrastructure; skip the base class + // which would dereference the uninitialized nodeGroupScanState. 
+} + IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, const NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager) : NodeTable{storageManager, nodeTableEntry, memoryManager}, @@ -56,7 +68,7 @@ void IceDiskNodeTable::initScanState(Transaction* transaction, TableScanState& s bool /*resetCachedBoundNodeSelVec*/) const { auto& iceDiskNodeScanState = static_cast(scanState); - if(iceDiskNodeScanState.currentRowGroupIdx != static_cast(common::INVALID_NODE_GROUP_IDX)) { + if(iceDiskNodeScanState.currentRowGroupIdx == static_cast(common::INVALID_NODE_GROUP_IDX)) { iceDiskNodeScanState.scanCompleted = true; return; } @@ -116,9 +128,6 @@ void IceDiskNodeTable::initIceDiskScanForRowGroup(Transaction* transaction, } -// First run always fails due to iceDiskNodeScanState.scanCompleted == true as -// scanState.currentRowGroupIdx = INVALID_NODE_GROUP_IDX on the first -// run(look at initScanState function) tableScanSharedState.nextMorsel will drive the morsel assignment bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& scanState) { auto& iceDiskNodeScanState = static_cast(scanState); if (iceDiskNodeScanState.scanCompleted) { @@ -127,7 +136,7 @@ bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& sc scanState.resetOutVectors(); - // Read all data once into scan state + // Read data for the current row group if not yet done if (!iceDiskNodeScanState.dataReadCompleted) { readParquetData(transaction, scanState); } diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index 6d6bf978d9..9e569f099c 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -56,8 +56,8 @@ IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::ta } void IceDiskRelTable::initializeScanCoordination(Transaction* transaction) { - indicesRowGroupStartOffsets = getIndicesRowGroupStartOffsets(transaction); - 
indptrData = readIndptrData(transaction); + loadIndicesRowGroupStartOffsets(transaction); + loadIndptrData(transaction); } void IceDiskRelTable::initScanState(Transaction* transaction, TableScanState& scanState, @@ -112,7 +112,7 @@ bool IceDiskRelTable::scanInternal(Transaction* transaction, TableScanState& sca bool IceDiskRelTable::scanRowGroupForBoundNodes(Transaction* transaction, IceDiskRelTableScanState& iceDiskScanState, const std::vector& rowGroupsToProcess, - const std::unordered_set& boundNodeOffsets) { + const std::unordered_set& boundNodeOffsets) const { if (!iceDiskScanState.indicesReader) { return false; } @@ -210,31 +210,29 @@ common::row_idx_t IceDiskRelTable::getNumTotalRows(const Transaction* transactio return reader.getMetadata()->num_rows; } -std::vector IceDiskRelTable::getIndicesRowGroupStartOffsets(const transaction::Transaction* transaction) const { +void IceDiskRelTable::loadIndicesRowGroupStartOffsets(const transaction::Transaction* transaction) { auto context = transaction->getClientContext(); auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); processor::ParquetReader reader(resolvedPath, std::vector(), context); auto metadata = reader.getMetadata(); - std::vector startOffsets; std::size_t currentOffset = 0; for (auto i = 0u; i < metadata->row_groups.size(); ++i) { - startOffsets.push_back(currentOffset); + indicesRowGroupStartOffsets.push_back(currentOffset); currentOffset += metadata->row_groups[i].num_rows; } - - return startOffsets; } -std::vector IceDiskRelTable::readIndptrData(Transaction* transaction) const { +void IceDiskRelTable::loadIndptrData(Transaction* transaction) { + indptrData.clear(); + auto context = transaction->getClientContext(); auto vfs = VirtualFileSystem::GetUnsafe(*context); auto resolvedPath = VirtualFileSystem::resolvePath(context, indptrFilePath); auto indptrReader = std::make_unique(resolvedPath, std::vector(), context); processor::ParquetReaderScanState scanState; std::vector 
groupsToRead; - std::vector indptrData; for (uint64_t i = 0; i < indptrReader->getMetadata()->row_groups.size(); ++i) { groupsToRead.push_back(i); @@ -271,8 +269,6 @@ std::vector IceDiskRelTable::readIndptrData(Transaction* transactio indptrData.push_back(value); } } - - return indptrData; } void IceDiskRelTable::copyCachedBoundNodeSelVector(RelTableScanState& relScanState) const { From 7ae4efc43fe4a38332430ca3729f4424de0ec7d2 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Thu, 7 May 2026 19:19:18 +0530 Subject: [PATCH 13/17] fix reset indicesRowGroupStartOffsets --- src/storage/table/ice_disk_rel_table.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index 9e569f099c..bc60d39b65 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -211,6 +211,7 @@ common::row_idx_t IceDiskRelTable::getNumTotalRows(const Transaction* transactio } void IceDiskRelTable::loadIndicesRowGroupStartOffsets(const transaction::Transaction* transaction) { + indicesRowGroupStartOffsets.clear(); auto context = transaction->getClientContext(); auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); processor::ParquetReader reader(resolvedPath, std::vector(), context); From 4e8f40752785a055042c59d196fd75324d61de55 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Fri, 8 May 2026 14:27:38 +0530 Subject: [PATCH 14/17] fix/optimize ice-disk rel table scan --- .../storage/table/ice_disk_node_table.h | 2 +- .../storage/table/ice_disk_rel_table.h | 67 +++- src/include/storage/table/table.h | 2 +- .../operator/scan/scan_node_table.cpp | 29 +- src/storage/table/ice_disk_rel_table.cpp | 375 +++++++++--------- test/test_files/graph/parquet_rel_bwd.test | 39 -- 6 files changed, 250 insertions(+), 264 deletions(-) delete mode 100644 test/test_files/graph/parquet_rel_bwd.test diff --git a/src/include/storage/table/ice_disk_node_table.h 
b/src/include/storage/table/ice_disk_node_table.h index 1c98ab9294..063f7d31a6 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -22,7 +22,7 @@ struct IceDiskNodeTableScanState : public TableScanState { std::size_t currentRowGroupBatchOffset = 0; // offset of current rowGroupBatch - IceDiskNodeTableScanState(common::ValueVector* nodeIDVector, + IceDiskNodeTableScanState([[maybe_unused]] MemoryManager& mm, common::ValueVector* nodeIDVector, std::vector outputVectors, std::shared_ptr outChunkState) : TableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { diff --git a/src/include/storage/table/ice_disk_rel_table.h b/src/include/storage/table/ice_disk_rel_table.h index 73dcef4277..d31e532670 100644 --- a/src/include/storage/table/ice_disk_rel_table.h +++ b/src/include/storage/table/ice_disk_rel_table.h @@ -1,39 +1,49 @@ #pragma once #include +#include #include "catalog/catalog_entry/rel_group_catalog_entry.h" #include "common/exception/runtime.h" -#include "common/types/value/value.h" #include "processor/operator/persistent/reader/parquet/parquet_reader.h" #include "storage/table/rel_table.h" namespace lbug { +namespace common { +class VirtualFileSystem; +} // namespace common +namespace main { +class ClientContext; +} // namespace main + namespace storage { class IceDiskRelTable; +// The scan is reinitialized to the relevant row groups for each bound node. scanBatch is a reusable read buffer; it carries +// no positional state. High-degree nodes are handled by resuming across multiple calls. 
struct IceDiskRelTableScanState : public RelTableScanState { -private: - std::mutex mtx; -public: - std::unique_ptr indicesReader; - std::unique_ptr parquetScanState; - std::size_t currentRowGroupIdx = 0; + std::unique_ptr indicesReader; // null until first use + std::unique_ptr indicesScanState; + std::unique_ptr scanBatch; // reusable read buffer, lazily allocated + + // Resume state for the currently active bound node. + // activeEdgeEnd == 0 means no node is active (start fresh from the next bound node). + uint64_t activeEdgePos = 0; // global edge row to resume from + uint64_t activeEdgeEnd = 0; // exclusive end of the active node's edge range + common::sel_t activeSelPos = 0; // sel-vector position of the active bound node + common::offset_t activeNodeOffset = 0; // node offset of the active bound node (BWD filter) IceDiskRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector, std::vector outputVectors, std::shared_ptr outChunkState) - : RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { - parquetScanState = std::make_unique(); - } + : RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)}, + indicesScanState{std::make_unique()} {} void setToTable(const transaction::Transaction* transaction, Table* table_, std::vector columnIDs_, std::vector columnPredicateSets_ = {}, common::RelDataDirection direction_ = common::RelDataDirection::FWD) override; - - void initializeIndicesReader(transaction::Transaction* transaction); }; class IceDiskRelTable final : public RelTable { @@ -41,7 +51,7 @@ class IceDiskRelTable final : public RelTable { IceDiskRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, common::table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager); - + void initializeScanCoordination(transaction::Transaction* transaction); void initScanState(transaction::Transaction* transaction, TableScanState& 
scanState, bool resetCachedBoundNodeSelVec = true) const override; @@ -65,23 +75,38 @@ class IceDiskRelTable final : public RelTable { const catalog::RelGroupCatalogEntry* getRelGroupCatalogEntry() const { return relGroupCatalogEntry; } private: - void loadIndicesRowGroupStartOffsets(const transaction::Transaction* transaction); + // Lazy-open the indices parquet reader and allocate the reusable scan batch. + void initIndicesReaderIfNeeded(IceDiskRelTableScanState& iceState, + main::ClientContext* context, common::VirtualFileSystem* vfs, + MemoryManager* memMgr) const; + + // Compute the CSR edge range for a node. Returns nullopt when the node has no edges. + struct EdgeRange { uint64_t start; uint64_t end; }; + std::optional getEdgeRange(common::offset_t nodeOffset, bool isFwd) const; + + // Find row groups covering [range.start, range.end), read up to DEFAULT_VECTOR_CAPACITY + // edges starting at range.start. Returns {count, nextEdgePos} where nextEdgePos == range.end + // means the node is fully scanned; otherwise resume from nextEdgePos next call. 
+ struct EdgeScanProgress { + uint64_t count; // edges written to output vectors + uint64_t nextEdgePos; // global edge row to resume from next call + }; + EdgeScanProgress collectNodeEdges(RelTableScanState& state, IceDiskRelTableScanState& iceState, + EdgeRange range, common::offset_t nodeOffset, bool isFwd, + common::table_id_t nbrTableID, common::VirtualFileSystem* vfs) const; + void loadIndptrData(transaction::Transaction* transaction); + void loadIndicesMetadata(transaction::Transaction* transaction); void copyCachedBoundNodeSelVector(RelTableScanState& relScanState) const; - bool scanRowGroupForBoundNodes(transaction::Transaction* transaction, - IceDiskRelTableScanState& iceDiskScanState, const std::vector& rowGroupsToProcess, - const std::unordered_set& boundNodeOffsets) const; std::size_t findSourceNodeForRow(std::size_t globalRowIdx) const; private: std::string indicesFilePath; std::string indptrFilePath; const catalog::RelGroupCatalogEntry* relGroupCatalogEntry; - // Row group start offsets derived from Parquet metadata; stable for the lifetime of the table. - std::vector indicesRowGroupStartOffsets; - // Full CSR indptr array loaded once from disk; stable for the lifetime of the table. + // CSR indptr: element i = start of node i's edges. Size = numNodes + 1. 
std::vector indptrData; - constexpr static std::size_t scanRowGroupBatchSize = 2048; // Default batch size + std::vector indicesRGStarts; }; } // namespace storage diff --git a/src/include/storage/table/table.h b/src/include/storage/table/table.h index c0dd60615e..fc14653c61 100644 --- a/src/include/storage/table/table.h +++ b/src/include/storage/table/table.h @@ -60,7 +60,7 @@ struct LBUG_API TableScanState { virtual void setToTable(const transaction::Transaction* transaction, Table* table_, std::vector columnIDs_, - std::vector columnPredicateSets_ = {}, + std::vector columnPredicateSets_, common::RelDataDirection direction = common::RelDataDirection::INVALID); // Note that `resetCachedBoundNodeSelVec` is only applicable to RelTable for now. diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp index fe70d6deef..e949f3b6a0 100644 --- a/src/processor/operator/scan/scan_node_table.cpp +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -26,6 +26,12 @@ static std::unique_ptr createNodeTableScanState(NodeTable* table return std::make_unique(*memoryManager, nodeIDVector, outVectors, nodeIDVector->state); } + + if (dynamic_cast(table) != nullptr) { + return std::make_unique(*memoryManager, nodeIDVector, outVectors, + nodeIDVector->state); + } + return std::make_unique(nodeIDVector, outVectors, nodeIDVector->state); } @@ -154,28 +160,7 @@ void ScanNodeTableInfo::initScanState(TableScanState& scanState, void ScanNodeTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { ScanTable::initLocalStateInternal(resultSet, context); - auto nodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get(); - - // Check if the first table is a ParquetNodeTable or ArrowNodeTable and create appropriate scan - // state - auto* parquetTable = dynamic_cast(tableInfos[0].table); - auto* arrowTable = dynamic_cast(tableInfos[0].table); - auto* iceDiskTable = dynamic_cast(tableInfos[0].table); - if 
(parquetTable) { - scanState = std::make_unique( - *MemoryManager::Get(*context->clientContext), nodeIDVector, outVectors, - nodeIDVector->state); - } else if (iceDiskTable) { - scanState = std::make_unique( - nodeIDVector, outVectors, nodeIDVector->state); - } else if (arrowTable) { - scanState = - std::make_unique(*MemoryManager::Get(*context->clientContext), - nodeIDVector, outVectors, nodeIDVector->state); - } else { - scanState = - std::make_unique(nodeIDVector, outVectors, nodeIDVector->state); - } + nodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get(); currentTableIdx = 0; initCurrentTable(context); diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index bc60d39b65..cf4e56a6a3 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -1,14 +1,16 @@ #include "storage/table/ice_disk_rel_table.h" -#include -#include -#include +#include -#include "storage/storage_manager.h" -#include "storage/table/csr_node_group.h" -#include "transaction/transaction.h" #include "catalog/catalog_entry/rel_group_catalog_entry.h" +#include "common/assert.h" +#include "common/data_chunk/data_chunk.h" #include "common/exception/runtime.h" +#include "common/file_system/virtual_file_system.h" +#include "common/types/internal_id_util.h" +#include "processor/operator/persistent/reader/parquet/parquet_reader.h" +#include "storage/storage_manager.h" +#include "transaction/transaction.h" using namespace lbug::common; using namespace lbug::transaction; @@ -21,23 +23,11 @@ void IceDiskRelTableScanState::setToTable(const Transaction* transaction, Table* std::vector columnIDs_, std::vector columnPredicateSets_, common::RelDataDirection direction_) { - // Call base class implementation but skip local table setup TableScanState::setToTable(transaction, table_, std::move(columnIDs_), std::move(columnPredicateSets_)); direction = direction_; } -void 
IceDiskRelTableScanState::initializeIndicesReader(Transaction* transaction) { - if (!indicesReader) { - std::lock_guard lock(mtx); - - if(!indicesReader) { // Double-checked locking to avoid redundant initialization - auto* iceDiskRelTable = static_cast(table); - indicesReader = std::make_unique(iceDiskRelTable->getIndicesFilePath(), std::vector(), transaction->getClientContext()); - } - } -} - IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, common::table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager) @@ -46,232 +36,266 @@ IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::ta if (relGroupEntry->getIndicesPath().empty()) { throw RuntimeException("Indices file path is empty for icebug-disk-backed rel table"); } - if (relGroupEntry->getIndptrPath().empty()) { throw RuntimeException("Indptr file path is empty for icebug-disk-backed rel table"); } - indicesFilePath = relGroupEntry->getIndicesPath(); indptrFilePath = relGroupEntry->getIndptrPath(); } void IceDiskRelTable::initializeScanCoordination(Transaction* transaction) { - loadIndicesRowGroupStartOffsets(transaction); loadIndptrData(transaction); + loadIndicesMetadata(transaction); } -void IceDiskRelTable::initScanState(Transaction* transaction, TableScanState& scanState, +void IceDiskRelTable::initScanState(Transaction* /*transaction*/, TableScanState& scanState, bool resetCachedBoundNodeSelVec) const { auto& relScanState = scanState.cast(); - - // For morsel-driven parallelism, each scan state maintains its own bound node processing state - // No shared state needed between threads if (resetCachedBoundNodeSelVec) { - // Copy the cached bound node selection vector from the scan state copyCachedBoundNodeSelVector(relScanState); } + relScanState.currBoundNodeIdx = 0; - auto& iceDiskScanState = static_cast(relScanState); - iceDiskScanState.initializeIndicesReader(transaction); - 
iceDiskScanState.currentRowGroupIdx = 0; + auto& iceState = dynamic_cast(scanState); + iceState.activeEdgePos = 0; + iceState.activeEdgeEnd = 0; } bool IceDiskRelTable::scanInternal(Transaction* transaction, TableScanState& scanState) { - auto& iceDiskScanState = static_cast(scanState); - + auto& state = scanState.cast(); + auto& iceState = dynamic_cast(scanState); scanState.resetOutVectors(); - // Check if we have any row groups left to process - if (iceDiskScanState.currentRowGroupIdx >= indicesRowGroupStartOffsets.size()) { - // No more row groups to process - auto newSelVector = std::make_shared(0); - iceDiskScanState.outState->setSelVector(newSelVector); - return false; - } + auto* context = transaction->getClientContext(); + auto* vfs = VirtualFileSystem::GetUnsafe(*context); + auto* memMgr = MemoryManager::Get(*context); - // Process the current row group - std::vector rowGroupsToProcess = {iceDiskScanState.currentRowGroupIdx}; + initIndicesReaderIfNeeded(iceState, context, vfs, memMgr); - // Create a set of bound node IDs for fast lookup - std::unordered_set boundNodeOffsets; - for (size_t i = 0; i < iceDiskScanState.cachedBoundNodeSelVector.getSelSize(); ++i) { - common::sel_t boundNodeIdx = iceDiskScanState.cachedBoundNodeSelVector[i]; - const auto boundNodeID = iceDiskScanState.nodeIDVector->getValue(boundNodeIdx); - boundNodeOffsets.insert(boundNodeID.offset); - } + const bool isFwd = state.direction != RelDataDirection::BWD; + const auto nbrTableID = isFwd ? getToNodeTableID() : getFromNodeTableID(); + const auto numBoundNodes = state.cachedBoundNodeSelVector.getSelSize(); - // Scan the current row group and collect relationships for bound nodes - bool hasData = scanRowGroupForBoundNodes(transaction, iceDiskScanState, rowGroupsToProcess, - boundNodeOffsets); + while (true) { + // If the active node still has edges to emit, resume from where we left off. + // Otherwise advance to the next bound node. 
+ if (iceState.activeEdgePos >= iceState.activeEdgeEnd) { + if (state.currBoundNodeIdx >= numBoundNodes) { + break; + } + const auto selPos = state.cachedBoundNodeSelVector[state.currBoundNodeIdx]; + const auto nodeOffset = state.nodeIDVector->getValue(selPos).offset; + state.currBoundNodeIdx++; - // Move to next row group for next call - iceDiskScanState.currentRowGroupIdx++; + const auto range = getEdgeRange(nodeOffset, isFwd); + if (!range) { + iceState.activeEdgeEnd = 0; + continue; + } + iceState.activeEdgePos = range->start; + iceState.activeEdgeEnd = range->end; + iceState.activeSelPos = selPos; + iceState.activeNodeOffset = nodeOffset; + } + + const auto [count, nextEdgePos] = collectNodeEdges(state, iceState, + {iceState.activeEdgePos, iceState.activeEdgeEnd}, + iceState.activeNodeOffset, isFwd, nbrTableID, vfs); + iceState.activeEdgePos = nextEdgePos; + + if (count == 0) { + continue; + } + + auto selVec = std::make_shared(static_cast(count)); + selVec->setToUnfiltered(static_cast(count)); + state.outState->setSelVector(selVec); + state.setNodeIDVectorToFlat(iceState.activeSelPos); + return true; + } - return hasData; + state.outState->setSelVector(std::make_shared(0)); + return false; } -bool IceDiskRelTable::scanRowGroupForBoundNodes(Transaction* transaction, - IceDiskRelTableScanState& iceDiskScanState, const std::vector& rowGroupsToProcess, - const std::unordered_set& boundNodeOffsets) const { - if (!iceDiskScanState.indicesReader) { - return false; +void IceDiskRelTable::initIndicesReaderIfNeeded(IceDiskRelTableScanState& iceState, + main::ClientContext* context, VirtualFileSystem* vfs, MemoryManager* memMgr) const { + if (iceState.indicesReader) { + return; } + auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); + iceState.indicesReader = + std::make_unique(resolvedPath, std::vector(), context); + // initializeScan triggers createReader() which populates column metadata. 
+ // Use an empty group list to get the schema only, without reading any data. + iceState.indicesReader->initializeScan(*iceState.indicesScanState, {}, vfs); + const uint32_t numCols = iceState.indicesReader->getNumColumns(); + iceState.scanBatch = std::make_unique(numCols); + for (uint32_t col = 0; col < numCols; ++col) { + iceState.scanBatch->insert(col, + std::make_shared(iceState.indicesReader->getColumnType(col).copy(), memMgr)); + } +} - // Initialize scan state for the assigned row groups - auto context = transaction->getClientContext(); - auto vfs = VirtualFileSystem::GetUnsafe(*context); - iceDiskScanState.indicesReader->initializeScan(*iceDiskScanState.parquetScanState, - rowGroupsToProcess, vfs); - - // Create DataChunk matching the indices parquet file schema - auto numIndicesColumns = iceDiskScanState.indicesReader->getNumColumns(); - DataChunk indicesChunk(numIndicesColumns); - - // Insert value vectors for all columns in the parquet file - auto memoryManager = MemoryManager::Get(*context); - for (uint32_t colIdx = 0; colIdx < numIndicesColumns; ++colIdx) { - const auto& columnTypeRef = iceDiskScanState.indicesReader->getColumnType(colIdx); - auto columnType = columnTypeRef.copy(); - auto vector = std::make_shared(std::move(columnType), memoryManager); - indicesChunk.insert(colIdx, vector); +std::optional IceDiskRelTable::getEdgeRange( + offset_t nodeOffset, bool isFwd) const { + uint64_t start, end; + if (isFwd) { + if (nodeOffset + 1 >= indptrData.size()) { + return std::nullopt; + } + start = indptrData[nodeOffset]; + end = indptrData[nodeOffset + 1]; + } else { + start = 0; + end = indicesRGStarts.empty() ? 0 : indicesRGStarts.back(); + } + if (start >= end) { + return std::nullopt; } + return EdgeRange{start, end}; +} - // Scan the row groups and collect relationships for bound nodes. 
- const auto isFwd = iceDiskScanState.direction != RelDataDirection::BWD; - uint64_t totalRowsCollected = 0; - uint64_t currentGlobalRowIdx = 0; +IceDiskRelTable::EdgeScanProgress IceDiskRelTable::collectNodeEdges(RelTableScanState& state, + IceDiskRelTableScanState& iceState, EdgeRange range, offset_t nodeOffset, bool isFwd, + table_id_t nbrTableID, VirtualFileSystem* vfs) const { + // Reset selSize so the parquet reader's "setup" return (true, no data read) is not + // mistaken for a batch of stale data left over from the previous node's scan. + iceState.scanBatch->state->getSelVectorUnsafe().setSelSize(0); - // Calculate the starting global row index for the first row group - if (!rowGroupsToProcess.empty()) { - currentGlobalRowIdx = indicesRowGroupStartOffsets[rowGroupsToProcess[0]]; + // Locate the first row group containing range.start. + auto it = std::upper_bound(indicesRGStarts.begin(), indicesRGStarts.end(), range.start); + DASSERT(it != indicesRGStarts.begin()); + --it; + const uint64_t startRG = static_cast(std::distance(indicesRGStarts.begin(), it)); + + // Collect all row groups covering [range.start, range.end). + std::vector rowGroups; + for (uint64_t rg = startRG; rg + 1 < indicesRGStarts.size(); ++rg) { + rowGroups.push_back(rg); + if (indicesRGStarts[rg + 1] >= range.end) { + break; + } } + iceState.indicesReader->initializeScan(*iceState.indicesScanState, rowGroups, vfs); - while (totalRowsCollected < IceDiskRelTable::scanRowGroupBatchSize && - iceDiskScanState.indicesReader->scanInternal(*iceDiskScanState.parquetScanState, - indicesChunk)) { + uint64_t batchStart = indicesRGStarts[startRG]; + uint64_t count = 0; + uint64_t nextEdgePos = range.end; // default: node fully scanned + bool done = false; - auto selSize = indicesChunk.state->getSelVector().getSelSize(); - - for (size_t i = 0; i < selSize && totalRowsCollected < IceDiskRelTable::scanRowGroupBatchSize; - ++i, ++currentGlobalRowIdx) { - // Find which source node this row belongs to. 
- const auto sourceNodeOffset = findSourceNodeForRow(currentGlobalRowIdx); + while (!done) { + if (!iceState.indicesReader->scanInternal(*iceState.indicesScanState, *iceState.scanBatch)) { + break; + } + const auto& batchSel = iceState.scanBatch->state->getSelVector(); + const auto batchSize = batchSel.getSelSize(); + const auto& batch = *iceState.scanBatch; - // Column 0 in indices file is the destination node offset. - const auto dstOffset = indicesChunk.getValueVector(0).getValue(i); - const auto boundOffset = isFwd ? sourceNodeOffset : dstOffset; - - // not a bound node, skip - if (boundNodeOffsets.find(boundOffset) == boundNodeOffsets.end()) { + for (uint64_t i = 0; i < batchSize; ++i) { + const uint64_t globalRow = batchStart + i; + if (globalRow < range.start) { continue; } + if (globalRow >= range.end) { + done = true; + break; + } - const auto nbrOffset = isFwd ? dstOffset : sourceNodeOffset; - const auto nbrTableID = isFwd ? getToNodeTableID() : getFromNodeTableID(); - auto nbrNodeID = internalID_t(nbrOffset, nbrTableID); - - // outputVectors[0] is the neighbor node ID, if requested. 
- if (!iceDiskScanState.outputVectors.empty()) { - iceDiskScanState.outputVectors[0]->setValue(totalRowsCollected, nbrNodeID); + const auto physIdx = batchSel[static_cast(i)]; + const auto destOffset = batch.getValueVector(0).getValue(physIdx); + + if (isFwd) { + if (!state.outputVectors.empty()) { + state.outputVectors[0]->setValue(count, internalID_t{destOffset, nbrTableID}); + } + } else { + if (destOffset != nodeOffset) { + continue; + } + if (!state.outputVectors.empty()) { + state.outputVectors[0]->setValue(count, + internalID_t{findSourceNodeForRow(globalRow), nbrTableID}); + } } - // If there are additional columns (e.g., weight), copy them to subsequent output - // vectors These are property columns and should have matching types - for (uint32_t colIdx = 1; - colIdx < numIndicesColumns && colIdx < iceDiskScanState.outputVectors.size(); - ++colIdx) { - iceDiskScanState.outputVectors[colIdx]->copyFromVectorData(totalRowsCollected, - &indicesChunk.getValueVector(colIdx), i); + for (uint32_t col = 1; + col < batch.getNumValueVectors() && col < state.outputVectors.size(); ++col) { + const auto& vec = batch.getValueVector(col); + if (vec.isNull(physIdx)) { + state.outputVectors[col]->setNull(count, true); + } else { + state.outputVectors[col]->copyFromValue(count, *vec.getAsValue(physIdx)); + } } - totalRowsCollected++; + if (++count >= DEFAULT_VECTOR_CAPACITY) { + // Node has more edges; resume from the next global row on the next call. 
+ nextEdgePos = globalRow + 1; + done = true; + break; + } } + batchStart += batchSize; } - // No data found - if (totalRowsCollected <= 0) { - auto selVector = std::make_shared(0); - iceDiskScanState.outState->setSelVector(selVector); - return false; - } - - auto selVector = std::make_shared(totalRowsCollected); - selVector->setToUnfiltered(totalRowsCollected); - iceDiskScanState.outState->setSelVector(selVector); - - return true; + return {count, nextEdgePos}; } common::row_idx_t IceDiskRelTable::getNumTotalRows(const Transaction* transaction) { - auto context = transaction->getClientContext(); - auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); - std::vector dummySkips; - processor::ParquetReader reader(resolvedPath, dummySkips, context); - return reader.getMetadata()->num_rows; -} - -void IceDiskRelTable::loadIndicesRowGroupStartOffsets(const transaction::Transaction* transaction) { - indicesRowGroupStartOffsets.clear(); auto context = transaction->getClientContext(); auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); processor::ParquetReader reader(resolvedPath, std::vector(), context); - - auto metadata = reader.getMetadata(); - std::size_t currentOffset = 0; - - for (auto i = 0u; i < metadata->row_groups.size(); ++i) { - indicesRowGroupStartOffsets.push_back(currentOffset); - currentOffset += metadata->row_groups[i].num_rows; - } + return reader.getMetadata()->num_rows; } void IceDiskRelTable::loadIndptrData(Transaction* transaction) { indptrData.clear(); auto context = transaction->getClientContext(); - auto vfs = VirtualFileSystem::GetUnsafe(*context); + auto* vfs = VirtualFileSystem::GetUnsafe(*context); auto resolvedPath = VirtualFileSystem::resolvePath(context, indptrFilePath); - auto indptrReader = std::make_unique(resolvedPath, std::vector(), context); + auto reader = + std::make_unique(resolvedPath, std::vector(), context); processor::ParquetReaderScanState scanState; - std::vector groupsToRead; 
- for (uint64_t i = 0; i < indptrReader->getMetadata()->row_groups.size(); ++i) { + std::vector groupsToRead; + for (uint64_t i = 0; i < reader->getMetadata()->row_groups.size(); ++i) { groupsToRead.push_back(i); } + reader->initializeScan(scanState, groupsToRead, vfs); - indptrReader->initializeScan(scanState, groupsToRead, vfs); - - // Check if the indptr file has any columns after scan initialization - auto numColumns = indptrReader->getNumColumns(); - if (numColumns == 0) { + if (reader->getNumColumns() == 0) { throw RuntimeException("Indptr parquet file has no columns"); } - - // Validate column type for indptr - const auto& indptrType = indptrReader->getColumnType(0); - if (!LogicalTypeUtils::isIntegral(indptrType.getLogicalTypeID())) { - throw RuntimeException( - "Indptr parquet file column must be integer type (column 0)"); + if (!LogicalTypeUtils::isIntegral(reader->getColumnType(0).getLogicalTypeID())) { + throw RuntimeException("Indptr parquet file column must be integer type"); } - DataChunk dataChunk(1); - const auto& columnTypeRef = indptrReader->getColumnType(0); - auto columnType = columnTypeRef.copy(); - auto vector = std::make_shared(std::move(columnType), MemoryManager::Get(*context)); - dataChunk.insert(0, vector); - - while (indptrReader->scanInternal(scanState, dataChunk)) { - auto selVector = dataChunk.state->getSelVectorShared(); - auto selSize = selVector->getSelSize(); - auto& valVector = dataChunk.getValueVector(0); - - for (std::size_t i = 0; i < selSize; ++i) { - auto value = valVector.getValue((*selVector)[i]); - indptrData.push_back(value); + DataChunk chunk(1); + chunk.insert(0, std::make_shared(reader->getColumnType(0).copy())); + + while (reader->scanInternal(scanState, chunk)) { + auto& sel = chunk.state->getSelVector(); + for (size_t i = 0; i < sel.getSelSize(); ++i) { + indptrData.push_back(chunk.getValueVector(0).getValue(sel[i])); } } } +void IceDiskRelTable::loadIndicesMetadata(Transaction* transaction) { + 
indicesRGStarts.clear(); + auto context = transaction->getClientContext(); + auto resolvedPath = VirtualFileSystem::resolvePath(context, indicesFilePath); + processor::ParquetReader reader(resolvedPath, std::vector(), context); + uint64_t cumulative = 0; + for (const auto& rg : reader.getMetadata()->row_groups) { + indicesRGStarts.push_back(cumulative); + cumulative += static_cast(rg.num_rows); + } + indicesRGStarts.push_back(cumulative); // sentinel = total edge count +} + void IceDiskRelTable::copyCachedBoundNodeSelVector(RelTableScanState& relScanState) const { if (relScanState.nodeIDVector->state->getSelVector().isUnfiltered()) { relScanState.cachedBoundNodeSelVector.setToUnfiltered(); @@ -286,17 +310,8 @@ void IceDiskRelTable::copyCachedBoundNodeSelVector(RelTableScanState& relScanSta } std::size_t IceDiskRelTable::findSourceNodeForRow(std::size_t globalRowIdx) const { - if (indptrData.empty()) { - throw RuntimeException("Indptr data not loaded for CSR format"); - } - - // Binary search to find the source node - // indptrData[i] contains the start row index for source node i - // Find the largest i where indptrData[i] <= globalRowIdx auto it = std::upper_bound(indptrData.begin(), indptrData.end(), globalRowIdx); - if (it == indptrData.begin()) { - throw RuntimeException("Invalid global row index: " + std::to_string(globalRowIdx)); - } + DASSERT(it != indptrData.begin()); --it; return static_cast(std::distance(indptrData.begin(), it)); } diff --git a/test/test_files/graph/parquet_rel_bwd.test b/test/test_files/graph/parquet_rel_bwd.test deleted file mode 100644 index 2559403179..0000000000 --- a/test/test_files/graph/parquet_rel_bwd.test +++ /dev/null @@ -1,39 +0,0 @@ --DATASET GRAPH-STD parquet-rel-bwd-test - --- - --CASE ParquetRelBwdScan --SKIP --LOG FwdScanAll --STATEMENT MATCH (u:user)-[:wrote]->(p:post) RETURN u.name, p.title ORDER BY u.name, p.title ----- 5 -Adam|p0 -Adam|p1 -Karissa|p1 -Karissa|p2 -Zhang|p3 - --LOG BwdScanAll --STATEMENT MATCH 
(p:post)<-[:wrote]-(u:user) RETURN u.name, p.title ORDER BY u.name, p.title ----- 5 -Adam|p0 -Adam|p1 -Karissa|p1 -Karissa|p2 -Zhang|p3 - --LOG BwdScanBoundOnPost --STATEMENT MATCH (p:post {title: 'p0'})<-[:wrote]-(u:user) RETURN u.name ----- 1 -Adam - --LOG BwdScanBoundOnPostMultiple --STATEMENT MATCH (p:post {title: 'p1'})<-[:wrote]-(u:user) RETURN u.name ORDER BY u.name ----- 2 -Adam -Karissa - --LOG BwdScanBoundOnPostLast --STATEMENT MATCH (p:post {title: 'p3'})<-[:wrote]-(u:user) RETURN u.name ----- 1 -Zhang From 39635d64013ff51a8900b6936ed637f407270bfd Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Fri, 8 May 2026 20:04:19 +0530 Subject: [PATCH 15/17] revert table_path, indptr, indices options --- src/binder/bind/bind_ddl.cpp | 43 +++---------- src/catalog/catalog.cpp | 4 +- .../node_table_catalog_entry.cpp | 9 +-- .../catalog_entry/rel_group_catalog_entry.cpp | 16 +---- .../binder/ddl/bound_create_table_info.h | 21 +++---- .../catalog_entry/node_table_catalog_entry.h | 6 +- .../catalog_entry/rel_group_catalog_entry.h | 6 -- src/include/common/constants.h | 8 ++- .../storage/table/ice_disk_node_table.h | 12 ++-- .../storage/table/ice_disk_rel_table.h | 35 ++++++----- src/include/storage/table/ice_disk_utils.h | 60 +++++++++++++++++++ .../operator/scan/scan_multi_rel_tables.cpp | 8 ++- .../operator/scan/scan_node_table.cpp | 1 - .../operator/scan/scan_rel_table.cpp | 8 ++- src/storage/storage_manager.cpp | 18 +++--- src/storage/table/ice_disk_node_table.cpp | 52 +++++++++------- src/storage/table/ice_disk_rel_table.cpp | 57 +++++++++--------- test/test_helper/test_helper.cpp | 27 ++++++++- 18 files changed, 220 insertions(+), 171 deletions(-) create mode 100644 src/include/storage/table/ice_disk_utils.h diff --git a/src/binder/bind/bind_ddl.cpp b/src/binder/bind/bind_ddl.cpp index d69f7519d5..c01b6f2e9c 100644 --- a/src/binder/bind/bind_ddl.cpp +++ b/src/binder/bind/bind_ddl.cpp @@ -10,6 +10,7 @@ #include "catalog/catalog.h" #include 
"catalog/catalog_entry/node_table_catalog_entry.h" #include "catalog/catalog_entry/sequence_catalog_entry.h" +#include "common/constants.h" #include "common/enums/extend_direction_util.h" #include "common/exception/binder.h" #include "common/exception/message.h" @@ -174,30 +175,6 @@ static std::string getStorage(const case_insensitive_map_t& options) { return ""; } -static std::string getTablePath(const case_insensitive_map_t& options) { - if (options.contains(TableOptionConstants::TABLE_PATH)) { - return options.at(TableOptionConstants::TABLE_PATH).toString(); - } - - return ""; -} - -static std::string getIndicesPath(const case_insensitive_map_t& options) { - if (options.contains(TableOptionConstants::INDICES_OPTION)) { - return options.at(TableOptionConstants::INDICES_OPTION).toString(); - } - - return ""; -} - -static std::string getIndptrPath(const case_insensitive_map_t& options) { - if (options.contains(TableOptionConstants::INDPTR_OPTION)) { - return options.at(TableOptionConstants::INDPTR_OPTION).toString(); - } - - return ""; -} - static ExtendDirection getStorageDirection(const case_insensitive_map_t& options) { if (options.contains(TableOptionConstants::REL_STORAGE_DIRECTION_OPTION)) { return ExtendDirectionUtil::fromString( @@ -212,9 +189,8 @@ BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info validatePrimaryKey(extraInfo.pKName, propertyDefinitions); auto boundOptions = bindParsingOptions(extraInfo.options); auto storage = getStorage(boundOptions); - auto tablePath = getTablePath(boundOptions); auto boundExtraInfo = std::make_unique(extraInfo.pKName, - std::move(propertyDefinitions), std::move(storage), std::move(tablePath)); + std::move(propertyDefinitions), std::move(storage)); return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName, info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry()); } @@ -237,8 +213,6 @@ BoundCreateTableInfo 
Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* auto boundOptions = bindParsingOptions(extraInfo.options); auto storageDirection = getStorageDirection(boundOptions); auto storage = getStorage(boundOptions); - auto indicesPath = getIndicesPath(boundOptions); - auto indptrPath = getIndptrPath(boundOptions); std::optional scanFunction = std::nullopt; std::optional> scanBindData = std::nullopt; std::string foreignDatabaseName; @@ -338,13 +312,14 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* } } - // For icebug-disk rel tables, validate that FROM and TO are icebug-disk tables - if (storage.find("icebug-disk") != std::string::npos) { + // For icebug-disk rel tables, validate that FROM and TO are icebug-disk node tables + if (TableOptionConstants::isIceBugDiskStorage(storage)) { auto srcNodeEntry = srcEntry->ptrCast(); auto dstNodeEntry = dstEntry->ptrCast(); - if (srcNodeEntry->getStorage().find("icebug-disk") == std::string::npos || - dstNodeEntry->getStorage().find("icebug-disk") == std::string::npos) { - throw BinderException("icebug-disk rel tables require both FROM and TO tables to be icebug-disk node tables."); + if (!TableOptionConstants::isIceBugDiskStorage(srcNodeEntry->getStorage()) || + !TableOptionConstants::isIceBugDiskStorage(dstNodeEntry->getStorage())) { + throw BinderException("icebug-disk rel tables require both FROM and TO tables to " + "be icebug-disk node tables."); } } @@ -362,7 +337,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* } auto boundExtraInfo = std::make_unique( std::move(propertyDefinitions), srcMultiplicity, dstMultiplicity, storageDirection, - std::move(nodePairs), std::move(storage), std::move(indicesPath), std::move(indptrPath), std::move(scanFunction), std::move(scanBindData), + std::move(nodePairs), std::move(storage), std::move(scanFunction), std::move(scanBindData), std::move(foreignDatabaseName)); return 
BoundCreateTableInfo(CatalogEntryType::REL_GROUP_ENTRY, info->tableName, info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry()); diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 29e89205d5..930dbc2e68 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -205,7 +205,7 @@ CatalogEntry* Catalog::createRelGroupEntry(Transaction* transaction, } auto relGroupEntry = std::make_unique(info.tableName, extraInfo->srcMultiplicity, extraInfo->dstMultiplicity, extraInfo->storageDirection, - std::move(relTableInfos), extraInfo->storage, extraInfo->indicesPath, extraInfo->indptrPath, extraInfo->scanFunction, + std::move(relTableInfos), extraInfo->storage, extraInfo->scanFunction, std::move(extraInfo->scanBindData), extraInfo->foreignDatabaseName); for (auto& definition : extraInfo->propertyDefinitions) { relGroupEntry->addProperty(definition); @@ -561,7 +561,7 @@ CatalogEntry* Catalog::createNodeTableEntry(Transaction* transaction, const BoundCreateTableInfo& info) { const auto extraInfo = info.extraInfo->constPtrCast(); auto entry = std::make_unique(info.tableName, extraInfo->primaryKeyName, - extraInfo->storage, extraInfo->tablePath); + extraInfo->storage); for (auto& definition : extraInfo->propertyDefinitions) { entry->addProperty(definition); } diff --git a/src/catalog/catalog_entry/node_table_catalog_entry.cpp b/src/catalog/catalog_entry/node_table_catalog_entry.cpp index 0b0822c57c..ee7170fc13 100644 --- a/src/catalog/catalog_entry/node_table_catalog_entry.cpp +++ b/src/catalog/catalog_entry/node_table_catalog_entry.cpp @@ -24,8 +24,6 @@ void NodeTableCatalogEntry::serialize(common::Serializer& serializer) const { serializer.write(primaryKeyName); serializer.writeDebuggingInfo("storage"); serializer.write(storage); - serializer.writeDebuggingInfo("tablePath"); - serializer.write(tablePath); } std::unique_ptr NodeTableCatalogEntry::deserialize( @@ -37,13 +35,9 @@ std::unique_ptr 
NodeTableCatalogEntry::deserialize( deserializer.deserializeValue(primaryKeyName); deserializer.validateDebuggingInfo(debuggingInfo, "storage"); deserializer.deserializeValue(storage); - std::string tablePath; - deserializer.validateDebuggingInfo(debuggingInfo, "tablePath"); - deserializer.deserializeValue(tablePath); auto nodeTableEntry = std::make_unique(); nodeTableEntry->primaryKeyName = primaryKeyName; nodeTableEntry->storage = storage; - nodeTableEntry->tablePath = tablePath; return nodeTableEntry; } @@ -72,7 +66,6 @@ std::unique_ptr NodeTableCatalogEntry::copy() const { auto other = std::make_unique(); other->primaryKeyName = primaryKeyName; other->storage = storage; - other->tablePath = tablePath; other->scanFunction = scanFunction; other->createBindDataFunc = createBindDataFunc; other->foreignDatabaseName = foreignDatabaseName; @@ -83,7 +76,7 @@ std::unique_ptr NodeTableCatalogEntry::copy() const { std::unique_ptr NodeTableCatalogEntry::getBoundExtraCreateInfo( transaction::Transaction*) const { return std::make_unique(primaryKeyName, - copyVector(getProperties()), storage, tablePath); + copyVector(getProperties()), storage); } } // namespace catalog diff --git a/src/catalog/catalog_entry/rel_group_catalog_entry.cpp b/src/catalog/catalog_entry/rel_group_catalog_entry.cpp index c172b8e7ad..4b9f145bf2 100644 --- a/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +++ b/src/catalog/catalog_entry/rel_group_catalog_entry.cpp @@ -98,10 +98,6 @@ void RelGroupCatalogEntry::serialize(Serializer& serializer) const { serializer.serializeValue(storageDirection); serializer.writeDebuggingInfo("storage"); serializer.serializeValue(storage); - serializer.writeDebuggingInfo("indicesPath"); - serializer.serializeValue(indicesPath); - serializer.writeDebuggingInfo("indptrPath"); - serializer.serializeValue(indptrPath); serializer.writeDebuggingInfo("scanFunction"); serializer.serializeValue(scanFunction.has_value()); if (scanFunction.has_value()) { @@ -127,12 +123,6 @@ 
std::unique_ptr RelGroupCatalogEntry::deserialize( deserializer.deserializeValue(storageDirection); deserializer.validateDebuggingInfo(debuggingInfo, "storage"); deserializer.deserializeValue(storage); - std::string indicesPath; - deserializer.validateDebuggingInfo(debuggingInfo, "indicesPath"); - deserializer.deserializeValue(indicesPath); - std::string indptrPath; - deserializer.validateDebuggingInfo(debuggingInfo, "indptrPath"); - deserializer.deserializeValue(indptrPath); deserializer.validateDebuggingInfo(debuggingInfo, "scanFunction"); bool hasScanFunction; deserializer.deserializeValue(hasScanFunction); @@ -147,8 +137,6 @@ std::unique_ptr RelGroupCatalogEntry::deserialize( relGroupEntry->dstMultiplicity = dstMultiplicity; relGroupEntry->storageDirection = storageDirection; relGroupEntry->storage = storage; - relGroupEntry->indicesPath = indicesPath; - relGroupEntry->indptrPath = indptrPath; relGroupEntry->scanFunction = scanFunction; relGroupEntry->relTableInfos = relTableInfos; return relGroupEntry; @@ -210,8 +198,6 @@ std::unique_ptr RelGroupCatalogEntry::copy() const { other->dstMultiplicity = dstMultiplicity; other->storageDirection = storageDirection; other->storage = storage; - other->indicesPath = indicesPath; - other->indptrPath = indptrPath; other->scanFunction = scanFunction; other->scanBindData = std::nullopt; // TODO: implement copy for bindData if needed other->foreignDatabaseName = foreignDatabaseName; @@ -228,7 +214,7 @@ RelGroupCatalogEntry::getBoundExtraCreateInfo(transaction::Transaction*) const { } return std::make_unique( copyVector(propertyCollection.getDefinitions()), srcMultiplicity, dstMultiplicity, - storageDirection, std::move(nodePairs), storage, indicesPath, indptrPath); + storageDirection, std::move(nodePairs), storage); } } // namespace catalog diff --git a/src/include/binder/ddl/bound_create_table_info.h b/src/include/binder/ddl/bound_create_table_info.h index 0a88abe66d..2db5ac5d39 100644 --- 
a/src/include/binder/ddl/bound_create_table_info.h +++ b/src/include/binder/ddl/bound_create_table_info.h @@ -76,15 +76,14 @@ struct LBUG_API BoundExtraCreateTableInfo : BoundExtraCreateCatalogEntryInfo { struct BoundExtraCreateNodeTableInfo final : BoundExtraCreateTableInfo { std::string primaryKeyName; std::string storage; - std::string tablePath; BoundExtraCreateNodeTableInfo(std::string primaryKeyName, - std::vector definitions, std::string storage = "", std::string tablePath = "") + std::vector definitions, std::string storage = "") : BoundExtraCreateTableInfo{std::move(definitions)}, - primaryKeyName{std::move(primaryKeyName)}, storage{std::move(storage)}, tablePath{std::move(tablePath)} {} + primaryKeyName{std::move(primaryKeyName)}, storage{std::move(storage)} {} BoundExtraCreateNodeTableInfo(const BoundExtraCreateNodeTableInfo& other) : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)}, - primaryKeyName{other.primaryKeyName}, storage{other.storage}, tablePath{other.tablePath} {} + primaryKeyName{other.primaryKeyName}, storage{other.storage} {} std::unique_ptr copy() const override { return std::make_unique(*this); @@ -97,8 +96,6 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo { common::ExtendDirection storageDirection; std::vector nodePairs; std::string storage; - std::string indicesPath; - std::string indptrPath; std::optional scanFunction; std::optional> scanBindData; std::string foreignDatabaseName; @@ -106,22 +103,22 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo { explicit BoundExtraCreateRelTableGroupInfo(std::vector definitions, common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity, common::ExtendDirection storageDirection, std::vector nodePairs, - std::string storage = "", std::string indicesPath = "", std::string indptrPath = "", + std::string storage = "", std::optional scanFunction = std::nullopt, std::optional> scanBindData = std::nullopt, 
std::string foreignDatabaseName = "") : BoundExtraCreateTableInfo{std::move(definitions)}, srcMultiplicity{srcMultiplicity}, dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection}, - nodePairs{std::move(nodePairs)}, storage{std::move(storage)}, indicesPath{std::move(indicesPath)}, - indptrPath{std::move(indptrPath)}, scanFunction{std::move(scanFunction)}, - scanBindData{std::move(scanBindData)}, foreignDatabaseName{std::move(foreignDatabaseName)} {} + nodePairs{std::move(nodePairs)}, storage{std::move(storage)}, + scanFunction{std::move(scanFunction)}, scanBindData{std::move(scanBindData)}, + foreignDatabaseName{std::move(foreignDatabaseName)} {} BoundExtraCreateRelTableGroupInfo(const BoundExtraCreateRelTableGroupInfo& other) : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)}, srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity}, storageDirection{other.storageDirection}, nodePairs{other.nodePairs}, - storage{other.storage}, indicesPath{other.indicesPath}, indptrPath{other.indptrPath}, - scanFunction{other.scanFunction}, scanBindData{other.scanBindData}, foreignDatabaseName{other.foreignDatabaseName} {} + storage{other.storage}, scanFunction{other.scanFunction}, + scanBindData{other.scanBindData}, foreignDatabaseName{other.foreignDatabaseName} {} std::unique_ptr copy() const override { return std::make_unique(*this); diff --git a/src/include/catalog/catalog_entry/node_table_catalog_entry.h b/src/include/catalog/catalog_entry/node_table_catalog_entry.h index 5bc1252b3a..32dfd20a82 100644 --- a/src/include/catalog/catalog_entry/node_table_catalog_entry.h +++ b/src/include/catalog/catalog_entry/node_table_catalog_entry.h @@ -27,9 +27,9 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry { public: NodeTableCatalogEntry() = default; - NodeTableCatalogEntry(std::string name, std::string primaryKeyName, std::string storage = "", std::string tablePath = "") + NodeTableCatalogEntry(std::string 
name, std::string primaryKeyName, std::string storage = "") : TableCatalogEntry{entryType_, std::move(name)}, primaryKeyName{std::move(primaryKeyName)}, - storage{std::move(storage)}, tablePath{std::move(tablePath)} {} + storage{std::move(storage)} {} // Constructor for foreign-backed tables NodeTableCatalogEntry(std::string name, std::string primaryKeyName, @@ -56,7 +56,6 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry { return getProperty(primaryKeyName); } const std::string& getStorage() const { return storage; } - const std::string& getTablePath() const { return tablePath; } std::optional getScanFunction() const override; const CreateBindDataFunc& getCreateBindDataFunc() const { return createBindDataFunc; } const std::string& getForeignDatabaseName() const { return foreignDatabaseName; } @@ -83,7 +82,6 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry { private: std::string primaryKeyName; std::string storage; - std::string tablePath; std::optional scanFunction; CreateBindDataFunc createBindDataFunc; // Callback to create bind data std::string foreignDatabaseName; diff --git a/src/include/catalog/catalog_entry/rel_group_catalog_entry.h b/src/include/catalog/catalog_entry/rel_group_catalog_entry.h index dc35de6812..807dc8f002 100644 --- a/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +++ b/src/include/catalog/catalog_entry/rel_group_catalog_entry.h @@ -39,14 +39,12 @@ class LBUG_API RelGroupCatalogEntry final : public TableCatalogEntry { RelGroupCatalogEntry(std::string tableName, common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity, common::ExtendDirection storageDirection, std::vector relTableInfos, std::string storage = "", - std::string indicesPath = "", std::string indptrPath = "", std::optional scanFunction = std::nullopt, std::optional> scanBindData = std::nullopt, std::string foreignDatabaseName = "") : TableCatalogEntry{type_, std::move(tableName)}, 
srcMultiplicity{srcMultiplicity}, dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection}, relTableInfos{std::move(relTableInfos)}, storage{std::move(storage)}, - indicesPath{std::move(indicesPath)}, indptrPath{std::move(indptrPath)}, scanFunction{std::move(scanFunction)}, scanBindData{std::move(scanBindData)}, foreignDatabaseName{std::move(foreignDatabaseName)} { propertyCollection = @@ -65,8 +63,6 @@ class LBUG_API RelGroupCatalogEntry final : public TableCatalogEntry { common::ExtendDirection getStorageDirection() const { return storageDirection; } const std::string& getStorage() const { return storage; } - const std::string& getIndicesPath() const { return indicesPath; } - const std::string& getIndptrPath() const { return indptrPath; } std::optional getScanFunction() const override { return scanFunction; } const std::optional>& getScanBindData() const { return scanBindData; @@ -117,8 +113,6 @@ class LBUG_API RelGroupCatalogEntry final : public TableCatalogEntry { common::ExtendDirection storageDirection = common::ExtendDirection::BOTH; std::vector relTableInfos; std::string storage; - std::string indicesPath; - std::string indptrPath; std::optional scanFunction; std::optional> scanBindData; std::string foreignDatabaseName; // Database name for foreign-backed rel tables diff --git a/src/include/common/constants.h b/src/include/common/constants.h index 53a3241530..3a04d3660c 100644 --- a/src/include/common/constants.h +++ b/src/include/common/constants.h @@ -86,9 +86,11 @@ struct StorageConstants { struct TableOptionConstants { static constexpr char REL_STORAGE_DIRECTION_OPTION[] = "STORAGE_DIRECTION"; static constexpr char REL_STORAGE_OPTION[] = "STORAGE"; - static constexpr char TABLE_PATH[] = "TABLE_PATH"; - static constexpr char INDICES_OPTION[] = "INDICES"; - static constexpr char INDPTR_OPTION[] = "INDPTR"; + static constexpr std::string_view ICEBUG_DISK_PREFIX = "icebug-disk"; + + static bool isIceBugDiskStorage(const std::string& storage) { + 
return storage.starts_with(ICEBUG_DISK_PREFIX); + } }; // Hash Index Configurations diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index 063f7d31a6..bc30a263bb 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -3,8 +3,8 @@ #include #include -#include "storage/table/node_table.h" #include "processor/operator/persistent/reader/parquet/parquet_reader.h" +#include "storage/table/node_table.h" namespace lbug { namespace storage { @@ -21,7 +21,6 @@ struct IceDiskNodeTableScanState : public TableScanState { std::vector>> data; // data[rowGroup][column] std::size_t currentRowGroupBatchOffset = 0; // offset of current rowGroupBatch - IceDiskNodeTableScanState([[maybe_unused]] MemoryManager& mm, common::ValueVector* nodeIDVector, std::vector outputVectors, std::shared_ptr outChunkState) @@ -87,17 +86,20 @@ class IceDiskNodeTable final : public NodeTable { const std::string& getParquetFilePath() const { return parquetFilePath; } const catalog::NodeTableCatalogEntry* getCatalogEntry() const { return nodeTableCatalogEntry; } - IceDiskNodeTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } + IceDiskNodeTableScanSharedState* getTableScanSharedState() const { + return tableScanSharedState.get(); + } std::size_t getNumScanMorsels(const transaction::Transaction* transaction) const; private: std::size_t getNumRowGroups(const transaction::Transaction* transaction) const; - void initIceDiskScanForRowGroup(transaction::Transaction* transaction, IceDiskNodeTableScanState& scanState) const; + void initIceDiskScanForRowGroup(transaction::Transaction* transaction, + IceDiskNodeTableScanState& scanState) const; void readParquetData(transaction::Transaction* transaction, TableScanState& scanState) const; private: - std::string parquetFilePath; + const std::string parquetFilePath; const catalog::NodeTableCatalogEntry* 
nodeTableCatalogEntry; std::vector rowGroupStartOffsets; mutable std::unique_ptr tableScanSharedState; diff --git a/src/include/storage/table/ice_disk_rel_table.h b/src/include/storage/table/ice_disk_rel_table.h index d31e532670..b56bfc86fd 100644 --- a/src/include/storage/table/ice_disk_rel_table.h +++ b/src/include/storage/table/ice_disk_rel_table.h @@ -20,18 +20,19 @@ namespace storage { class IceDiskRelTable; -// The scan is reinitialized to the relevant row groups for each bound node. scanBatch is a reusable read buffer; it carries -// no positional state. High-degree nodes are handled by resuming across multiple calls. +// The scan is reinitialized to the relevant row groups for each bound node. scanBatch is a reusable +// read buffer; it carries no positional state. High-degree nodes are handled by resuming across +// multiple calls. struct IceDiskRelTableScanState : public RelTableScanState { - std::unique_ptr indicesReader; // null until first use + std::unique_ptr indicesReader; // null until first use std::unique_ptr indicesScanState; std::unique_ptr scanBatch; // reusable read buffer, lazily allocated // Resume state for the currently active bound node. // activeEdgeEnd == 0 means no node is active (start fresh from the next bound node). 
- uint64_t activeEdgePos = 0; // global edge row to resume from - uint64_t activeEdgeEnd = 0; // exclusive end of the active node's edge range - common::sel_t activeSelPos = 0; // sel-vector position of the active bound node + uint64_t activeEdgePos = 0; // global edge row to resume from + uint64_t activeEdgeEnd = 0; // exclusive end of the active node's edge range + common::sel_t activeSelPos = 0; // sel-vector position of the active bound node common::offset_t activeNodeOffset = 0; // node offset of the active bound node (BWD filter) IceDiskRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector, @@ -72,28 +73,32 @@ class IceDiskRelTable final : public RelTable { const std::string& getIndicesFilePath() const { return indicesFilePath; } const std::string& getIndptrFilePath() const { return indptrFilePath; } - const catalog::RelGroupCatalogEntry* getRelGroupCatalogEntry() const { return relGroupCatalogEntry; } + const catalog::RelGroupCatalogEntry* getRelGroupCatalogEntry() const { + return relGroupCatalogEntry; + } private: // Lazy-open the indices parquet reader and allocate the reusable scan batch. - void initIndicesReaderIfNeeded(IceDiskRelTableScanState& iceState, - main::ClientContext* context, common::VirtualFileSystem* vfs, - MemoryManager* memMgr) const; + void initIndicesReaderIfNeeded(IceDiskRelTableScanState& iceState, main::ClientContext* context, + common::VirtualFileSystem* vfs, MemoryManager* memMgr) const; // Compute the CSR edge range for a node. Returns nullopt when the node has no edges. - struct EdgeRange { uint64_t start; uint64_t end; }; + struct EdgeRange { + uint64_t start; + uint64_t end; + }; std::optional getEdgeRange(common::offset_t nodeOffset, bool isFwd) const; // Find row groups covering [range.start, range.end), read up to DEFAULT_VECTOR_CAPACITY // edges starting at range.start. 
Returns {count, nextEdgePos} where nextEdgePos == range.end // means the node is fully scanned; otherwise resume from nextEdgePos next call. struct EdgeScanProgress { - uint64_t count; // edges written to output vectors - uint64_t nextEdgePos; // global edge row to resume from next call + uint64_t count; // edges written to output vectors + uint64_t nextEdgePos; // global edge row to resume from next call }; EdgeScanProgress collectNodeEdges(RelTableScanState& state, IceDiskRelTableScanState& iceState, - EdgeRange range, common::offset_t nodeOffset, bool isFwd, - common::table_id_t nbrTableID, common::VirtualFileSystem* vfs) const; + EdgeRange range, common::offset_t nodeOffset, bool isFwd, common::table_id_t nbrTableID, + common::VirtualFileSystem* vfs) const; void loadIndptrData(transaction::Transaction* transaction); void loadIndicesMetadata(transaction::Transaction* transaction); diff --git a/src/include/storage/table/ice_disk_utils.h b/src/include/storage/table/ice_disk_utils.h new file mode 100644 index 0000000000..5b7c6eef05 --- /dev/null +++ b/src/include/storage/table/ice_disk_utils.h @@ -0,0 +1,60 @@ +#pragma once + +#include + +#include "common/constants.h" + +namespace lbug { +namespace storage { + +struct CSRFilePaths { + std::string indices; + std::string indptr; +}; + +class IceDiskUtils { +public: + // Parses "icebug-disk", "icebug-disk:", or "icebug-disk:" and returns the path + // component. Returns empty string for the first two forms (caller interprets as current dir) + static std::string getBasePath(const std::string& storage) { + if (!storage.starts_with(common::TableOptionConstants::ICEBUG_DISK_PREFIX)) { + return ""; + } + std::string_view rest = std::string_view(storage).substr( + common::TableOptionConstants::ICEBUG_DISK_PREFIX.size()); + // Strip the optional ':' separator. 
+ if (!rest.empty() && rest[0] == ':') { + rest = rest.substr(1); + } + return std::string(rest); // empty means "current directory" + } + + // Joins a base path with a filename. When base is empty the filename is returned + // as-is (i.e. relative to the current working directory) + static std::string joinPath(const std::string& base, const std::string& part) { + if (base.empty()) { + return part; + } + const char last = base.back(); + if (last == '/' || last == '\\') { + return base + part; + } + return base + "/" + part; + } + + // Get the file path for a given node table's parquet file + static std::string constructNodeTablePath(const std::string& dir, const std::string& name, + const std::string& suffix) { + return IceDiskUtils::joinPath(dir, "nodes_" + name + suffix); + } + + // Get the file paths for a given rel table's CSR files + static CSRFilePaths constructCSRPaths(const std::string& dir, const std::string& name, + const std::string& suffix) { + return {IceDiskUtils::joinPath(dir, "indices_" + name + suffix), + IceDiskUtils::joinPath(dir, "indptr_" + name + suffix)}; + } +}; + +} // namespace storage +} // namespace lbug diff --git a/src/processor/operator/scan/scan_multi_rel_tables.cpp b/src/processor/operator/scan/scan_multi_rel_tables.cpp index e6fa3f0d56..811b113ded 100644 --- a/src/processor/operator/scan/scan_multi_rel_tables.cpp +++ b/src/processor/operator/scan/scan_multi_rel_tables.cpp @@ -67,7 +67,8 @@ void ScanMultiRelTable::initGlobalStateInternal(ExecutionContext* context) { for (auto& [_, scanner] : scanners) { bool hasIceDiskTable = false; for (auto& relInfo : scanner.relInfos) { - if (const auto iceDiskRelTable = dynamic_cast(relInfo.table)) { + if (const auto iceDiskRelTable = + dynamic_cast(relInfo.table)) { iceDiskRelTable->initializeScanCoordination(transaction); hasIceDiskTable = true; break; @@ -121,8 +122,9 @@ void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionCo 
std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); } else if (hasIceDiskTable) { - scanState = std::make_unique(*MemoryManager::Get(*clientContext), - boundNodeIDVector, outVectors, nbrNodeIDVector->state); + scanState = + std::make_unique(*MemoryManager::Get(*clientContext), + boundNodeIDVector, outVectors, nbrNodeIDVector->state); } else { scanState = std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp index e949f3b6a0..5342b16dca 100644 --- a/src/processor/operator/scan/scan_node_table.cpp +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -126,7 +126,6 @@ void ScanNodeTableSharedState::nextMorsel(TableScanState& scanState, return; } - auto& nodeScanState = scanState.cast(); if (currentCommittedGroupIdx < numCommittedNodeGroups) { nodeScanState.nodeGroupIdx = currentCommittedGroupIdx++; diff --git a/src/processor/operator/scan/scan_rel_table.cpp b/src/processor/operator/scan/scan_rel_table.cpp index c1d97a6ddb..a4f94faa8d 100644 --- a/src/processor/operator/scan/scan_rel_table.cpp +++ b/src/processor/operator/scan/scan_rel_table.cpp @@ -72,7 +72,8 @@ void ScanRelTableInfo::initScanState(TableScanState& scanState, void ScanRelTable::initGlobalStateInternal(ExecutionContext* context) { if (const auto iceDiskRelTable = dynamic_cast(tableInfo.table)) { - iceDiskRelTable->initializeScanCoordination(transaction::Transaction::Get(*context->clientContext)); + iceDiskRelTable->initializeScanCoordination( + transaction::Transaction::Get(*context->clientContext)); } } @@ -96,8 +97,9 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); } else if (iceDiskTable) { - scanState = 
std::make_unique(*MemoryManager::Get(*clientContext), - boundNodeIDVector, outVectors, nbrNodeIDVector->state); + scanState = + std::make_unique(*MemoryManager::Get(*clientContext), + boundNodeIDVector, outVectors, nbrNodeIDVector->state); } else if (foreignTable) { scanState = std::make_unique(*MemoryManager::Get(*clientContext), diff --git a/src/storage/storage_manager.cpp b/src/storage/storage_manager.cpp index 627f6d1aeb..b5bbeb768e 100644 --- a/src/storage/storage_manager.cpp +++ b/src/storage/storage_manager.cpp @@ -4,6 +4,7 @@ #include "catalog/catalog_entry/node_table_catalog_entry.h" #include "catalog/catalog_entry/rel_group_catalog_entry.h" #include "common/arrow/arrow.h" +#include "common/constants.h" #include "common/file_system/virtual_file_system.h" #include "common/random_engine.h" #include "common/serializer/in_mem_file_writer.h" @@ -100,8 +101,9 @@ void StorageManager::recover(main::ClientContext& clientContext, bool throwOnWal void StorageManager::createNodeTable(NodeTableCatalogEntry* entry) { tableNameCache[entry->getTableID()] = entry->getName(); if (!entry->getStorage().empty()) { - if (entry->getStorage().find("icebug-disk") != std::string::npos) { - tables[entry->getTableID()] = std::make_unique(this, entry, &memoryManager); + if (TableOptionConstants::isIceBugDiskStorage(entry->getStorage())) { + tables[entry->getTableID()] = + std::make_unique(this, entry, &memoryManager); } else if (entry->getStorage().substr(0, 8) == "arrow://") { std::string arrowId = entry->getStorage().substr(8); ArrowSchemaWrapper* schema = nullptr; @@ -167,7 +169,7 @@ void StorageManager::addRelTable(RelGroupCatalogEntry* entry, const RelTableCata tables[info.oid] = std::make_unique(entry, info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager, fromNodeTable, toNodeTable, std::move(schemaCopy), std::move(arraysCopy), arrowId); - } else if (entry->getStorage().find("icebug-disk") != std::string::npos) { + } else if 
(TableOptionConstants::isIceBugDiskStorage(entry->getStorage())) { tables[info.oid] = std::make_unique(entry, info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager); } else { @@ -438,11 +440,13 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca ->ptrCast(); tableNameCache[tableID] = tableEntry->getName(); if (!tableEntry->getStorage().empty()) { - if (tableEntry->getStorage().find("icebug-disk") != std::string::npos) { - tables[tableID] = std::make_unique(this, tableEntry, &memoryManager); + if (TableOptionConstants::isIceBugDiskStorage(tableEntry->getStorage())) { + tables[tableID] = + std::make_unique(this, tableEntry, &memoryManager); } else { // Create parquet-backed node table - tables[tableID] = std::make_unique(this, tableEntry, &memoryManager); + tables[tableID] = + std::make_unique(this, tableEntry, &memoryManager); } } else { // Create regular node table @@ -470,7 +474,7 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca RelTableCatalogInfo info = RelTableCatalogInfo::deserialize(deSer); DASSERT(!tables.contains(info.oid)); if (!relGroupEntry->getStorage().empty()) { - if (relGroupEntry->getStorage().find("icebug-disk") != std::string::npos) { + if (TableOptionConstants::isIceBugDiskStorage(relGroupEntry->getStorage())) { tables[info.oid] = std::make_unique(relGroupEntry, info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager); } else { diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index 10d15b65aa..c939160825 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -4,6 +4,7 @@ #include #include "catalog/catalog_entry/node_table_catalog_entry.h" +#include "common/constants.h" #include "common/data_chunk/sel_vector.h" #include "common/exception/runtime.h" #include "common/file_system/virtual_file_system.h" @@ -12,6 +13,7 @@ #include 
"processor/operator/persistent/reader/parquet/parquet_reader.h" #include "storage/buffer_manager/memory_manager.h" #include "storage/storage_manager.h" +#include "storage/table/ice_disk_utils.h" #include "transaction/transaction.h" using namespace lbug::catalog; @@ -22,10 +24,9 @@ using namespace lbug::transaction; namespace lbug { namespace storage { -void IceDiskNodeTableScanState::setToTable(const transaction::Transaction* /*transaction*/, Table* table_, - std::vector columnIDs_, - std::vector columnPredicateSets_, - common::RelDataDirection /*direction*/) { +void IceDiskNodeTableScanState::setToTable(const transaction::Transaction* /*transaction*/, + Table* table_, std::vector columnIDs_, + std::vector columnPredicateSets_, common::RelDataDirection /*direction*/) { // TableScanState::setToTable(transaction, table_, columnIDs_, std::move(columnPredicateSets_)); table = table_; columnIDs = std::move(columnIDs_); @@ -37,20 +38,18 @@ void IceDiskNodeTableScanState::setToTable(const transaction::Transaction* /*tra IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, const NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager) : NodeTable{storageManager, nodeTableEntry, memoryManager}, + parquetFilePath{IceDiskUtils::constructNodeTablePath( + IceDiskUtils::getBasePath(nodeTableEntry->getStorage()), nodeTableEntry->getName(), + ".parquet")}, nodeTableCatalogEntry{nodeTableEntry}, - tableScanSharedState{std::make_unique()} { - if (nodeTableEntry->getTablePath().empty()) { - throw RuntimeException("Parquet file path is empty for icebug-disk-backed node table"); - } - - parquetFilePath = nodeTableEntry->getTablePath(); -} + tableScanSharedState{std::make_unique()} {} void IceDiskNodeTable::initializeScanCoordination(const Transaction* transaction) { auto context = transaction->getClientContext(); if (context) { auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); - auto tempReader = std::make_unique(resolvedPath, 
std::vector(), context); + auto tempReader = + std::make_unique(resolvedPath, std::vector(), context); auto metadata = tempReader->getMetadata(); uint64_t currentStartOffset = 0; @@ -68,7 +67,8 @@ void IceDiskNodeTable::initScanState(Transaction* transaction, TableScanState& s bool /*resetCachedBoundNodeSelVec*/) const { auto& iceDiskNodeScanState = static_cast(scanState); - if(iceDiskNodeScanState.currentRowGroupIdx == static_cast(common::INVALID_NODE_GROUP_IDX)) { + if (iceDiskNodeScanState.currentRowGroupIdx == + static_cast(common::INVALID_NODE_GROUP_IDX)) { iceDiskNodeScanState.scanCompleted = true; return; } @@ -124,10 +124,10 @@ void IceDiskNodeTable::initIceDiskScanForRowGroup(Transaction* transaction, // Re-initialize scan for the specific row groups // Note: initializeScan can be called multiple times; the first call populates column metadata - scanState.parquetReader->initializeScan(*scanState.parquetScanState, {scanState.currentRowGroupIdx}, vfs); + scanState.parquetReader->initializeScan(*scanState.parquetScanState, + {scanState.currentRowGroupIdx}, vfs); } - bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& scanState) { auto& iceDiskNodeScanState = static_cast(scanState); if (iceDiskNodeScanState.scanCompleted) { @@ -146,15 +146,17 @@ bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& sc return false; } - auto outputSize = std::min(scanRowGroupBatchSize, iceDiskNodeScanState.data.size() - iceDiskNodeScanState.currentRowGroupBatchOffset); - auto numColumns = - std::min(scanState.outputVectors.size(), iceDiskNodeScanState.data[iceDiskNodeScanState.currentRowGroupBatchOffset].size()); + auto outputSize = std::min(scanRowGroupBatchSize, + iceDiskNodeScanState.data.size() - iceDiskNodeScanState.currentRowGroupBatchOffset); + auto numColumns = std::min(scanState.outputVectors.size(), + iceDiskNodeScanState.data[iceDiskNodeScanState.currentRowGroupBatchOffset].size()); for (std::size_t col = 0; col < 
numColumns; ++col) { auto& dstVector = *scanState.outputVectors[col]; for (std::size_t i = 0; i < outputSize; ++i) { - auto& value = *iceDiskNodeScanState.data[iceDiskNodeScanState.currentRowGroupBatchOffset + i][col]; + auto& value = *iceDiskNodeScanState + .data[iceDiskNodeScanState.currentRowGroupBatchOffset + i][col]; if (value.isNull()) { dstVector.setNull(i, true); } else { @@ -167,7 +169,8 @@ bool IceDiskNodeTable::scanInternal(Transaction* transaction, TableScanState& sc auto& nodeID = scanState.nodeIDVector->getValue(i); nodeID.tableID = tableID; // assign parquet rowIndex - nodeID.offset = rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + iceDiskNodeScanState.currentRowGroupBatchOffset + i; + nodeID.offset = rowGroupStartOffsets[iceDiskNodeScanState.currentRowGroupIdx] + + iceDiskNodeScanState.currentRowGroupBatchOffset + i; } iceDiskNodeScanState.currentRowGroupBatchOffset += outputSize; @@ -198,7 +201,8 @@ void IceDiskNodeTable::readParquetData(Transaction* transaction, TableScanState& } // Read from parquet - iceDiskNodeScanState.parquetReader->scan(*iceDiskNodeScanState.parquetScanState, parquetDataChunk); + iceDiskNodeScanState.parquetReader->scan(*iceDiskNodeScanState.parquetScanState, + parquetDataChunk); auto selSize = parquetDataChunk.state->getSelVector().getSelSize(); if (selSize > 0) { @@ -275,7 +279,8 @@ std::size_t IceDiskNodeTable::getNumTotalRows(const Transaction* transaction) { try { auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); - auto tempReader = std::make_unique(resolvedPath, std::vector(), context); + auto tempReader = + std::make_unique(resolvedPath, std::vector(), context); return tempReader->getMetadata()->num_rows; } catch (const std::exception& e) { @@ -293,7 +298,8 @@ std::size_t IceDiskNodeTable::getNumRowGroups(const transaction::Transaction* tr try { auto resolvedPath = VirtualFileSystem::resolvePath(context, parquetFilePath); - auto tempReader = std::make_unique(resolvedPath, 
std::vector(), context); + auto tempReader = + std::make_unique(resolvedPath, std::vector(), context); return tempReader->getNumRowGroups(); } catch (const std::exception& e) { diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index cf4e56a6a3..9ce3a57c90 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -4,12 +4,14 @@ #include "catalog/catalog_entry/rel_group_catalog_entry.h" #include "common/assert.h" +#include "common/constants.h" #include "common/data_chunk/data_chunk.h" #include "common/exception/runtime.h" #include "common/file_system/virtual_file_system.h" #include "common/types/internal_id_util.h" #include "processor/operator/persistent/reader/parquet/parquet_reader.h" #include "storage/storage_manager.h" +#include "storage/table/ice_disk_utils.h" #include "transaction/transaction.h" using namespace lbug::common; @@ -21,26 +23,21 @@ namespace storage { void IceDiskRelTableScanState::setToTable(const Transaction* transaction, Table* table_, std::vector columnIDs_, - std::vector columnPredicateSets_, - common::RelDataDirection direction_) { + std::vector columnPredicateSets_, common::RelDataDirection direction_) { TableScanState::setToTable(transaction, table_, std::move(columnIDs_), std::move(columnPredicateSets_)); direction = direction_; } -IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, - common::table_id_t toTableID, const StorageManager* storageManager, - MemoryManager* memoryManager) +IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, + common::table_id_t fromTableID, common::table_id_t toTableID, + const StorageManager* storageManager, MemoryManager* memoryManager) : RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, relGroupCatalogEntry{relGroupEntry} { - if (relGroupEntry->getIndicesPath().empty()) { - throw RuntimeException("Indices file path is 
empty for icebug-disk-backed rel table"); - } - if (relGroupEntry->getIndptrPath().empty()) { - throw RuntimeException("Indptr file path is empty for icebug-disk-backed rel table"); - } - indicesFilePath = relGroupEntry->getIndicesPath(); - indptrFilePath = relGroupEntry->getIndptrPath(); + const auto base = IceDiskUtils::getBasePath(relGroupEntry->getStorage()); + auto paths = IceDiskUtils::constructCSRPaths(base, relGroupEntry->getName(), ".parquet"); + indicesFilePath = paths.indices; + indptrFilePath = paths.indptr; } void IceDiskRelTable::initializeScanCoordination(Transaction* transaction) { @@ -92,15 +89,15 @@ bool IceDiskRelTable::scanInternal(Transaction* transaction, TableScanState& sca iceState.activeEdgeEnd = 0; continue; } - iceState.activeEdgePos = range->start; - iceState.activeEdgeEnd = range->end; - iceState.activeSelPos = selPos; + iceState.activeEdgePos = range->start; + iceState.activeEdgeEnd = range->end; + iceState.activeSelPos = selPos; iceState.activeNodeOffset = nodeOffset; } - const auto [count, nextEdgePos] = collectNodeEdges(state, iceState, - {iceState.activeEdgePos, iceState.activeEdgeEnd}, - iceState.activeNodeOffset, isFwd, nbrTableID, vfs); + const auto [count, nextEdgePos] = + collectNodeEdges(state, iceState, {iceState.activeEdgePos, iceState.activeEdgeEnd}, + iceState.activeNodeOffset, isFwd, nbrTableID, vfs); iceState.activeEdgePos = nextEdgePos; if (count == 0) { @@ -133,22 +130,23 @@ void IceDiskRelTable::initIndicesReaderIfNeeded(IceDiskRelTableScanState& iceSta iceState.scanBatch = std::make_unique(numCols); for (uint32_t col = 0; col < numCols; ++col) { iceState.scanBatch->insert(col, - std::make_shared(iceState.indicesReader->getColumnType(col).copy(), memMgr)); + std::make_shared(iceState.indicesReader->getColumnType(col).copy(), + memMgr)); } } -std::optional IceDiskRelTable::getEdgeRange( - offset_t nodeOffset, bool isFwd) const { +std::optional IceDiskRelTable::getEdgeRange(offset_t nodeOffset, + bool isFwd) const { 
uint64_t start, end; if (isFwd) { if (nodeOffset + 1 >= indptrData.size()) { return std::nullopt; } start = indptrData[nodeOffset]; - end = indptrData[nodeOffset + 1]; + end = indptrData[nodeOffset + 1]; } else { start = 0; - end = indicesRGStarts.empty() ? 0 : indicesRGStarts.back(); + end = indicesRGStarts.empty() ? 0 : indicesRGStarts.back(); } if (start >= end) { return std::nullopt; @@ -185,12 +183,13 @@ IceDiskRelTable::EdgeScanProgress IceDiskRelTable::collectNodeEdges(RelTableScan bool done = false; while (!done) { - if (!iceState.indicesReader->scanInternal(*iceState.indicesScanState, *iceState.scanBatch)) { + if (!iceState.indicesReader->scanInternal(*iceState.indicesScanState, + *iceState.scanBatch)) { break; } - const auto& batchSel = iceState.scanBatch->state->getSelVector(); - const auto batchSize = batchSel.getSelSize(); - const auto& batch = *iceState.scanBatch; + const auto& batchSel = iceState.scanBatch->state->getSelVector(); + const auto batchSize = batchSel.getSelSize(); + const auto& batch = *iceState.scanBatch; for (uint64_t i = 0; i < batchSize; ++i) { const uint64_t globalRow = batchStart + i; @@ -202,7 +201,7 @@ IceDiskRelTable::EdgeScanProgress IceDiskRelTable::collectNodeEdges(RelTableScan break; } - const auto physIdx = batchSel[static_cast(i)]; + const auto physIdx = batchSel[static_cast(i)]; const auto destOffset = batch.getValueVector(0).getValue(physIdx); if (isFwd) { diff --git a/test/test_helper/test_helper.cpp b/test/test_helper/test_helper.cpp index 36b111d84c..5fb1a02b98 100644 --- a/test/test_helper/test_helper.cpp +++ b/test/test_helper/test_helper.cpp @@ -105,7 +105,32 @@ void TestHelper::executeScript(const std::string& cypherScript, Connection& conn storageIndex = end + 1; } for (auto& storagePath : storagePaths) { - if(storagePath.find("icebug-disk") != std::string::npos) { + static constexpr std::string_view iceBugPrefix = "icebug-disk"; + + if (storagePath.starts_with(iceBugPrefix)) { + // Strip "icebug-disk" prefix and 
optional ':' separator. + std::string basePath = storagePath.substr(iceBugPrefix.size()); + if (!basePath.empty() && basePath[0] == ':') { + basePath = basePath.substr(1); + } + // Resolve empty or relative paths relative to the schema's directory. + // Absolute paths and object-store URLs (contain "://") are left unchanged. + if (basePath.empty()) { + basePath = normalizePathForCypher(cypherDir.string()); + } else if (basePath.find("://") == std::string::npos && + std::filesystem::path(basePath).is_relative()) { + basePath = normalizePathForCypher((cypherDir / basePath).string()); + } + std::string resolvedStorage = std::string(iceBugPrefix) + ":" + basePath; + size_t pos = line.find(storagePath); + if (pos != std::string::npos) { + line.replace(pos, storagePath.length(), resolvedStorage); + } + continue; + } + + if (storagePath.find("://") != std::string::npos) { + // Non-icebug-disk URI — do not modify. continue; } From ace05d213f6772be8ebd21165cf77b1312ac94f7 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Fri, 8 May 2026 21:59:01 +0530 Subject: [PATCH 16/17] add ice-disk impl spec --- docs/icebug-disk.md | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 docs/icebug-disk.md diff --git a/docs/icebug-disk.md b/docs/icebug-disk.md new file mode 100644 index 0000000000..3c7a6245ce --- /dev/null +++ b/docs/icebug-disk.md @@ -0,0 +1,48 @@ +# Icebug-Disk Storage Format + +## Overview + +This is LadybugDB's implementation of [Icebug-Disk](https://github.com/Ladybug-Memory/icebug-format), a read-only graph storage format based on Parquet files. It is designed for efficient analytical queries on large graphs. + +## V1 + +Implements Icebug-Disk v1. + +### Version + +Version is stored in each file's metadata footer as a key-value pair: `icebug_disk_version = 1`. + +### schema.cypher + +The graph schema is declared in a `schema.cypher`, which can be loaded using `lbug -i schema.cypher` to create tables in Ladybug. 
+ +```cypher +CREATE NODE TABLE city(id INT32, name STRING, population INT64, PRIMARY KEY(id)) WITH (storage = 'icebug-disk:'); +CREATE NODE TABLE user(id INT32, name STRING, age INT64, PRIMARY KEY(id)) WITH (storage = 'icebug-disk:'); +CREATE REL TABLE follows(FROM user TO user, since INT32) WITH (storage = 'icebug-disk:'); +CREATE REL TABLE livesin(FROM user TO city) WITH (storage = 'icebug-disk:'); +``` + +File paths can be relative or absolute and are resolved as `{path}/nodes_{tableName}.parquet` for node tables, and `{path}/indices_{tableName}.parquet` and `{path}/indptr_{tableName}.parquet` for relationship tables, where `{path}` is the path given after the `icebug-disk:` prefix in the `storage` option. + +`storage = 'icebug-disk'` and `storage = 'icebug-disk:'` both resolve to the current working directory. + +Tables can also be created by manually running the above queries in the Ladybug CLI. + +If the directory is moved, the affected tables must be dropped and re-created with the updated path. This updates the file pointers in the catalog. A fresh db instance can also be used to run the `CREATE TABLE` queries with the new paths. + +Mixed tables are not supported — queries involving both `icebug-disk` and non-`icebug-disk` tables will throw a `BinderException`. + +### Node tables + +For each node table, there is a corresponding Parquet file named `nodes_{tableName}.parquet` containing a primary key column and one column per property as declared in the schema. + +### Indices + +Each relationship table has a corresponding `indices_{tableName}.parquet` file containing one row per edge. The first column is always `target` (the destination node offset), followed by zero or more edge property columns as declared in the schema. + +### Indptr + +Each relationship table has a corresponding `indptr_{tableName}.parquet` file containing the CSR row pointers. It has a single integer column with `N+1` entries, where `N` is the number of source nodes. 
+ + From 7b0f84ab5266fe46ba43ced8eb3a3b8923dd99ef Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Sat, 9 May 2026 19:09:15 +0530 Subject: [PATCH 17/17] fix large dataset scan issues --- .../storage/table/columnar_node_table_base.h | 1 + .../storage/table/ice_disk_node_table.h | 2 + src/include/storage/table/node_table.h | 5 + src/optimizer/filter_push_down_optimizer.cpp | 16 +- .../operator/scan/count_rel_table.cpp | 3 +- src/storage/table/ice_disk_node_table.cpp | 111 ++++------ src/storage/table/ice_disk_rel_table.cpp | 16 +- .../test_files/demo_db/demo_db_graph_std.test | 202 ++++++++++++++++++ test/test_files/demo_db/demo_db_ice_disk.test | 202 ++++++++++++++++++ 9 files changed, 483 insertions(+), 75 deletions(-) diff --git a/src/include/storage/table/columnar_node_table_base.h b/src/include/storage/table/columnar_node_table_base.h index fed8e34492..f38141eb6c 100644 --- a/src/include/storage/table/columnar_node_table_base.h +++ b/src/include/storage/table/columnar_node_table_base.h @@ -44,6 +44,7 @@ class ColumnarNodeTableBase : public NodeTable { virtual ~ColumnarNodeTableBase() = default; + bool supportsPrimaryKeyScan() const override { return false; } // Columnar tables don't support modifications void insert([[maybe_unused]] transaction::Transaction* transaction, [[maybe_unused]] TableInsertState& insertState) final { diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index bc30a263bb..93b12ed706 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -72,6 +72,8 @@ class IceDiskNodeTable final : public NodeTable { bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override; + bool supportsPrimaryKeyScan() const override { return false; } + void insert(transaction::Transaction*, TableInsertState&) override { throw common::RuntimeException("Cannot insert into icebug-disk-backed node table"); } diff --git 
a/src/include/storage/table/node_table.h b/src/include/storage/table/node_table.h index 87c2c0fbf0..5e5f12e5ee 100644 --- a/src/include/storage/table/node_table.h +++ b/src/include/storage/table/node_table.h @@ -154,6 +154,11 @@ class LBUG_API NodeTable : public Table { virtual bool lookupPK(const transaction::Transaction* transaction, common::ValueVector* keyVector, uint64_t vectorPos, common::offset_t& result) const; + // Returns true if this table supports efficient PK index-based lookup (PRIMARY_KEY_SCAN). + // Tables without a hash index (e.g. IceDisk) must return false so the optimizer falls back + // to a FILTER+SCAN plan instead of generating PRIMARY_KEY_SCAN nodes. + virtual bool supportsPrimaryKeyScan() const { return tryGetPKIndex() != nullptr; } + void addIndex(std::unique_ptr index); void dropIndex(const std::string& name); diff --git a/src/optimizer/filter_push_down_optimizer.cpp b/src/optimizer/filter_push_down_optimizer.cpp index f9018b6314..ad38adcf0a 100644 --- a/src/optimizer/filter_push_down_optimizer.cpp +++ b/src/optimizer/filter_push_down_optimizer.cpp @@ -4,12 +4,15 @@ #include "binder/expression/property_expression.h" #include "binder/expression/scalar_function_expression.h" #include "main/client_context.h" +#include "main/database.h" #include "planner/operator/extend/logical_extend.h" #include "planner/operator/logical_empty_result.h" #include "planner/operator/logical_filter.h" #include "planner/operator/logical_hash_join.h" #include "planner/operator/logical_table_function_call.h" #include "planner/operator/scan/logical_scan_node_table.h" +#include "storage/storage_manager.h" +#include "storage/table/node_table.h" using namespace lbug::binder; using namespace lbug::common; @@ -194,13 +197,22 @@ std::shared_ptr FilterPushDownOptimizer::visitScanNodeTableRepl } if (primaryKeyEqualityComparison != nullptr) { // Try rewrite index scan auto rhs = primaryKeyEqualityComparison->getChild(1); + bool canDoPKScan = false; if 
(isConstantExpression(rhs)) { - auto extraInfo = std::make_unique(rhs); + auto* nodeTable = context->getDatabase() + ->getStorageManager() + ->getTable(tableIDs[0]) + ->ptrCast(); + canDoPKScan = nodeTable->supportsPrimaryKeyScan(); + } + if (canDoPKScan) { + auto extraInfo = std::make_unique( + primaryKeyEqualityComparison->getChild(1)); scan.setScanType(LogicalScanNodeTableType::PRIMARY_KEY_SCAN); scan.setExtraInfo(std::move(extraInfo)); scan.computeFlatSchema(); } else { - // Cannot rewrite and add predicate back. + // Cannot rewrite (no PK hash index or non-constant RHS); add predicate back. predicateSet.addPredicate(primaryKeyEqualityComparison); } } diff --git a/src/processor/operator/scan/count_rel_table.cpp b/src/processor/operator/scan/count_rel_table.cpp index 18511f1b72..7c7c15bff4 100644 --- a/src/processor/operator/scan/count_rel_table.cpp +++ b/src/processor/operator/scan/count_rel_table.cpp @@ -10,6 +10,7 @@ #include "storage/table/column.h" #include "storage/table/column_chunk_data.h" #include "storage/table/columnar_rel_table_base.h" +#include "storage/table/ice_disk_rel_table.h" #include "storage/table/csr_chunked_node_group.h" #include "storage/table/csr_node_group.h" #include "storage/table/rel_table_data.h" @@ -39,7 +40,7 @@ bool CountRelTable::getNextTuplesInternal(ExecutionContext* context) { auto* memoryManager = context->clientContext->getDatabase()->getMemoryManager(); for (auto* relTable : relTables) { - if (dynamic_cast(relTable) != nullptr) { + if (dynamic_cast(relTable) != nullptr || dynamic_cast(relTable) != nullptr) { totalCount += relTable->getNumTotalRows(transaction); continue; } diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index c939160825..fde1a83398 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -187,81 +187,58 @@ void IceDiskNodeTable::readParquetData(Transaction* transaction, TableScanState& throw 
RuntimeException("Parquet file '" + parquetFilePath + "' has no columns"); } - // Create vectors with parquet types - // Always create the data chunk to match the exact number of parquet columns - // to prevent crashes in the parquet reader when accessing result vectors - DataChunk parquetDataChunk(numColumns, scanState.outState); - + // Fresh DataChunk with its own state — do NOT share scanState.outState; we accumulate + // rows across batches while the output state is managed by scanInternal() above. + DataChunk parquetDataChunk(numColumns); + auto* memMgr = MemoryManager::Get(*transaction->getClientContext()); for (uint32_t i = 0; i < numColumns; ++i) { - const auto& parquetColumnType = iceDiskNodeScanState.parquetReader->getColumnType(i); - auto columnType = parquetColumnType.copy(); - auto vector = std::make_shared(std::move(columnType), - MemoryManager::Get(*transaction->getClientContext()), scanState.outState); - parquetDataChunk.insert(i, vector); + auto columnType = iceDiskNodeScanState.parquetReader->getColumnType(i).copy(); + parquetDataChunk.insert(i, std::make_shared(std::move(columnType), memMgr)); } - // Read from parquet - iceDiskNodeScanState.parquetReader->scan(*iceDiskNodeScanState.parquetScanState, - parquetDataChunk); - - auto selSize = parquetDataChunk.state->getSelVector().getSelSize(); - if (selSize > 0) { - iceDiskNodeScanState.data.resize(selSize); - for (std::size_t row = 0; row < selSize; ++row) { - iceDiskNodeScanState.data[row].resize( - scanState.outputVectors - .size()); // Use output vector count, not parquet column count - - // Map parquet columns to correct output vector positions by name - // Defensive check: ensure we don't access more columns than available in the chunk - auto maxParquetCol = std::min(static_cast(numColumns), - static_cast(parquetDataChunk.getNumValueVectors())); - - for (std::size_t parquetCol = 0; parquetCol < maxParquetCol; ++parquetCol) { - // Defensive check: ensure the column index is valid for the data 
chunk - if (parquetCol >= parquetDataChunk.getNumValueVectors()) { - continue; - } + // Pre-compute parquet-column → output-column mapping once. + const auto numCols = static_cast(parquetDataChunk.getNumValueVectors()); + std::vector colMap(numCols, INVALID_COLUMN_ID); + for (std::size_t pc = 0; pc < numCols; ++pc) { + const auto& name = iceDiskNodeScanState.parquetReader->getColumnName(pc); + if (!nodeTableCatalogEntry->containsProperty(name)) { + continue; + } + const auto colID = nodeTableCatalogEntry->getColumnID(name); + for (std::size_t oc = 0; oc < scanState.columnIDs.size(); ++oc) { + if (scanState.columnIDs[oc] == colID) { + colMap[pc] = oc; + break; + } + } + } - auto& srcVector = parquetDataChunk.getValueVectorMutable(parquetCol); + // scanInternal() returns true on the initial row-group setup call (batchSize == 0) and on + // each data batch; returns false when the row group is exhausted. Loop to read ALL rows. + while (iceDiskNodeScanState.parquetReader->scanInternal( + *iceDiskNodeScanState.parquetScanState, parquetDataChunk)) { + const auto batchSize = parquetDataChunk.state->getSelVector().getSelSize(); + if (batchSize == 0) { + continue; // row-group setup call — no data yet + } - // Get parquet column name and find its corresponding column ID - std::string parquetColumnName = - iceDiskNodeScanState.parquetReader->getColumnName(parquetCol); + const auto base = iceDiskNodeScanState.data.size(); + iceDiskNodeScanState.data.resize(base + batchSize); - // Check if the column exists first before calling getColumnID - if (!nodeTableCatalogEntry->containsProperty(parquetColumnName)) { - // Column doesn't exist in table schema, skip it + for (std::size_t row = 0; row < batchSize; ++row) { + iceDiskNodeScanState.data[base + row].resize(scanState.outputVectors.size()); + for (std::size_t pc = 0; pc < numCols; ++pc) { + const auto oc = colMap[pc]; + if (oc == INVALID_COLUMN_ID || oc >= iceDiskNodeScanState.data[base + row].size()) { continue; } - - // Find 
the column ID for this property name - column_id_t parquetColumnID = nodeTableCatalogEntry->getColumnID(parquetColumnName); - - // Find which output vector position corresponds to this column ID - std::size_t outputCol = INVALID_COLUMN_ID; - for (std::size_t outCol = 0; outCol < scanState.columnIDs.size(); ++outCol) { - if (scanState.columnIDs[outCol] == parquetColumnID) { - outputCol = outCol; - break; - } - } - - // Only copy data if we found a matching output position - if (outputCol != INVALID_COLUMN_ID && - outputCol < iceDiskNodeScanState.data[row].size()) { - // Defensive check: ensure the row index is valid for the source vector - if (row >= srcVector.state->getSelVector().getSelSize()) { - continue; - } - - if (srcVector.isNull(row)) { - iceDiskNodeScanState.data[row][outputCol] = - std::make_unique(Value::createNullValue()); - } else { - iceDiskNodeScanState.data[row][outputCol] = - std::make_unique(*srcVector.getAsValue(row)); - } + auto& srcVector = parquetDataChunk.getValueVectorMutable(pc); + if (srcVector.isNull(row)) { + iceDiskNodeScanState.data[base + row][oc] = + std::make_unique(Value::createNullValue()); + } else { + iceDiskNodeScanState.data[base + row][oc] = + std::make_unique(*srcVector.getAsValue(row)); } } } diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index 9ce3a57c90..fe79626a99 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -157,10 +157,6 @@ std::optional IceDiskRelTable::getEdgeRange(offset_t IceDiskRelTable::EdgeScanProgress IceDiskRelTable::collectNodeEdges(RelTableScanState& state, IceDiskRelTableScanState& iceState, EdgeRange range, offset_t nodeOffset, bool isFwd, table_id_t nbrTableID, VirtualFileSystem* vfs) const { - // Reset selSize so the parquet reader's "setup" return (true, no data read) is not - // mistaken for a batch of stale data left over from the previous node's scan. 
- iceState.scanBatch->state->getSelVectorUnsafe().setSelSize(0); - // Locate the first row group containing range.start. auto it = std::upper_bound(indicesRGStarts.begin(), indicesRGStarts.end(), range.start); DASSERT(it != indicesRGStarts.begin()); @@ -183,6 +179,10 @@ IceDiskRelTable::EdgeScanProgress IceDiskRelTable::collectNodeEdges(RelTableScan bool done = false; while (!done) { + // Reset selSize before each scanInternal call: on a row-group transition scanInternal + // returns true without writing data and without updating selSize, so the stale value + // from the previous batch would otherwise be misread as real data. + iceState.scanBatch->state->getSelVectorUnsafe().setSelSize(0); if (!iceState.indicesReader->scanInternal(*iceState.indicesScanState, *iceState.scanBatch)) { break; @@ -274,7 +274,13 @@ void IceDiskRelTable::loadIndptrData(Transaction* transaction) { DataChunk chunk(1); chunk.insert(0, std::make_shared(reader->getColumnType(0).copy())); - while (reader->scanInternal(scanState, chunk)) { + while (true) { + // Reset selSize before each call so row-group transition calls (which return true + // without updating selSize) are not mistaken for a stale data batch. 
+ chunk.state->getSelVectorUnsafe().setSelSize(0); + if (!reader->scanInternal(scanState, chunk)) { + break; + } auto& sel = chunk.state->getSelVector(); for (size_t i = 0; i < sel.getSelSize(); ++i) { indptrData.push_back(chunk.getValueVector(0).getValue(sel[i])); diff --git a/test/test_files/demo_db/demo_db_graph_std.test b/test/test_files/demo_db/demo_db_graph_std.test index 282558c882..a726462e6b 100644 --- a/test/test_files/demo_db/demo_db_graph_std.test +++ b/test/test_files/demo_db/demo_db_graph_std.test @@ -75,3 +75,205 @@ Adam|Karissa|40 ---- 2 Adam|Karissa Adam|Zhang + +-LOG CountStarUsers +-STATEMENT MATCH (u:user) RETURN count(*); +---- 1 +4 + +-LOG CountStarEdgesFollows +-STATEMENT MATCH ()-[:follows]->() RETURN count(*); +---- 1 +4 + +-LOG CountStarEdgesLivesIn +-STATEMENT MATCH ()-[:livesin]->() RETURN count(*); +---- 1 +4 + +-LOG AggregateAvgAge +-STATEMENT MATCH (u:user) RETURN avg(u.age); +---- 1 +36.250000 + +-LOG AggregateMinMaxAge +-STATEMENT MATCH (u:user) RETURN min(u.age), max(u.age); +---- 1 +25|50 + +-LOG AggregateSumSince +-STATEMENT MATCH ()-[f:follows]->() RETURN sum(f.since); +---- 1 +8083 + +-LOG AggregateAvgFolloweeAge +-STATEMENT MATCH (u:user)-[:follows]->(v) RETURN u.name, avg(v.age) ORDER BY u.name; +---- 3 +Adam|45.000000 +Karissa|50.000000 +Zhang|25.000000 + +-LOG OutDegreePerUser +-STATEMENT MATCH (u:user)-[:follows]->(v) RETURN u.name, count(v) ORDER BY u.name; +---- 3 +Adam|2 +Karissa|1 +Zhang|1 + +-LOG TwoHopForward +-STATEMENT MATCH (a:user {id: 100})-[:follows]->(b)-[:follows]->(c) RETURN a.name, b.name, c.name ORDER BY b.name; +---- 2 +Adam|Karissa|Zhang +Adam|Zhang|Noura + +-LOG TwoHopCrossRel +-STATEMENT MATCH (a:user {id: 100})-[:follows]->(b)-[:livesin]->(c:city) RETURN b.name, c.name ORDER BY b.name; +---- 2 +Karissa|Waterloo +Zhang|Kitchener + +-LOG TwoHopCount +-STATEMENT MATCH (a:user {id: 100})-[:follows]->(b)-[:follows]->(c) RETURN count(c); +---- 1 +2 + +-LOG BackwardInNeighbors +-STATEMENT MATCH 
(u)<-[:follows]-(v) WHERE u.id = 300 RETURN v.name ORDER BY v.name; +---- 2 +Adam +Karissa + +-LOG BackwardInDegree +-STATEMENT MATCH (u)<-[:follows]-(v) WHERE u.id = 300 RETURN count(v); +---- 1 +2 + +-LOG OptionalMatchFollows +-STATEMENT MATCH (u:user) OPTIONAL MATCH (u)-[:follows]->(v:user) RETURN u.name, v.name ORDER BY u.name, v.name; +---- 5 +Adam|Karissa +Adam|Zhang +Karissa|Zhang +Noura| +Zhang|Noura + +-LOG UndirectedFollows +-STATEMENT MATCH (a:user)-[:follows]-(b:user) RETURN count(*); +---- 1 +8 + +-LOG UndirectedLivesIn +-STATEMENT MATCH (a:user)-[:livesin]-(c:city) RETURN a.name, c.name ORDER BY a.name; +---- 4 +Adam|Waterloo +Karissa|Waterloo +Noura|Guelph +Zhang|Kitchener + +-LOG FilterRelProperty +-STATEMENT MATCH (a:user)-[e:follows {since: 2020}]->(b:user) RETURN a.name, b.name ORDER BY b.name; +---- 2 +Adam|Karissa +Adam|Zhang + +-LOG ReturnDistinctSince +-STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN DISTINCT e.since ORDER BY e.since; +---- 3 +2020 +2021 +2022 + +-LOG CountDistinctId +-STATEMENT MATCH (a:user {id: 100})-[:follows]->(b)-[:follows]->(c) RETURN count(DISTINCT c.id); +---- 1 +2 + +-LOG SkipRows +-STATEMENT MATCH (u:user) RETURN u.name ORDER BY u.age SKIP 2; +---- 2 +Karissa +Zhang + +-LOG WithPipeline +-STATEMENT MATCH (u:user) WITH avg(u.age) AS avgAge MATCH (b:user) WHERE b.age > avgAge RETURN b.name ORDER BY b.name; +---- 2 +Karissa +Zhang + +-LOG UnionAll +-STATEMENT MATCH (u:user)-[:livesin]->(c:city) WHERE c.name = 'Waterloo' RETURN u.name UNION ALL MATCH (u2:user)-[:livesin]->(c2:city) WHERE c2.name = 'Kitchener' RETURN u2.name; +---- 3 +Zhang +Adam +Karissa + +-LOG UnionAllAge +-STATEMENT MATCH (u:user)-[:follows]->(v:user) WHERE v.name = 'Zhang' RETURN u.age UNION ALL MATCH (u2:user)-[:follows]->(v2:user) WHERE v2.name = 'Karissa' RETURN u2.age; +---- 3 +30 +40 +30 + +-LOG UnionDistinct +-STATEMENT MATCH (u:user)-[:follows]->(v:user) WHERE v.name = 'Zhang' RETURN u.age UNION MATCH (u2:user)-[:follows]->(v2:user) 
WHERE v2.name = 'Karissa' RETURN u2.age; +---- 2 +30 +40 + +-LOG Unwind1 +-STATEMENT UNWIND ['Amy', 'Bob', 'Carol'] AS x RETURN 'name' AS label, x; +---- 3 +name|Amy +name|Bob +name|Carol + +-LOG Unwind2 +-STATEMENT UNWIND [['Amy'], ['Bob', 'Carol']] AS x RETURN x; +---- 2 +[Amy] +[Bob,Carol] + +-LOG WhereOR +-STATEMENT MATCH (a:user) WHERE a.age > 45 OR starts_with(a.name, 'Kar') RETURN a.name ORDER BY a.name; +---- 2 +Karissa +Zhang + +-LOG WhereIsNotNull +-STATEMENT MATCH (a:user) WHERE a.age IS NOT NULL AND starts_with(a.name, 'Kar') RETURN a.name; +---- 1 +Karissa + +-LOG WhereExists +-STATEMENT MATCH (u:user) WHERE EXISTS { MATCH (u)-[:follows]->(v) } RETURN u.name ORDER BY u.name; +---- 3 +Adam +Karissa +Zhang + +-LOG WhereExistsWithFilter +-STATEMENT MATCH (a:user) WHERE EXISTS { MATCH (a)-[:follows]->(b:user) WHERE b.age > 45 } RETURN a.name ORDER BY a.name; +---- 2 +Adam +Karissa + +-LOG CyclicTriangle +-STATEMENT MATCH (a:user)-[:follows]->(b:user)-[:follows]->(c:user), (a)-[:follows]->(c) RETURN a.name, b.name, c.name; +---- 1 +Adam|Karissa|Zhang + +-LOG VarLenOneToTwo +-STATEMENT MATCH (a:user)-[:follows*1..2]->(b:user) RETURN a.name, b.name ORDER BY a.name, b.name; +---- 7 +Adam|Karissa +Adam|Noura +Adam|Zhang +Adam|Zhang +Karissa|Noura +Karissa|Zhang +Zhang|Noura + +-LOG VarLenThreeHop +-STATEMENT MATCH (a:user)-[:follows*3..3]->(b:user) RETURN a.name, b.name ORDER BY a.name, b.name; +---- 1 +Adam|Noura diff --git a/test/test_files/demo_db/demo_db_ice_disk.test b/test/test_files/demo_db/demo_db_ice_disk.test index 7d938f7706..c673d64967 100644 --- a/test/test_files/demo_db/demo_db_ice_disk.test +++ b/test/test_files/demo_db/demo_db_ice_disk.test @@ -75,3 +75,205 @@ Adam|Karissa|40 ---- 2 Adam|Karissa Adam|Zhang + +-LOG CountStarUsers +-STATEMENT MATCH (u:user) RETURN count(*); +---- 1 +4 + +-LOG CountStarEdgesFollows +-STATEMENT MATCH ()-[:follows]->() RETURN count(*); +---- 1 +4 + +-LOG CountStarEdgesLivesIn +-STATEMENT MATCH ()-[:livesin]->() 
RETURN count(*); +---- 1 +4 + +-LOG AggregateAvgAge +-STATEMENT MATCH (u:user) RETURN avg(u.age); +---- 1 +36.250000 + +-LOG AggregateMinMaxAge +-STATEMENT MATCH (u:user) RETURN min(u.age), max(u.age); +---- 1 +25|50 + +-LOG AggregateSumSince +-STATEMENT MATCH ()-[f:follows]->() RETURN sum(f.since); +---- 1 +8083 + +-LOG AggregateAvgFolloweeAge +-STATEMENT MATCH (u:user)-[:follows]->(v) RETURN u.name, avg(v.age) ORDER BY u.name; +---- 3 +Adam|45.000000 +Karissa|50.000000 +Zhang|25.000000 + +-LOG OutDegreePerUser +-STATEMENT MATCH (u:user)-[:follows]->(v) RETURN u.name, count(v) ORDER BY u.name; +---- 3 +Adam|2 +Karissa|1 +Zhang|1 + +-LOG TwoHopForward +-STATEMENT MATCH (a:user {id: 100})-[:follows]->(b)-[:follows]->(c) RETURN a.name, b.name, c.name ORDER BY b.name; +---- 2 +Adam|Karissa|Zhang +Adam|Zhang|Noura + +-LOG TwoHopCrossRel +-STATEMENT MATCH (a:user {id: 100})-[:follows]->(b)-[:livesin]->(c:city) RETURN b.name, c.name ORDER BY b.name; +---- 2 +Karissa|Waterloo +Zhang|Kitchener + +-LOG TwoHopCount +-STATEMENT MATCH (a:user {id: 100})-[:follows]->(b)-[:follows]->(c) RETURN count(c); +---- 1 +2 + +-LOG BackwardInNeighbors +-STATEMENT MATCH (u)<-[:follows]-(v) WHERE u.id = 300 RETURN v.name ORDER BY v.name; +---- 2 +Adam +Karissa + +-LOG BackwardInDegree +-STATEMENT MATCH (u)<-[:follows]-(v) WHERE u.id = 300 RETURN count(v); +---- 1 +2 + +-LOG OptionalMatchFollows +-STATEMENT MATCH (u:user) OPTIONAL MATCH (u)-[:follows]->(v:user) RETURN u.name, v.name ORDER BY u.name, v.name; +---- 5 +Adam|Karissa +Adam|Zhang +Karissa|Zhang +Noura| +Zhang|Noura + +-LOG UndirectedFollows +-STATEMENT MATCH (a:user)-[:follows]-(b:user) RETURN count(*); +---- 1 +8 + +-LOG UndirectedLivesIn +-STATEMENT MATCH (a:user)-[:livesin]-(c:city) RETURN a.name, c.name ORDER BY a.name; +---- 4 +Adam|Waterloo +Karissa|Waterloo +Noura|Guelph +Zhang|Kitchener + +-LOG FilterRelProperty +-STATEMENT MATCH (a:user)-[e:follows {since: 2020}]->(b:user) RETURN a.name, b.name ORDER BY b.name; +---- 2 
+Adam|Karissa +Adam|Zhang + +-LOG ReturnDistinctSince +-STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN DISTINCT e.since ORDER BY e.since; +---- 3 +2020 +2021 +2022 + +-LOG CountDistinctId +-STATEMENT MATCH (a:user {id: 100})-[:follows]->(b)-[:follows]->(c) RETURN count(DISTINCT c.id); +---- 1 +2 + +-LOG CyclicTriangle +-STATEMENT MATCH (a:user)-[:follows]->(b:user)-[:follows]->(c:user), (a)-[:follows]->(c) RETURN a.name, b.name, c.name; +---- 1 +Adam|Karissa|Zhang + +-LOG SkipRows +-STATEMENT MATCH (u:user) RETURN u.name ORDER BY u.age SKIP 2; +---- 2 +Karissa +Zhang + +-LOG WithPipeline +-STATEMENT MATCH (u:user) WITH avg(u.age) AS avgAge MATCH (b:user) WHERE b.age > avgAge RETURN b.name ORDER BY b.name; +---- 2 +Karissa +Zhang + +-LOG UnionAll +-STATEMENT MATCH (u:user)-[:livesin]->(c:city) WHERE c.name = 'Waterloo' RETURN u.name UNION ALL MATCH (u2:user)-[:livesin]->(c2:city) WHERE c2.name = 'Kitchener' RETURN u2.name; +---- 3 +Zhang +Adam +Karissa + +-LOG UnionAllAge +-STATEMENT MATCH (u:user)-[:follows]->(v:user) WHERE v.name = 'Zhang' RETURN u.age UNION ALL MATCH (u2:user)-[:follows]->(v2:user) WHERE v2.name = 'Karissa' RETURN u2.age; +---- 3 +30 +40 +30 + +-LOG UnionDistinct +-STATEMENT MATCH (u:user)-[:follows]->(v:user) WHERE v.name = 'Zhang' RETURN u.age UNION MATCH (u2:user)-[:follows]->(v2:user) WHERE v2.name = 'Karissa' RETURN u2.age; +---- 2 +30 +40 + +-LOG Unwind1 +-STATEMENT UNWIND ['Amy', 'Bob', 'Carol'] AS x RETURN 'name' AS label, x; +---- 3 +name|Amy +name|Bob +name|Carol + +-LOG Unwind2 +-STATEMENT UNWIND [['Amy'], ['Bob', 'Carol']] AS x RETURN x; +---- 2 +[Amy] +[Bob,Carol] + +-LOG WhereOR +-STATEMENT MATCH (a:user) WHERE a.age > 45 OR starts_with(a.name, 'Kar') RETURN a.name ORDER BY a.name; +---- 2 +Karissa +Zhang + +-LOG WhereIsNotNull +-STATEMENT MATCH (a:user) WHERE a.age IS NOT NULL AND starts_with(a.name, 'Kar') RETURN a.name; +---- 1 +Karissa + +-LOG WhereExists +-STATEMENT MATCH (u:user) WHERE EXISTS { MATCH (u)-[:follows]->(v) 
} RETURN u.name ORDER BY u.name; +---- 3 +Adam +Karissa +Zhang + +-LOG WhereExistsWithFilter +-STATEMENT MATCH (a:user) WHERE EXISTS { MATCH (a)-[:follows]->(b:user) WHERE b.age > 45 } RETURN a.name ORDER BY a.name; +---- 2 +Adam +Karissa + +-LOG VarLenOneToTwo +-STATEMENT MATCH (a:user)-[:follows*1..2]->(b:user) RETURN a.name, b.name ORDER BY a.name, b.name; +---- 7 +Adam|Karissa +Adam|Noura +Adam|Zhang +Adam|Zhang +Karissa|Noura +Karissa|Zhang +Zhang|Noura + +-LOG VarLenThreeHop +-STATEMENT MATCH (a:user)-[:follows*3..3]->(b:user) RETURN a.name, b.name ORDER BY a.name, b.name; +---- 1 +Adam|Noura