Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "velox/connectors/hive/HiveConfig.h"
#include "velox/connectors/hive/HiveConnectorSplit.h"
#include "velox/exec/PlanNodeStats.h"
#include "velox/functions/sparksql/SparkQueryConfig.h"
#ifdef GLUTEN_ENABLE_GPU
#include <cudf/io/types.hpp>
#include "cudf/GpuLock.h"
Expand All @@ -33,6 +34,7 @@
#include "operators/plannodes/RowVectorStream.h"

using namespace facebook;
using facebook::velox::functions::sparksql::SparkQueryConfig;

namespace gluten {

Expand Down Expand Up @@ -586,7 +588,8 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kPreferredOutputBatchBytes] =
std::to_string(veloxCfg_->get<uint64_t>(kVeloxPreferredBatchBytes, 10L << 20));
try {
configs[velox::core::QueryConfig::kSparkAnsiEnabled] = veloxCfg_->get<std::string>(kAnsiEnabled, "false");
configs[SparkQueryConfig::qualify(SparkQueryConfig::kAnsiEnabled)] =
veloxCfg_->get<std::string>(kAnsiEnabled, "false");
configs[velox::core::QueryConfig::kSessionTimezone] =
normalizeSessionTimezone(veloxCfg_->get<std::string>(kSessionTimezone, ""));
// Adjust timestamp according to the above configured session timezone.
Expand Down Expand Up @@ -662,17 +665,17 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
std::to_string(veloxCfg_->get<uint64_t>(kHashProbeBloomFilterPushdownMaxSize, 0));

if (const auto opt = veloxCfg_->get<std::string>(kSparkBloomFilterExpectedNumItems)) {
configs[velox::core::QueryConfig::kSparkBloomFilterExpectedNumItems] = opt.value();
configs[SparkQueryConfig::qualify(SparkQueryConfig::kBloomFilterExpectedNumItems)] = opt.value();
}
if (const auto opt = veloxCfg_->get<std::string>(kSparkBloomFilterNumBits)) {
configs[velox::core::QueryConfig::kSparkBloomFilterNumBits] = opt.value();
configs[SparkQueryConfig::qualify(SparkQueryConfig::kBloomFilterNumBits)] = opt.value();
}
if (const auto opt = veloxCfg_->get<std::string>(kSparkBloomFilterMaxNumBits)) {
// Velox checks that the memory does not exceed 4194304.
configs[velox::core::QueryConfig::kSparkBloomFilterMaxNumBits] = opt.value();
configs[SparkQueryConfig::qualify(SparkQueryConfig::kBloomFilterMaxNumBits)] = opt.value();
}
if (const auto opt = veloxCfg_->get<std::string>(kSparkBloomFilterMaxNumItems)) {
configs[velox::core::QueryConfig::kSparkBloomFilterMaxNumItems] = opt.value();
configs[SparkQueryConfig::qualify(SparkQueryConfig::kBloomFilterMaxNumItems)] = opt.value();
}
// spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver takes no effect if
// spark.gluten.sql.columnar.backend.velox.IOThreads is set to 0
Expand All @@ -688,14 +691,14 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
// Disable driver cpu time slicing.
configs[velox::core::QueryConfig::kDriverCpuTimeSliceLimitMs] = "0";

configs[velox::core::QueryConfig::kSparkPartitionId] = std::to_string(taskInfo_.partitionId);
configs[SparkQueryConfig::qualify(SparkQueryConfig::kPartitionId)] = std::to_string(taskInfo_.partitionId);

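// Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY'.
// NOTE(review): the comparison below is case-sensitive, so a lowercase 'legacy' only matches
// if the value is upper-cased before it reaches this point — confirm.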
if (veloxCfg_->get<std::string>(kSparkLegacyTimeParserPolicy, "") == "LEGACY") {
configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "true";
configs[SparkQueryConfig::qualify(SparkQueryConfig::kLegacyDateFormatter)] = "true";
} else {
configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "false";
configs[SparkQueryConfig::qualify(SparkQueryConfig::kLegacyDateFormatter)] = "false";
}

if (veloxCfg_->get<std::string>(kSparkMapKeyDedupPolicy, "") == "EXCEPTION") {
Expand All @@ -704,10 +707,10 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kThrowExceptionOnDuplicateMapKeys] = "false";
}

configs[velox::core::QueryConfig::kSparkLegacyStatisticalAggregate] =
configs[SparkQueryConfig::qualify(SparkQueryConfig::kLegacyStatisticalAggregate)] =
std::to_string(veloxCfg_->get<bool>(kSparkLegacyStatisticalAggregate, false));

configs[velox::core::QueryConfig::kSparkJsonIgnoreNullFields] =
configs[SparkQueryConfig::qualify(SparkQueryConfig::kJsonIgnoreNullFields)] =
std::to_string(veloxCfg_->get<bool>(kSparkJsonIgnoreNullFields, true));

configs[velox::core::QueryConfig::kExprMaxCompiledRegexes] =
Expand Down
6 changes: 4 additions & 2 deletions cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@
#include "config/VeloxConfig.h"
#include "operators/plannodes/IteratorSplit.h"
#include "velox/core/QueryCtx.h"
#include "velox/functions/sparksql/SparkQueryConfig.h"

using namespace facebook;

using facebook::velox::functions::sparksql::SparkQueryConfig;
namespace gluten {

/// This class is used to validate whether the computing of
Expand All @@ -33,7 +34,8 @@ class SubstraitToVeloxPlanValidator {
public:
SubstraitToVeloxPlanValidator(memory::MemoryPool* pool) {
std::unordered_map<std::string, std::string> configs{
{velox::core::QueryConfig::kSparkPartitionId, "0"}, {velox::core::QueryConfig::kSessionTimezone, "UTC"}};
{SparkQueryConfig::qualify(SparkQueryConfig::kPartitionId), "0"},
{velox::core::QueryConfig::kSessionTimezone, "UTC"}};
veloxCfg_ = std::make_shared<facebook::velox::config::ConfigBase>(std::move(configs));
planConverter_ = std::make_unique<SubstraitToVeloxPlanConverter>(
pool,
Expand Down
4 changes: 2 additions & 2 deletions ep/build-velox/src/get-velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ set -exu

CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
VELOX_REPO=https://github.com/IBM/velox.git
VELOX_BRANCH=dft-2026_05_22
VELOX_ENHANCED_BRANCH=ibm-2026_05_22
VELOX_BRANCH=dft-2026_05_25
VELOX_ENHANCED_BRANCH=ibm-2026_05_25
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
ENABLE_ENHANCED_FEATURES=OFF
Expand Down
Loading