Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 14 additions & 71 deletions src/duckdb/src/execution/index/art/art.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/execution/expression_executor.hpp"
#include "duckdb/execution/index/art/art_builder.hpp"
#include "duckdb/execution/index/art/art_key.hpp"
#include "duckdb/execution/index/art/art_merger.hpp"
#include "duckdb/execution/index/art/art_operator.hpp"
#include "duckdb/execution/index/art/art_scanner.hpp"
#include "duckdb/execution/index/art/base_leaf.hpp"
#include "duckdb/execution/index/art/base_node.hpp"
#include "duckdb/execution/index/art/iterator.hpp"
Expand All @@ -21,9 +25,6 @@
#include "duckdb/storage/metadata/metadata_reader.hpp"
#include "duckdb/storage/table/scan_state.hpp"
#include "duckdb/storage/table_io_manager.hpp"
#include "duckdb/execution/index/art/art_scanner.hpp"
#include "duckdb/execution/index/art/art_merger.hpp"
#include "duckdb/execution/index/art/art_operator.hpp"

namespace duckdb {

Expand Down Expand Up @@ -424,76 +425,17 @@ void ART::GenerateKeyVectors(ArenaAllocator &allocator, DataChunk &input, Vector
}

//===--------------------------------------------------------------------===//
// Construct from sorted data.
// Build from sorted data.
//===--------------------------------------------------------------------===//

// Recursively builds the subtree rooted at `node` from the keys in the inclusive
// index range [section.start, section.end], beginning at byte offset section.depth.
// `section.depth` is advanced in place while the first and last key of the range agree.
// Returns false as soon as a leaf would hold more than one row ID while IsUnique() is true,
// and true otherwise.
bool ART::ConstructInternal(const unsafe_vector<ARTKey> &keys, const unsafe_vector<ARTKey> &row_ids, Node &node,
ARTKeySection &section) {
D_ASSERT(section.start < keys.size());
D_ASSERT(section.end < keys.size());
D_ASSERT(section.start <= section.end);

// Only the first and the last key of the range are compared below.
// NOTE(review): this presumably relies on `keys` being sorted - confirm with the caller.
auto &start = keys[section.start];
auto &end = keys[section.end];
D_ASSERT(start.len != 0);

// Increment the depth until we reach a leaf or find a mismatching byte.
// Remember the depth we started at: the bytes in [prefix_depth, section.depth) become the prefix.
auto prefix_depth = section.depth;
while (start.len != section.depth && start.ByteMatches(end, section.depth)) {
section.depth++;
}

if (start.len == section.depth) {
// We reached a leaf. All the bytes of start_key and end_key match.
auto row_id_count = section.end - section.start + 1;
if (IsUnique() && row_id_count != 1) {
return false;
}

reference<Node> ref(node);
auto count = UnsafeNumericCast<uint8_t>(start.len - prefix_depth);
Prefix::New(*this, ref, start, prefix_depth, count);
if (row_id_count == 1) {
// Exactly one row ID for this key: create the leaf directly from it.
Leaf::New(ref, row_ids[section.start].GetRowId());
} else {
// Loop and insert the row IDs.
// We cannot use Construct in the leaf because row IDs are not sorted.
// A fresh arena is created for every such leaf and handed to the insert calls.
ArenaAllocator arena(BufferAllocator::Get(db));
for (idx_t i = section.start; i < section.start + row_id_count; i++) {
ARTOperator::Insert(arena, *this, ref, row_ids[i], 0, row_ids[i], GateStatus::GATE_SET, nullptr,
IndexAppendMode::DEFAULT);
}
ref.get().SetGateStatus(GateStatus::GATE_SET);
}
return true;
}

// Create a new node and recurse.
// Each child section covers the keys that share the same byte at section.depth.
unsafe_vector<ARTKeySection> children;
section.GetChildSections(children, keys);

// Create the prefix.
reference<Node> ref(node);
auto prefix_length = section.depth - prefix_depth;
Prefix::New(*this, ref, start, prefix_depth, prefix_length);

// Create the node.
// The node type is chosen from the number of child sections.
Node::New(*this, ref, Node::GetNodeType(children.size()));
for (auto &child : children) {
Node new_child;
auto success = ConstructInternal(keys, row_ids, new_child, child);
// The child is attached before `success` is checked.
// NOTE(review): presumably so that a partially built subtree stays reachable
// from its parent when construction fails - confirm.
Node::InsertChild(*this, ref, child.key_byte, new_child);
if (!success) {
return false;
}
}
return true;
}
ARTConflictType ART::Build(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids, const idx_t row_count) {
ArenaAllocator arena(BufferAllocator::Get(db));
ARTBuilder builder(arena, *this, keys, row_ids);
builder.Init(tree, row_count - 1);

bool ART::Construct(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids, const idx_t row_count) {
ARTKeySection section(0, row_count - 1, 0, 0);
if (!ConstructInternal(keys, row_ids, tree, section)) {
return false;
auto result = builder.Build();
if (result != ARTConflictType::NO_CONFLICT) {
return result;
}

#ifdef DEBUG
Expand All @@ -504,7 +446,8 @@ bool ART::Construct(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids,
it.Scan(empty_key, NumericLimits<row_t>().Maximum(), row_ids_debug, false);
D_ASSERT(row_count == row_ids_debug.size());
#endif
return true;

return ARTConflictType::NO_CONFLICT;
}

//===--------------------------------------------------------------------===//
Expand Down
91 changes: 91 additions & 0 deletions src/duckdb/src/execution/index/art/art_builder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#include "duckdb/execution/index/art/art_builder.hpp"

#include "duckdb/execution/index/art/art_key.hpp"
#include "duckdb/execution/index/art/prefix.hpp"
#include "duckdb/execution/index/art/leaf.hpp"
#include "duckdb/execution/index/art/art_operator.hpp"

namespace duckdb {

// Builds the ART from the keys and row IDs handed to the builder.
//
// The work list is the explicit stack `s`: every entry names a node slot and the inclusive
// key range [start, end] that has to be placed below it, starting at byte offset `depth`.
// Each iteration pops one entry and either
//   - turns it into a leaf (all remaining bytes of the first and last key match), or
//   - creates a prefix plus an inner node and pushes one entry per distinct byte at `depth`.
//
// Returns ARTConflictType::CONSTRAINT if art.IsUnique() and a key occurs with more than one
// row ID; returns ARTConflictType::NO_CONFLICT once the stack is exhausted.
ARTConflictType ARTBuilder::Build() {
	// Scratch buffer with the first key index of every child range of the current inner node.
	// It lives outside the loop so that its capacity is reused instead of being
	// allocated and freed once per inner node.
	vector<idx_t> child_offsets;

	while (!s.empty()) {
		// Copy the entry so we can pop it.
		auto entry = s.top();
		s.pop();

		D_ASSERT(entry.start < keys.size());
		D_ASSERT(entry.end < keys.size());
		D_ASSERT(entry.start <= entry.end);

		auto &start = keys[entry.start];
		auto &end = keys[entry.end];
		D_ASSERT(start.len != 0);

		// Increment the depth until we reach a leaf or find a mismatching byte.
		// The bytes in [prefix_depth, entry.depth) become the prefix of this node.
		const auto prefix_depth = entry.depth;
		while (start.len != entry.depth && start.ByteMatches(end, entry.depth)) {
			entry.depth++;
		}

		reference<Node> ref(entry.node);

		// True, if we reached a leaf: all bytes of start_key and end_key match.
		if (start.len == entry.depth) {
			// Get the number of row IDs in the leaf.
			auto row_id_count = entry.end - entry.start + 1;
			if (art.IsUnique() && row_id_count != 1) {
				return ARTConflictType::CONSTRAINT;
			}

			auto count = UnsafeNumericCast<uint8_t>(start.len - prefix_depth);
			Prefix::New(art, ref, start, prefix_depth, count);

			// Inline the row ID.
			if (row_id_count == 1) {
				Leaf::New(ref, row_ids[entry.start].GetRowId());
				continue;
			}

			// Loop and insert the row IDs.
			// We cannot iterate into the nested leaf with the builder
			// because row IDs are not sorted.
			for (idx_t i = entry.start; i < entry.start + row_id_count; i++) {
				ARTOperator::Insert(arena, art, ref, row_ids[i], 0, row_ids[i], GateStatus::GATE_SET, nullptr,
				                    IndexAppendMode::DEFAULT);
			}
			ref.get().SetGateStatus(GateStatus::GATE_SET);
			continue;
		}

		// Create the prefix. Returns early, if the prefix_length is zero.
		auto prefix_length = entry.depth - prefix_depth;
		Prefix::New(art, ref, start, prefix_depth, prefix_length);

		// Collect the start index of every run of keys sharing the same byte at entry.depth.
		child_offsets.clear();
		child_offsets.emplace_back(entry.start);
		for (idx_t i = entry.start + 1; i <= entry.end; i++) {
			if (keys[i - 1].data[entry.depth] != keys[i].data[entry.depth]) {
				child_offsets.emplace_back(i);
			}
		}

		// Create a new node containing the children.
		// The node type is chosen up front from the final child count.
		const auto child_count = child_offsets.size();
		Node::New(art, ref, Node::GetNodeType(child_count));
		for (idx_t i = 0; i < child_count; i++) {
			// A child range ends right before the next range starts; the last one ends at entry.end.
			const auto start_offset = child_offsets[i];
			const auto end_offset = i + 1 < child_count ? child_offsets[i + 1] - 1 : entry.end;
			const auto child_byte = keys[start_offset].data[entry.depth];

			// FIXME: Improve performance by either returning a reference to the child directly,
			// FIXME: or by calling InsertChild after processing the child (at the end of the stack loop).
			Node::InsertChild(art, ref, child_byte);
			auto child = ref.get().Node::GetChildMutable(art, child_byte, true);
			s.emplace(*child, start_offset, end_offset, entry.depth + 1);
		}
	}

	// We exhausted the stack.
	return ARTConflictType::NO_CONFLICT;
}

} // namespace duckdb
23 changes: 0 additions & 23 deletions src/duckdb/src/execution/index/art/art_key.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,27 +163,4 @@ idx_t ARTKey::GetMismatchPos(const ARTKey &other, const idx_t start) const {
return DConstants::INVALID_INDEX;
}

//===--------------------------------------------------------------------===//
// ARTKeySection
//===--------------------------------------------------------------------===//

// Creates a section from explicit values: the index bounds `start` and `end`,
// the byte offset `depth`, and the key byte `byte` stored as `key_byte`.
ARTKeySection::ARTKeySection(idx_t start, idx_t end, idx_t depth, data_t byte)
: start(start), end(end), depth(depth), key_byte(byte) {
}

// Creates a child section of `section`: it sits one level deeper (section.depth + 1),
// and its key_byte is read from keys[end] at the parent's depth.
ARTKeySection::ARTKeySection(idx_t start, idx_t end, const unsafe_vector<ARTKey> &keys, const ARTKeySection &section)
: start(start), end(end), depth(section.depth + 1), key_byte(keys[end].data[section.depth]) {
}

// Splits this section into child sections and appends them to `sections`.
// Neighbouring keys are compared at byte offset `depth`; every change of that byte
// closes the current run of keys, and the final run always ends at `end`.
void ARTKeySection::GetChildSections(unsafe_vector<ARTKeySection> &sections, const unsafe_vector<ARTKey> &keys) {
	idx_t run_begin = start;
	for (idx_t next = start + 1; next <= end; next++) {
		const idx_t prev = next - 1;
		const bool same_byte = keys[prev].data[depth] == keys[next].data[depth];
		if (same_byte) {
			continue;
		}
		// The byte changed between prev and next: emit the run that ends at prev.
		sections.emplace_back(run_begin, prev, keys, *this);
		run_begin = next;
	}
	// Emit the trailing run.
	sections.emplace_back(run_begin, end, keys, *this);
}

} // namespace duckdb
16 changes: 8 additions & 8 deletions src/duckdb/src/execution/index/art/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,31 +198,31 @@ void Node::DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte, c
//===--------------------------------------------------------------------===//

// Looks up the child stored under `byte` by dispatching on the node's type to the
// matching Node4 / Node16 / Node48 / Node256 GetChild implementation.
// Where an `unsafe` flag is present, it is forwarded unchanged to that implementation;
// its exact meaning is defined there and is not visible here.
// Throws an InternalException for any other node type.
template <class NODE>
unsafe_optional_ptr<Node> GetChildInternal(ART &art, NODE &node, const uint8_t byte) {
static unsafe_optional_ptr<Node> GetChildInternal(ART &art, NODE &node, const uint8_t byte, const bool unsafe) {
D_ASSERT(node.HasMetadata());

auto type = node.GetType();
switch (type) {
case NType::NODE_4:
return Node4::GetChild(Node::Ref<Node4>(art, node, type), byte);
return Node4::GetChild(Node::Ref<Node4>(art, node, type), byte, unsafe);
case NType::NODE_16:
return Node16::GetChild(Node::Ref<Node16>(art, node, type), byte);
return Node16::GetChild(Node::Ref<Node16>(art, node, type), byte, unsafe);
case NType::NODE_48:
return Node48::GetChild(Node::Ref<Node48>(art, node, type), byte);
return Node48::GetChild(Node::Ref<Node48>(art, node, type), byte, unsafe);
case NType::NODE_256: {
return Node256::GetChild(Node::Ref<Node256>(art, node, type), byte);
return Node256::GetChild(Node::Ref<Node256>(art, node, type), byte, unsafe);
}
default:
throw InternalException("Invalid node type for GetChildInternal: %d.", type);
}
}

// Const child lookup for `byte`; delegates to GetChildInternal.
// The four-argument call passes `false` for the unsafe flag.
const unsafe_optional_ptr<Node> Node::GetChild(ART &art, const uint8_t byte) const {
return GetChildInternal(art, *this, byte);
return GetChildInternal(art, *this, byte, false);
}

// Child lookup for `byte` returning a non-const pointer; delegates to GetChildInternal.
// The signature taking `unsafe` hands the caller's flag through unchanged.
unsafe_optional_ptr<Node> Node::GetChildMutable(ART &art, const uint8_t byte) const {
return GetChildInternal(art, *this, byte);
unsafe_optional_ptr<Node> Node::GetChildMutable(ART &art, const uint8_t byte, const bool unsafe) const {
return GetChildInternal(art, *this, byte, unsafe);
}

template <class NODE>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ SinkResultType PhysicalCreateARTIndex::SinkSorted(OperatorSinkInput &input) cons
auto art = make_uniq<ART>(info->index_name, l_index->GetConstraintType(), l_index->GetColumnIds(),
l_index->table_io_manager, l_index->unbound_expressions, storage.db,
l_index->Cast<ART>().allocators);
if (!art->Construct(l_state.keys, l_state.row_ids, l_state.key_chunk.size())) {
if (art->Build(l_state.keys, l_state.row_ids, l_state.key_chunk.size()) != ARTConflictType::NO_CONFLICT) {
throw ConstraintException("Data contains duplicates on indexed column(s)");
}

Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "0-dev3109"
#define DUCKDB_PATCH_VERSION "0-dev3141"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 4
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.4.0-dev3109"
#define DUCKDB_VERSION "v1.4.0-dev3141"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "d229d97f40"
#define DUCKDB_SOURCE_ID "22e6d1e375"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
7 changes: 2 additions & 5 deletions src/duckdb/src/include/duckdb/execution/index/art/art.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ class ART : public BoundIndex {
//! Drop the ART.
void CommitDrop(IndexLock &index_lock) override;

//! Construct an ART from a vector of sorted keys and their row IDs.
bool Construct(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids, const idx_t row_count);
//! Build an ART from a vector of sorted keys and their row IDs.
ARTConflictType Build(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids, const idx_t row_count);

//! Merge another ART into this ART. Both must be locked.
//! FIXME: Return ARTConflictType instead of a boolean.
Expand Down Expand Up @@ -138,9 +138,6 @@ class ART : public BoundIndex {
string GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index,
DataChunk &input) override;

bool ConstructInternal(const unsafe_vector<ARTKey> &keys, const unsafe_vector<ARTKey> &row_ids, Node &node,
ARTKeySection &section);

void InitializeMergeUpperBounds(unsafe_vector<idx_t> &upper_bounds);
void InitializeMerge(Node &node, unsafe_vector<idx_t> &upper_bounds);

Expand Down
55 changes: 55 additions & 0 deletions src/duckdb/src/include/duckdb/execution/index/art/art_builder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/execution/index/art/art_builder.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/execution/index/art/art.hpp"
#include "duckdb/common/stack.hpp"

namespace duckdb {

//! ARTBuilder builds an ART from a vector of keys and their matching row IDs.
//! It keeps an explicit stack of pending NodeEntry elements that Build() works through.
//! The builder only stores references to the arena, the ART, the keys, and the row IDs,
//! so all of them must outlive the builder.
class ARTBuilder {
public:
	ARTBuilder() = delete;
	ARTBuilder(ArenaAllocator &arena, ART &art, const unsafe_vector<ARTKey> &keys, const unsafe_vector<ARTKey> &row_ids)
	    : arena(arena), art(art), keys(keys), row_ids(row_ids) {
	}

public:
	//! Initialize the ART builder by passing a reference to the root node.
	//! The first stack entry covers the keys at indexes [0, end] (end is inclusive) at depth 0.
	void Init(Node &node, const idx_t end) {
		s.emplace(node, 0, end, 0);
	}
	//! Build the ART starting at the first entry in the stack.
	ARTConflictType Build();

private:
	//! A pending unit of work: the node to fill and the key range to place below it.
	struct NodeEntry {
		NodeEntry() = delete;
		NodeEntry(Node &node, const idx_t start, const idx_t end, const idx_t depth)
		    : node(node), start(start), end(end), depth(depth) {
		}

		//! The node to build into.
		Node &node;
		//! Index of the first key of the range.
		idx_t start;
		//! Index of the last key of the range (inclusive).
		idx_t end;
		//! Byte offset into the keys at which this entry starts.
		idx_t depth;
	};

	//! The arena holds any temporary memory allocated during the Build phase.
	ArenaAllocator &arena;
	//! The ART holding the node memory.
	ART &art;
	//! The keys to build the ART from.
	const unsafe_vector<ARTKey> &keys;
	//! The row IDs matching the keys.
	const unsafe_vector<ARTKey> &row_ids;
	//! The stack. While building, NodeEntry elements are pushed onto the stack.
	stack<NodeEntry> s;
};

} // namespace duckdb
Loading
Loading