Skip to content

Commit ffed2ab

Browse files
duckdblabs-bot authored and github-actions[bot] committed
Update vendored DuckDB sources to 99a37abe11
1 parent 16e352f commit ffed2ab

29 files changed

Lines changed: 499 additions & 114 deletions

src/duckdb/extension/parquet/parquet_metadata.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -709,6 +709,9 @@ void ParquetMetaDataOperator::BindSchema<ParquetMetadataOperatorType::FILE_META_
709709

710710
names.emplace_back("footer_size");
711711
return_types.emplace_back(LogicalType::UBIGINT);
712+
713+
names.emplace_back("column_orders");
714+
return_types.emplace_back(LogicalType::LIST(LogicalType::VARCHAR));
712715
}
713716

714717
idx_t ParquetFileMetadataProcessor::TotalRowCount(ParquetReader &reader) {
@@ -739,6 +742,17 @@ void ParquetFileMetadataProcessor::ReadRow(vector<reference<Vector>> &output, id
739742
output[7].get().SetValue(output_idx, Value::UBIGINT(reader.GetHandle().GetFileSize()));
740743
// footer_size
741744
output[8].get().SetValue(output_idx, Value::UBIGINT(reader.metadata->footer_size));
745+
// column_orders
746+
Value column_orders_value;
747+
if (meta_data->__isset.column_orders) {
748+
vector<Value> column_orders;
749+
column_orders.reserve(meta_data->column_orders.size());
750+
for (auto &column_order : meta_data->column_orders) {
751+
column_orders.push_back(Value(ConvertParquetElementToString(column_order)));
752+
}
753+
column_orders_value = Value::LIST(LogicalType::VARCHAR, column_orders);
754+
}
755+
output[9].get().SetValue(output_idx, column_orders_value);
742756
}
743757

744758
//===--------------------------------------------------------------------===//

src/duckdb/extension/parquet/parquet_writer.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -411,6 +411,11 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file
411411
file_meta_data.created_by =
412412
StringUtil::Format("DuckDB version %s (build %s)", DuckDB::LibraryVersion(), DuckDB::SourceID());
413413

414+
duckdb_parquet::ColumnOrder column_order;
415+
column_order.__set_TYPE_ORDER(duckdb_parquet::TypeDefinedOrder());
416+
file_meta_data.column_orders.resize(column_names.size(), column_order);
417+
file_meta_data.__isset.column_orders = true;
418+
414419
for (auto &kv_pair : kv_metadata) {
415420
duckdb_parquet::KeyValue kv;
416421
kv.__set_key(kv_pair.first);

src/duckdb/src/common/enum_util.cpp

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,7 @@
108108
#include "duckdb/execution/index/unbound_index.hpp"
109109
#include "duckdb/execution/operator/csv_scanner/csv_option.hpp"
110110
#include "duckdb/execution/operator/csv_scanner/csv_state.hpp"
111+
#include "duckdb/execution/physical_operator.hpp"
111112
#include "duckdb/execution/physical_table_scan_enum.hpp"
112113
#include "duckdb/execution/reservoir_sample.hpp"
113114
#include "duckdb/function/aggregate_state.hpp"
@@ -3218,6 +3219,26 @@ OnEntryNotFound EnumUtil::FromString<OnEntryNotFound>(const char *value) {
32183219
return static_cast<OnEntryNotFound>(StringUtil::StringToEnum(GetOnEntryNotFoundValues(), 2, "OnEntryNotFound", value));
32193220
}
32203221

3222+
// Returns the lookup table that pairs each OperatorCachingMode enumerator with its string name.
// The table is a function-local static constexpr array with four entries
// (NONE, PARTITIONED, ORDERED, UNORDERED); the ToChars/FromString specializations below pass
// that entry count (4) explicitly together with the returned pointer.
const StringUtil::EnumStringLiteral *GetOperatorCachingModeValues() {
3223+
static constexpr StringUtil::EnumStringLiteral values[] {
3224+
{ static_cast<uint32_t>(OperatorCachingMode::NONE), "NONE" },
3225+
{ static_cast<uint32_t>(OperatorCachingMode::PARTITIONED), "PARTITIONED" },
3226+
{ static_cast<uint32_t>(OperatorCachingMode::ORDERED), "ORDERED" },
3227+
{ static_cast<uint32_t>(OperatorCachingMode::UNORDERED), "UNORDERED" }
3228+
};
3229+
return values;
3230+
}
3231+
3232+
// Specialization of EnumUtil::ToChars for OperatorCachingMode.
// Delegates to StringUtil::EnumToString, handing it the table from GetOperatorCachingModeValues(),
// the entry count (4), the enum's name for diagnostics, and the value cast to uint32_t.
// NOTE(review): behavior for a value not present in the table is decided by
// StringUtil::EnumToString, which is not visible in this hunk.
template<>
3233+
const char* EnumUtil::ToChars<OperatorCachingMode>(OperatorCachingMode value) {
3234+
return StringUtil::EnumToString(GetOperatorCachingModeValues(), 4, "OperatorCachingMode", static_cast<uint32_t>(value));
3235+
}
3236+
3237+
// Specialization of EnumUtil::FromString for OperatorCachingMode.
// Delegates to StringUtil::StringToEnum with the same 4-entry table used by ToChars and casts
// the result back to OperatorCachingMode.
// NOTE(review): handling of an unrecognized string is decided by StringUtil::StringToEnum,
// which is not visible in this hunk.
template<>
3238+
OperatorCachingMode EnumUtil::FromString<OperatorCachingMode>(const char *value) {
3239+
return static_cast<OperatorCachingMode>(StringUtil::StringToEnum(GetOperatorCachingModeValues(), 4, "OperatorCachingMode", value));
3240+
}
3241+
32213242
const StringUtil::EnumStringLiteral *GetOperatorFinalResultTypeValues() {
32223243
static constexpr StringUtil::EnumStringLiteral values[] {
32233244
{ static_cast<uint32_t>(OperatorFinalResultType::FINISHED), "FINISHED" },

src/duckdb/src/common/file_system.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -575,6 +575,12 @@ bool FileSystem::TryRemoveFile(const string &filename, optional_ptr<FileOpener>
575575
return false;
576576
}
577577

578+
// Removes a batch of files by calling TryRemoveFile once per entry, in the order given.
//   filenames: paths to remove
//   opener:    optional file opener, forwarded unchanged to every TryRemoveFile call
// The bool returned by TryRemoveFile is not inspected, so this function does not report
// which entries were actually removed.
void FileSystem::RemoveFiles(const vector<string> &filenames, optional_ptr<FileOpener> opener) {
579+
for (const auto &filename : filenames) {
580+
TryRemoveFile(filename, opener);
581+
}
582+
}
583+
578584
void FileSystem::FileSync(FileHandle &handle) {
579585
throw NotImplementedException("%s: FileSync is not implemented!", GetName());
580586
}

src/duckdb/src/common/sort/sort.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -339,7 +339,7 @@ SinkFinalizeType Sort::Finalize(ClientContext &context, OperatorSinkFinalizeInpu
339339
gstate.total_count += sorted_run->Count();
340340
maximum_run_count = MaxValue(maximum_run_count, sorted_run->Count());
341341
}
342-
if (gstate.num_threads == 1 || context.config.verify_parallelism) {
342+
if (context.config.verify_parallelism) {
343343
gstate.partition_size = STANDARD_VECTOR_SIZE;
344344
} else {
345345
gstate.partition_size = MinValue<idx_t>(gstate.total_count, DEFAULT_ROW_GROUP_SIZE);

src/duckdb/src/common/sort/sorted_run.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,11 @@ void SortedRunScanState::Scan(const SortedRun &sorted_run, const Vector &sort_ke
4747
}
4848
}
4949

50+
// Empties the row_handles and heap_handles containers of payload_state.pin_state.
// NOTE(review): presumably this drops the buffer pins held for the payload rows and heap so the
// memory can be released once scanning is done — confirm against the pin state's definition.
void SortedRunScanState::Clear() {
51+
payload_state.pin_state.row_handles.clear();
52+
payload_state.pin_state.heap_handles.clear();
53+
}
54+
5055
template <class SORT_KEY, class PHYSICAL_TYPE>
5156
void TemplatedGetKeyAndPayload(SORT_KEY *const *const sort_keys, SORT_KEY *temp_keys, const idx_t &count,
5257
DataChunk &key, data_ptr_t *const payload_ptrs) {

src/duckdb/src/common/sort/sorted_run_merger.cpp

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,8 @@ class SortedRunMergerLocalState : public LocalSourceState {
103103
bool TaskFinished() const;
104104
//! Do the work this thread has been assigned
105105
SourceResultType ExecuteTask(SortedRunMergerGlobalState &gstate, optional_ptr<DataChunk> chunk);
106+
//! Clear outstanding allocations
107+
void Clear();
106108

107109
private:
108110
//! Computes upper partition boundaries using K-way Merge Path
@@ -315,6 +317,13 @@ SortedRunMergerLocalState::SortedRunMergerLocalState(SortedRunMergerGlobalState
315317
}
316318
}
317319

320+
// Clears this thread-local merge state: empties in_memory_states and external_states,
// resets merged_partition, and clears sorted_run_scan_state.
// In this commit it is called from SortedRunMerger::GetData when the output chunk is empty
// (just before returning FINISHED) and from SortedRunMerger::MaterializeSortedRun after its loop.
void SortedRunMergerLocalState::Clear() {
321+
in_memory_states.clear();
322+
external_states.clear();
323+
merged_partition.Reset();
324+
sorted_run_scan_state.Clear();
325+
}
326+
318327
bool SortedRunMergerLocalState::TaskFinished() const {
319328
switch (task) {
320329
case SortedRunMergerTask::COMPUTE_BOUNDARIES:
@@ -856,7 +865,13 @@ SourceResultType SortedRunMerger::GetData(ExecutionContext &, DataChunk &chunk,
856865
}
857866
}
858867

859-
return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
868+
if (chunk.size() != 0) {
869+
return SourceResultType::HAVE_MORE_OUTPUT;
870+
}
871+
872+
// Done
873+
lstate.Clear();
874+
return SourceResultType::FINISHED;
860875
}
861876

862877
OperatorPartitionData SortedRunMerger::GetPartitionData(ExecutionContext &, DataChunk &, GlobalSourceState &,
@@ -890,6 +905,7 @@ SourceResultType SortedRunMerger::MaterializeSortedRun(ExecutionContext &, Opera
890905
break;
891906
}
892907
}
908+
lstate.Clear(); // Done
893909

894910
// The thread that completes the materialization returns FINISHED, all other threads return HAVE_MORE_OUTPUT
895911
return res;
@@ -904,11 +920,12 @@ unique_ptr<SortedRun> SortedRunMerger::GetSortedRun(GlobalSourceState &global_st
904920
}
905921
auto &target = *gstate.materialized_partitions[0];
906922
for (idx_t i = 1; i < gstate.materialized_partitions.size(); i++) {
907-
auto &source = *gstate.materialized_partitions[i];
908-
target.key_data->Combine(*source.key_data);
923+
auto &source = gstate.materialized_partitions[i];
924+
target.key_data->Combine(*source->key_data);
909925
if (target.payload_data) {
910-
target.payload_data->Combine(*source.payload_data);
926+
target.payload_data->Combine(*source->payload_data);
911927
}
928+
source.reset();
912929
}
913930
auto res = std::move(gstate.materialized_partitions[0]);
914931
gstate.materialized_partitions.clear();

src/duckdb/src/common/types/data_chunk.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,8 +131,8 @@ void DataChunk::Reference(DataChunk &chunk) {
131131
}
132132

133133
void DataChunk::Move(DataChunk &chunk) {
134-
SetCardinality(chunk);
135134
SetCapacity(chunk);
135+
SetCardinality(chunk);
136136
data = std::move(chunk.data);
137137
vector_caches = std::move(chunk.vector_caches);
138138

src/duckdb/src/common/types/row/partitioned_tuple_data.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -67,9 +67,9 @@ void PartitionedTupleData::AppendUnified(PartitionedTupleDataAppendState &state,
6767
auto &partition = *partitions[partition_index.GetIndex()];
6868
auto &partition_pin_state = state.partition_pin_states[partition_index.GetIndex()];
6969

70-
const auto size_before = partition.SizeInBytes();
70+
const auto size_before = partition.data_size;
7171
partition.AppendUnified(partition_pin_state, state.chunk_state, input, append_sel, actual_append_count);
72-
data_size += partition.SizeInBytes() - size_before;
72+
data_size += partition.data_size - size_before;
7373
} else {
7474
// Compute the heap sizes for the whole chunk
7575
if (!layout.AllConstant()) {
@@ -103,9 +103,9 @@ void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, TupleD
103103

104104
state.chunk_state.heap_sizes.Reference(input.heap_sizes);
105105

106-
const auto size_before = partition.SizeInBytes();
106+
const auto size_before = partition.data_size;
107107
partition.Build(partition_pin_state, state.chunk_state, 0, append_count);
108-
data_size += partition.SizeInBytes() - size_before;
108+
data_size += partition.data_size - size_before;
109109

110110
partition.CopyRows(state.chunk_state, input, *FlatVector::IncrementalSelectionVector(), append_count);
111111
} else {
@@ -224,9 +224,9 @@ void PartitionedTupleData::BuildBufferSpace(PartitionedTupleDataAppendState &sta
224224
const auto partition_offset = partition_entry.offset - partition_length;
225225

226226
// Build out the buffer space for this partition
227-
const auto size_before = partition.SizeInBytes();
227+
const auto size_before = partition.data_size;
228228
partition.Build(partition_pin_state, state.chunk_state, partition_offset, partition_length);
229-
data_size += partition.SizeInBytes() - size_before;
229+
data_size += partition.data_size - size_before;
230230
}
231231
}
232232

@@ -337,7 +337,7 @@ idx_t PartitionedTupleData::Count() const {
337337
}
338338

339339
// Reports the size of this partitioned tuple data in bytes.
// This hunk shows both versions of the return statement: the removed line returned data_size
// alone, the added line returns data_size plus stl_allocator->AllocationSize().
// NOTE(review): as a result SizeInBytes() and the data_size member are no longer equal; the
// other hunks in this file switch their bookkeeping to read data_size directly.
idx_t PartitionedTupleData::SizeInBytes() const {
340-
return data_size;
340+
return data_size + stl_allocator->AllocationSize();
341341
}
342342

343343
idx_t PartitionedTupleData::PartitionCount() const {
@@ -361,7 +361,7 @@ void PartitionedTupleData::Verify() const {
361361
for (auto &partition : partitions) {
362362
partition->Verify();
363363
total_count += partition->Count();
364-
total_size += partition->SizeInBytes();
364+
total_size += partition->data_size;
365365
}
366366
D_ASSERT(total_count == this->count);
367367
D_ASSERT(total_size == this->data_size);

src/duckdb/src/common/types/row/tuple_data_collection.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ idx_t TupleDataCollection::ChunkCount() const {
9393
}
9494

9595
// Reports the size of this tuple data collection in bytes.
// This hunk shows both versions of the return statement: the removed line returned data_size
// alone, the added line returns data_size plus stl_allocator->AllocationSize().
idx_t TupleDataCollection::SizeInBytes() const {
96-
return data_size;
96+
return data_size + stl_allocator->AllocationSize();
9797
}
9898

9999
void TupleDataCollection::Unpin() {

0 commit comments

Comments
 (0)