Skip to content

Commit 77c3a4e

Browse files
duckdblabs-bot authored and github-actions[bot] committed
Update vendored DuckDB sources to 9fffe27818
1 parent 8ee333f commit 77c3a4e

36 files changed

Lines changed: 461 additions & 221 deletions

src/duckdb/extension/parquet/include/parquet_crypto.hpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,9 @@ class ParquetKeys : public ObjectCacheEntry {
3636
public:
3737
static string ObjectType();
3838
string GetObjectType() override;
39+
optional_idx GetEstimatedCacheMemory() const override {
40+
return optional_idx {};
41+
}
3942

4043
private:
4144
unordered_map<string, string> keys;

src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ class ParquetFileMetadataCache : public ObjectCacheEntry {
4040
public:
4141
static string ObjectType();
4242
string GetObjectType() override;
43+
optional_idx GetEstimatedCacheMemory() const override;
4344

4445
bool IsValid(CachingFileHandle &new_handle) const;
4546
//! Return if a cache entry is valid.

src/duckdb/extension/parquet/include/parquet_reader.hpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
#pragma once
1010

1111
#include "duckdb.hpp"
12+
#include "duckdb/common/helper.hpp"
1213
#include "duckdb/storage/caching_file_system.hpp"
1314
#include "duckdb/common/common.hpp"
1415
#include "duckdb/common/encryption_functions.hpp"
@@ -206,7 +207,9 @@ class ParquetReader : public BaseFileReader {
206207
void AddVirtualColumn(column_t virtual_column_id) override;
207208

208209
void GetPartitionStats(vector<PartitionStatistics> &result);
209-
static void GetPartitionStats(const duckdb_parquet::FileMetaData &metadata, vector<PartitionStatistics> &result);
210+
static void GetPartitionStats(const duckdb_parquet::FileMetaData &metadata, vector<PartitionStatistics> &result,
211+
optional_ptr<ParquetColumnSchema> root_schema = nullptr,
212+
optional_ptr<ParquetOptions> parquet_options = nullptr);
210213
static bool MetadataCacheEnabled(ClientContext &context);
211214
static shared_ptr<ParquetFileMetadataCache> GetMetadataCacheEntry(ClientContext &context, const OpenFileInfo &file);
212215

src/duckdb/extension/parquet/parquet_crypto.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,7 @@ namespace duckdb {
1717

1818
ParquetKeys &ParquetKeys::Get(ClientContext &context) {
1919
auto &cache = ObjectCache::GetObjectCache(context);
20-
if (!cache.Get<ParquetKeys>(ParquetKeys::ObjectType())) {
21-
cache.Put(ParquetKeys::ObjectType(), make_shared_ptr<ParquetKeys>());
22-
}
23-
return *cache.Get<ParquetKeys>(ParquetKeys::ObjectType());
20+
return *cache.GetOrCreate<ParquetKeys>(ParquetKeys::ObjectType());
2421
}
2522

2623
void ParquetKeys::AddKey(const string &key_name, const string &key) {

src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,31 @@ string ParquetFileMetadataCache::GetObjectType() {
2323
return ObjectType();
2424
}
2525

26+
optional_idx ParquetFileMetadataCache::GetEstimatedCacheMemory() const {
27+
// Base memory consumption
28+
idx_t memory = sizeof(*this);
29+
30+
if (metadata) {
31+
const auto num_cols = metadata->schema.size();
32+
memory += sizeof(duckdb_parquet::FileMetaData);
33+
memory += num_cols * sizeof(duckdb_parquet::SchemaElement);
34+
memory += metadata->row_groups.size() * sizeof(duckdb_parquet::RowGroup) +
35+
num_cols * sizeof(duckdb_parquet::ColumnChunk);
36+
}
37+
if (geo_metadata) {
38+
memory +=
39+
sizeof(GeoParquetFileMetadata) + geo_metadata->GetColumnMeta().size() * sizeof(GeoParquetColumnMetadata);
40+
}
41+
if (crypto_metadata) {
42+
memory += sizeof(FileCryptoMetaData);
43+
}
44+
45+
memory += footer_size;
46+
memory += version_tag.size();
47+
48+
return memory;
49+
}
50+
2651
bool ParquetFileMetadataCache::IsValid(CachingFileHandle &new_handle) const {
2752
return ExternalFileCache::IsValid(validate, version_tag, last_modified, new_handle.GetVersionTag(),
2853
new_handle.GetLastModifiedTime());

src/duckdb/extension/parquet/parquet_multi_file_info.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -525,17 +525,18 @@ shared_ptr<BaseFileReader> ParquetMultiFileInfo::CreateReader(ClientContext &con
525525

526526
shared_ptr<BaseUnionData> ParquetReader::GetUnionData(idx_t file_idx) {
527527
auto result = make_uniq<ParquetUnionData>(file);
528+
result->names.reserve(columns.size());
529+
result->types.reserve(columns.size());
528530
for (auto &column : columns) {
529531
result->names.push_back(column.name);
530532
result->types.push_back(column.type);
531533
}
534+
535+
result->options = parquet_options;
536+
result->metadata = metadata;
532537
if (file_idx == 0) {
533-
result->options = parquet_options;
534-
result->metadata = metadata;
535538
result->reader = shared_from_this();
536539
} else {
537-
result->options = std::move(parquet_options);
538-
result->metadata = std::move(metadata);
539540
result->root_schema = std::move(root_schema);
540541
}
541542
return std::move(result);

src/duckdb/extension/parquet/parquet_reader.cpp

Lines changed: 59 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,42 +1,30 @@
11
#include "parquet_reader.hpp"
22

3-
#include "reader/boolean_column_reader.hpp"
4-
#include "reader/callback_column_reader.hpp"
3+
#include "duckdb/common/optional_ptr.hpp"
4+
#include "duckdb/function/partition_stats.hpp"
5+
#include "parquet_types.h"
56
#include "column_reader.hpp"
6-
#include "duckdb.hpp"
77
#include "reader/expression_column_reader.hpp"
88
#include "parquet_geometry.hpp"
99
#include "reader/list_column_reader.hpp"
1010
#include "parquet_crypto.hpp"
1111
#include "parquet_file_metadata_cache.hpp"
1212
#include "parquet_statistics.hpp"
13-
#include "parquet_timestamp.hpp"
1413
#include "mbedtls_wrapper.hpp"
1514
#include "reader/row_number_column_reader.hpp"
16-
#include "reader/string_column_reader.hpp"
1715
#include "reader/variant_column_reader.hpp"
1816
#include "reader/struct_column_reader.hpp"
19-
#include "reader/templated_column_reader.hpp"
2017
#include "thrift_tools.hpp"
2118
#include "duckdb/main/config.hpp"
2219
#include "duckdb/common/encryption_state.hpp"
2320
#include "duckdb/common/file_system.hpp"
2421
#include "duckdb/common/helper.hpp"
25-
#include "duckdb/common/hive_partitioning.hpp"
2622
#include "duckdb/common/string_util.hpp"
2723
#include "duckdb/planner/table_filter.hpp"
2824
#include "duckdb/storage/object_cache.hpp"
2925
#include "duckdb/optimizer/statistics_propagator.hpp"
3026
#include "duckdb/planner/table_filter_state.hpp"
3127
#include "duckdb/common/multi_file/multi_file_reader.hpp"
32-
#include "duckdb/logging/log_manager.hpp"
33-
#include "duckdb/common/multi_file/multi_file_column_mapper.hpp"
34-
#include "duckdb/common/encryption_functions.hpp"
35-
36-
#include <cassert>
37-
#include <chrono>
38-
#include <cstring>
39-
#include <sstream>
4028

4129
namespace duckdb {
4230

@@ -176,7 +164,7 @@ LoadMetadata(ClientContext &context, Allocator &allocator, CachingFileHandle &fi
176164
}
177165
ParquetCrypto::GenerateAdditionalAuthenticatedData(allocator, aad_crypto_metadata);
178166
ParquetCrypto::Read(*metadata, *file_proto, encryption_config->GetFooterKey(), encryption_util,
179-
std::move(aad_crypto_metadata));
167+
aad_crypto_metadata);
180168
} else {
181169
metadata->read(file_proto.get());
182170
}
@@ -650,8 +638,8 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d
650638
if (is_repeated) {
651639
auto list_type = LogicalType::LIST(result.type);
652640
vector<ParquetColumnSchema> list_child = {std::move(result)};
653-
result = ParquetColumnSchema::FromChildSchemas(s_ele.name, std::move(list_type), max_define, max_repeat,
654-
this_idx, next_file_idx, std::move(list_child));
641+
result = ParquetColumnSchema::FromChildSchemas(s_ele.name, list_type, max_define, max_repeat, this_idx,
642+
next_file_idx, std::move(list_child));
655643
}
656644
result.parent_schema_index = this_idx;
657645
return result;
@@ -665,8 +653,8 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d
665653
if (s_ele.repetition_type == FieldRepetitionType::REPEATED) {
666654
auto list_type = LogicalType::LIST(result.type);
667655
vector<ParquetColumnSchema> list_child = {std::move(result)};
668-
return ParquetColumnSchema::FromChildSchemas(s_ele.name, std::move(list_type), max_define, max_repeat,
669-
this_idx, next_file_idx, std::move(list_child));
656+
return ParquetColumnSchema::FromChildSchemas(s_ele.name, list_type, max_define, max_repeat, this_idx,
657+
next_file_idx, std::move(list_child));
670658
}
671659

672660
return result;
@@ -1233,17 +1221,64 @@ void ParquetReader::InitializeScan(ClientContext &context, ParquetReaderScanStat
12331221
}
12341222

12351223
void ParquetReader::GetPartitionStats(vector<PartitionStatistics> &result) {
1236-
GetPartitionStats(*GetFileMetadata(), result);
1224+
GetPartitionStats(*GetFileMetadata(), result, *root_schema, parquet_options);
12371225
}
12381226

1239-
void ParquetReader::GetPartitionStats(const duckdb_parquet::FileMetaData &metadata,
1240-
vector<PartitionStatistics> &result) {
1227+
struct ParquetPartitionRowGroup : public PartitionRowGroup {
1228+
ParquetPartitionRowGroup(const duckdb_parquet::FileMetaData &metadata_p,
1229+
optional_ptr<ParquetColumnSchema> root_schema_p,
1230+
optional_ptr<ParquetOptions> parquet_options_p, const idx_t row_group_idx_p)
1231+
: metadata(metadata_p), root_schema(root_schema_p), parquet_options(parquet_options_p),
1232+
row_group_idx(row_group_idx_p) {
1233+
}
1234+
1235+
const duckdb_parquet::FileMetaData &metadata;
1236+
const optional_ptr<ParquetColumnSchema> root_schema;
1237+
const optional_ptr<ParquetOptions> parquet_options;
1238+
const idx_t row_group_idx;
1239+
1240+
unique_ptr<BaseStatistics> GetColumnStatistics(const StorageIndex &storage_index) override {
1241+
const idx_t primary_index = storage_index.GetPrimaryIndex();
1242+
D_ASSERT(metadata.row_groups.size() > row_group_idx);
1243+
D_ASSERT(root_schema->children.size() > primary_index);
1244+
1245+
const auto &row_group = metadata.row_groups[row_group_idx];
1246+
const auto &column_schema = root_schema->children[primary_index];
1247+
return column_schema.Stats(metadata, *parquet_options, row_group_idx, row_group.columns);
1248+
}
1249+
1250+
bool MinMaxIsExact(const BaseStatistics &, const StorageIndex &storage_index) override {
1251+
const idx_t primary_index = storage_index.GetPrimaryIndex();
1252+
D_ASSERT(metadata.row_groups.size() > row_group_idx);
1253+
D_ASSERT(root_schema->children.size() > primary_index);
1254+
1255+
const auto &row_group = metadata.row_groups[row_group_idx];
1256+
const auto &column_chunk = row_group.columns[primary_index];
1257+
1258+
if (column_chunk.__isset.meta_data && column_chunk.meta_data.__isset.statistics &&
1259+
column_chunk.meta_data.statistics.__isset.is_min_value_exact &&
1260+
column_chunk.meta_data.statistics.__isset.is_max_value_exact) {
1261+
const auto &stats = column_chunk.meta_data.statistics;
1262+
return stats.is_min_value_exact && stats.is_max_value_exact;
1263+
}
1264+
return false;
1265+
}
1266+
};
1267+
1268+
void ParquetReader::GetPartitionStats(const duckdb_parquet::FileMetaData &metadata, vector<PartitionStatistics> &result,
1269+
optional_ptr<ParquetColumnSchema> root_schema,
1270+
optional_ptr<ParquetOptions> parquet_options) {
12411271
idx_t offset = 0;
1242-
for (auto &row_group : metadata.row_groups) {
1272+
for (idx_t i = 0; i < metadata.row_groups.size(); i++) {
1273+
auto &row_group = metadata.row_groups[i];
12431274
PartitionStatistics partition_stats;
12441275
partition_stats.row_start = offset;
12451276
partition_stats.count = row_group.num_rows;
12461277
partition_stats.count_type = CountType::COUNT_EXACT;
1278+
if (root_schema && parquet_options) {
1279+
partition_stats.partition_row_group =
1280+
make_shared_ptr<ParquetPartitionRowGroup>(metadata, root_schema, parquet_options, i);
1281+
}
12471282
offset += row_group.num_rows;
12481283
result.push_back(partition_stats);
12491284
}

src/duckdb/src/catalog/default/default_table_functions.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,17 @@ FROM histogram_values(source, col_name, bin_count := bin_count, technique := tec
7070
SELECT * EXCLUDE (message), UNNEST(parse_duckdb_log_message(log_type, message))
7171
FROM duckdb_logs(denormalized_table=1)
7272
WHERE type ILIKE log_type
73+
)"},
74+
{DEFAULT_SCHEMA, "duckdb_profiling_settings", {}, {}, R"(
75+
SELECT * EXCLUDE(input_type, scope, aliases)
76+
FROM duckdb_settings()
77+
WHERE name IN (
78+
'enable_profiling',
79+
'profiling_coverage',
80+
'profiling_output',
81+
'profiling_mode',
82+
'custom_profiling_settings'
83+
);
7384
)"},
7485
{nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}
7586
};

src/duckdb/src/common/arrow/schema_metadata.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -108,15 +108,15 @@ unsafe_unique_array<char> ArrowSchemaMetadata::SerializeMetadata() const {
108108
memcpy(metadata_ptr, &key_size, sizeof(int32_t));
109109
metadata_ptr += sizeof(int32_t);
110110
// Key
111-
memcpy(metadata_ptr, key.c_str(), key_size);
111+
memcpy(metadata_ptr, key.c_str(), key.size());
112112
metadata_ptr += key_size;
113113
const std::string &value = pair.second;
114114
const int32_t value_size = static_cast<int32_t>(value.size());
115115
// Length of the value (int32)
116116
memcpy(metadata_ptr, &value_size, sizeof(int32_t));
117117
metadata_ptr += sizeof(int32_t);
118118
// Value
119-
memcpy(metadata_ptr, value.c_str(), value_size);
119+
memcpy(metadata_ptr, value.c_str(), value.size());
120120
metadata_ptr += value_size;
121121
}
122122
return metadata_array_ptr;

src/duckdb/src/common/encryption_key_manager.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -51,10 +51,7 @@ void EncryptionKey::UnlockEncryptionKey(data_ptr_t key, idx_t key_len) {
5151
}
5252

5353
EncryptionKeyManager &EncryptionKeyManager::GetInternal(ObjectCache &cache) {
54-
if (!cache.Get<EncryptionKeyManager>(EncryptionKeyManager::ObjectType())) {
55-
cache.Put(EncryptionKeyManager::ObjectType(), make_shared_ptr<EncryptionKeyManager>());
56-
}
57-
return *cache.Get<EncryptionKeyManager>(EncryptionKeyManager::ObjectType());
54+
return *cache.GetOrCreate<EncryptionKeyManager>(EncryptionKeyManager::ObjectType());
5855
}
5956

6057
EncryptionKeyManager &EncryptionKeyManager::Get(ClientContext &context) {

0 commit comments

Comments
 (0)