Skip to content

Commit f39e420

Browse files
Update vendored DuckDB sources to 56852a4
1 parent 56852a4 commit f39e420

43 files changed

Lines changed: 574 additions & 239 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/duckdb/extension/jemalloc/jemalloc/include/jemalloc/jemalloc.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
extern "C" {
66
#endif
77

8-
// DuckDB uses a 5s decay
9-
#define DUCKDB_JEMALLOC_DECAY 5
8+
// DuckDB uses a 1s decay
9+
#define DUCKDB_JEMALLOC_DECAY 1
1010

1111
/* Defined if __attribute__((...)) syntax is supported. */
1212
#define JEMALLOC_HAVE_ATTR

src/duckdb/extension/json/json_multi_file_info.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -474,11 +474,12 @@ void ReadJSONFunction(ClientContext &context, JSONReader &json_reader, JSONScanG
474474
const auto count = lstate.Read();
475475
yyjson_val **values = scan_state.values;
476476

477+
auto &column_ids = json_reader.reader_data.column_ids;
477478
if (!gstate.names.empty()) {
478479
vector<Vector *> result_vectors;
479-
result_vectors.reserve(gstate.column_ids.size());
480-
for (const auto &col_idx : gstate.column_ids) {
481-
result_vectors.emplace_back(&output.data[col_idx]);
480+
result_vectors.reserve(column_ids.size());
481+
for (idx_t i = 0; i < column_ids.size(); i++) {
482+
result_vectors.emplace_back(&output.data[i]);
482483
}
483484

484485
D_ASSERT(gstate.json_data.options.record_type != JSONRecordType::AUTO_DETECT);
@@ -521,9 +522,8 @@ void ReadJSONObjectsFunction(ClientContext &context, JSONReader &json_reader, JS
521522

522523
if (!gstate.names.empty()) {
523524
// Create the strings without copying them
524-
const auto col_idx = gstate.column_ids[0];
525-
auto strings = FlatVector::GetData<string_t>(output.data[col_idx]);
526-
auto &validity = FlatVector::Validity(output.data[col_idx]);
525+
auto strings = FlatVector::GetData<string_t>(output.data[0]);
526+
auto &validity = FlatVector::Validity(output.data[0]);
527527
for (idx_t i = 0; i < count; i++) {
528528
if (objects[i]) {
529529
strings[i] = string_t(units[i].pointer, units[i].size);

src/duckdb/extension/parquet/parquet_reader.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,9 +1120,8 @@ bool ParquetReader::ScanInternal(ClientContext &context, ParquetReaderScanState
11201120
} else {
11211121
auto local_idx = filter_entry.GetLocalIndex();
11221122
auto column_id = reader_data.column_ids[local_idx];
1123-
auto result_idx = reader_data.column_mapping[local_idx];
11241123

1125-
auto &result_vector = result.data[result_idx];
1124+
auto &result_vector = result.data[local_idx];
11261125
auto &child_reader = root_reader.GetChildReader(column_id);
11271126
child_reader.Filter(scan_count, define_ptr, repeat_ptr, result_vector, scan_filter.filter,
11281127
*scan_filter.filter_state, state.sel, filter_count, i == 0);
@@ -1142,7 +1141,7 @@ bool ParquetReader::ScanInternal(ClientContext &context, ParquetReaderScanState
11421141
root_reader.GetChildReader(file_col_idx).Skip(result.size());
11431142
continue;
11441143
}
1145-
auto &result_vector = result.data[reader_data.column_mapping[col_idx].GetIndex()];
1144+
auto &result_vector = result.data[i];
11461145
auto &child_reader = root_reader.GetChildReader(file_col_idx);
11471146
child_reader.Select(result.size(), define_ptr, repeat_ptr, result_vector, state.sel, filter_count);
11481147
}
@@ -1153,8 +1152,7 @@ bool ParquetReader::ScanInternal(ClientContext &context, ParquetReaderScanState
11531152
for (idx_t i = 0; i < reader_data.column_ids.size(); i++) {
11541153
auto col_idx = MultiFileLocalIndex(i);
11551154
auto file_col_idx = reader_data.column_ids[col_idx];
1156-
auto global_col_idx = reader_data.column_mapping[col_idx];
1157-
auto &result_vector = result.data[global_col_idx];
1155+
auto &result_vector = result.data[i];
11581156
auto &child_reader = root_reader.GetChildReader(file_col_idx);
11591157
auto rows_read = child_reader.Read(scan_count, define_ptr, repeat_ptr, result_vector);
11601158
if (rows_read != scan_count) {

src/duckdb/src/common/adbc/adbc.cpp

Lines changed: 68 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#include "duckdb/common/adbc/options.h"
1818
#include "duckdb/common/adbc/single_batch_array_stream.hpp"
1919
#include "duckdb/function/table/arrow.hpp"
20-
20+
#include "duckdb/common/adbc/wrappers.hpp"
2121
#include <stdlib.h>
2222
#include <string.h>
2323

@@ -249,7 +249,9 @@ AdbcStatusCode ConnectionNew(struct AdbcConnection *connection, struct AdbcError
249249
return ADBC_STATUS_INVALID_ARGUMENT;
250250
}
251251

252-
connection->private_data = nullptr;
252+
auto connection_wrapper = new duckdb::DuckDBAdbcConnectionWrapper();
253+
connection_wrapper->connection = nullptr;
254+
connection->private_data = connection_wrapper;
253255
return ADBC_STATUS_OK;
254256
}
255257

@@ -263,45 +265,65 @@ AdbcStatusCode ExecuteQuery(duckdb::Connection *conn, const char *query, struct
263265
return ADBC_STATUS_OK;
264266
}
265267

268+
AdbcStatusCode InternalSetOption(duckdb::Connection &conn, std::unordered_map<std::string, std::string> &options,
269+
struct AdbcError *error) {
270+
// If we got here, the options have already been validated and are acceptable
271+
for (auto &option : options) {
272+
if (strcmp(option.first.c_str(), ADBC_CONNECTION_OPTION_AUTOCOMMIT) == 0) {
273+
if (strcmp(option.second.c_str(), ADBC_OPTION_VALUE_ENABLED) == 0) {
274+
if (conn.HasActiveTransaction()) {
275+
AdbcStatusCode status = ExecuteQuery(&conn, "COMMIT", error);
276+
if (status != ADBC_STATUS_OK) {
277+
options.clear();
278+
return status;
279+
}
280+
}
281+
} else if (strcmp(option.second.c_str(), ADBC_OPTION_VALUE_DISABLED) == 0) {
282+
if (!conn.HasActiveTransaction()) {
283+
AdbcStatusCode status = ExecuteQuery(&conn, "START TRANSACTION", error);
284+
if (status != ADBC_STATUS_OK) {
285+
options.clear();
286+
return status;
287+
}
288+
}
289+
}
290+
}
291+
}
292+
options.clear();
293+
return ADBC_STATUS_OK;
294+
}
266295
AdbcStatusCode ConnectionSetOption(struct AdbcConnection *connection, const char *key, const char *value,
267296
struct AdbcError *error) {
268297
if (!connection) {
269298
SetError(error, "Connection is not set");
270299
return ADBC_STATUS_INVALID_ARGUMENT;
271300
}
272-
273-
auto conn = static_cast<duckdb::Connection *>(connection->private_data);
301+
std::string key_string = std::string(key);
302+
std::string key_value = std::string(value);
303+
auto conn_wrapper = static_cast<duckdb::DuckDBAdbcConnectionWrapper *>(connection->private_data);
274304
if (strcmp(key, ADBC_CONNECTION_OPTION_AUTOCOMMIT) == 0) {
275305
if (strcmp(value, ADBC_OPTION_VALUE_ENABLED) == 0) {
276-
if (conn->HasActiveTransaction()) {
277-
AdbcStatusCode status = ExecuteQuery(conn, "COMMIT", error);
278-
if (status != ADBC_STATUS_OK) {
279-
return status;
280-
}
281-
} else {
282-
// no-op
283-
}
306+
conn_wrapper->options[key_string] = key_value;
284307
} else if (strcmp(value, ADBC_OPTION_VALUE_DISABLED) == 0) {
285-
if (conn->HasActiveTransaction()) {
286-
// no-op
287-
} else {
288-
// begin
289-
AdbcStatusCode status = ExecuteQuery(conn, "START TRANSACTION", error);
290-
if (status != ADBC_STATUS_OK) {
291-
return status;
292-
}
293-
}
308+
conn_wrapper->options[key_string] = key_value;
294309
} else {
295310
auto error_message = "Invalid connection option value " + std::string(key) + "=" + std::string(value);
296311
SetError(error, error_message);
297312
return ADBC_STATUS_INVALID_ARGUMENT;
298313
}
314+
} else {
315+
// This is an unknown option to the DuckDB driver
316+
auto error_message =
317+
"Unknown connection option " + std::string(key) + "=" + (value ? std::string(value) : "(NULL)");
318+
SetError(error, error_message);
319+
return ADBC_STATUS_NOT_IMPLEMENTED;
320+
}
321+
if (!conn_wrapper->connection) {
322+
// If the connection has not yet been initialized, we just return here.
299323
return ADBC_STATUS_OK;
300324
}
301-
auto error_message =
302-
"Unknown connection option " + std::string(key) + "=" + (value ? std::string(value) : "(NULL)");
303-
SetError(error, error_message);
304-
return ADBC_STATUS_NOT_IMPLEMENTED;
325+
auto conn = reinterpret_cast<duckdb::Connection *>(conn_wrapper->connection);
326+
return InternalSetOption(*conn, conn_wrapper->options, error);
305327
}
306328

307329
AdbcStatusCode ConnectionReadPartition(struct AdbcConnection *connection, const uint8_t *serialized_partition,
@@ -323,7 +345,8 @@ AdbcStatusCode ConnectionCommit(struct AdbcConnection *connection, struct AdbcEr
323345
SetError(error, "Connection is not set");
324346
return ADBC_STATUS_INVALID_ARGUMENT;
325347
}
326-
auto conn = static_cast<duckdb::Connection *>(connection->private_data);
348+
auto conn_wrapper = static_cast<duckdb::DuckDBAdbcConnectionWrapper *>(connection->private_data);
349+
auto conn = reinterpret_cast<duckdb::Connection *>(conn_wrapper->connection);
327350
if (!conn->HasActiveTransaction()) {
328351
SetError(error, "No active transaction, cannot commit");
329352
return ADBC_STATUS_INVALID_STATE;
@@ -341,7 +364,8 @@ AdbcStatusCode ConnectionRollback(struct AdbcConnection *connection, struct Adbc
341364
SetError(error, "Connection is not set");
342365
return ADBC_STATUS_INVALID_ARGUMENT;
343366
}
344-
auto conn = static_cast<duckdb::Connection *>(connection->private_data);
367+
auto conn_wrapper = static_cast<duckdb::DuckDBAdbcConnectionWrapper *>(connection->private_data);
368+
auto conn = reinterpret_cast<duckdb::Connection *>(conn_wrapper->connection);
345369
if (!conn->HasActiveTransaction()) {
346370
SetError(error, "No active transaction, cannot rollback");
347371
return ADBC_STATUS_INVALID_STATE;
@@ -479,16 +503,25 @@ AdbcStatusCode ConnectionInit(struct AdbcConnection *connection, struct AdbcData
479503
return ADBC_STATUS_INVALID_ARGUMENT;
480504
}
481505
auto database_wrapper = static_cast<DuckDBAdbcDatabaseWrapper *>(database->private_data);
506+
auto conn_wrapper = static_cast<duckdb::DuckDBAdbcConnectionWrapper *>(connection->private_data);
507+
conn_wrapper->connection = nullptr;
482508

483-
connection->private_data = nullptr;
484-
auto res =
485-
duckdb_connect(database_wrapper->database, reinterpret_cast<duckdb_connection *>(&connection->private_data));
486-
return CheckResult(res, error, "Failed to connect to Database");
509+
auto res = duckdb_connect(database_wrapper->database, &conn_wrapper->connection);
510+
auto adbc_status = CheckResult(res, error, "Failed to connect to Database");
511+
if (adbc_status != ADBC_STATUS_OK) {
512+
return adbc_status;
513+
}
514+
// We might have options to set
515+
auto conn = reinterpret_cast<duckdb::Connection *>(conn_wrapper->connection);
516+
return InternalSetOption(*conn, conn_wrapper->options, error);
487517
}
488518

489519
AdbcStatusCode ConnectionRelease(struct AdbcConnection *connection, struct AdbcError *error) {
490520
if (connection && connection->private_data) {
491-
duckdb_disconnect(reinterpret_cast<duckdb_connection *>(&connection->private_data));
521+
auto conn_wrapper = static_cast<duckdb::DuckDBAdbcConnectionWrapper *>(connection->private_data);
522+
auto conn = reinterpret_cast<duckdb::Connection *>(conn_wrapper->connection);
523+
duckdb_disconnect(reinterpret_cast<duckdb_connection *>(&conn));
524+
delete conn_wrapper;
492525
connection->private_data = nullptr;
493526
}
494527
return ADBC_STATUS_OK;
@@ -638,7 +671,9 @@ AdbcStatusCode StatementNew(struct AdbcConnection *connection, struct AdbcStatem
638671
}
639672

640673
statement->private_data = statement_wrapper;
641-
statement_wrapper->connection = static_cast<duckdb_connection>(connection->private_data);
674+
auto conn_wrapper = static_cast<duckdb::DuckDBAdbcConnectionWrapper *>(connection->private_data);
675+
676+
statement_wrapper->connection = conn_wrapper->connection;
642677
statement_wrapper->statement = nullptr;
643678
statement_wrapper->result = nullptr;
644679
statement_wrapper->ingestion_stream.release = nullptr;

src/duckdb/src/common/adbc/driver_manager.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1080,13 +1080,15 @@ AdbcStatusCode AdbcConnectionGetTableTypes(struct AdbcConnection *connection, st
10801080

10811081
AdbcStatusCode AdbcConnectionInit(struct AdbcConnection *connection, struct AdbcDatabase *database,
10821082
struct AdbcError *error) {
1083+
10831084
if (!connection->private_data) {
10841085
SetError(error, "Must call AdbcConnectionNew first");
10851086
return ADBC_STATUS_INVALID_STATE;
10861087
} else if (!database->private_driver) {
10871088
SetError(error, "Database is not initialized");
10881089
return ADBC_STATUS_INVALID_ARGUMENT;
10891090
}
1091+
10901092
TempConnection *args = reinterpret_cast<TempConnection *>(connection->private_data);
10911093
connection->private_data = nullptr;
10921094
std::unordered_map<std::string, std::string> options = std::move(args->options);
@@ -1176,7 +1178,7 @@ AdbcStatusCode AdbcConnectionRollback(struct AdbcConnection *connection, struct
11761178

11771179
AdbcStatusCode AdbcConnectionSetOption(struct AdbcConnection *connection, const char *key, const char *value,
11781180
struct AdbcError *error) {
1179-
if (!connection->private_data) {
1181+
if (!connection || !connection->private_data) {
11801182
SetError(error, "AdbcConnectionSetOption: must AdbcConnectionNew first");
11811183
return ADBC_STATUS_INVALID_STATE;
11821184
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include "duckdb/common/arrow/arrow_appender.hpp"
2+
#include "duckdb/common/arrow/appender/null_data.hpp"
3+
4+
namespace duckdb {
5+
6+
void ArrowNullData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
7+
// nop
8+
}
9+
10+
void ArrowNullData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
11+
idx_t size = to - from;
12+
append_data.row_count += size;
13+
}
14+
15+
void ArrowNullData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
16+
result->n_buffers = 0;
17+
}
18+
19+
} // namespace duckdb

src/duckdb/src/common/arrow/arrow_appender.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ static void InitializeAppenderForType(ArrowAppendData &append_data) {
144144
static void InitializeFunctionPointers(ArrowAppendData &append_data, const LogicalType &type) {
145145
// handle special logical types
146146
switch (type.id()) {
147+
case LogicalTypeId::SQLNULL:
148+
InitializeAppenderForType<ArrowNullData>(append_data);
149+
break;
147150
case LogicalTypeId::BOOLEAN:
148151
InitializeAppenderForType<ArrowBoolData>(append_data);
149152
break;

src/duckdb/src/common/arrow/arrow_converter.cpp

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,30 @@ void InitializeChild(ArrowSchema &child, DuckDBArrowSchemaHolder &root_holder, c
6161
void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
6262
ClientProperties &options, ClientContext &context);
6363

64+
void SetArrowStructFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
65+
ClientProperties &options, ClientContext &context, bool map_is_parent = false) {
66+
child.format = "+s";
67+
auto &child_types = StructType::GetChildTypes(type);
68+
child.n_children = NumericCast<int64_t>(child_types.size());
69+
root_holder.nested_children.emplace_back();
70+
root_holder.nested_children.back().resize(child_types.size());
71+
root_holder.nested_children_ptr.emplace_back();
72+
root_holder.nested_children_ptr.back().resize(child_types.size());
73+
for (idx_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
74+
root_holder.nested_children_ptr.back()[type_idx] = &root_holder.nested_children.back()[type_idx];
75+
}
76+
child.children = &root_holder.nested_children_ptr.back()[0];
77+
for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
78+
InitializeChild(*child.children[type_idx], root_holder);
79+
root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));
80+
child.children[type_idx]->name = root_holder.owned_type_names.back().get();
81+
SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second, options, context);
82+
}
83+
if (map_is_parent) {
84+
child.children[0]->flags = 0; // Set the 'keys' field to non-nullable
85+
}
86+
}
87+
6488
void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
6589
ClientProperties &options, ClientContext &context) {
6690
child.format = "+m";
@@ -74,7 +98,7 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
7498
child.children = &root_holder.nested_children_ptr.back()[0];
7599
child.children[0]->name = "entries";
76100
child.children[0]->flags = 0; // Set the 'entries' field to non-nullable
77-
SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options, context);
101+
SetArrowStructFormat(root_holder, **child.children, ListType::GetChildType(type), options, context, true);
78102
}
79103

80104
bool SetArrowExtension(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
@@ -265,26 +289,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
265289
break;
266290
}
267291
case LogicalTypeId::STRUCT: {
268-
child.format = "+s";
269-
auto &child_types = StructType::GetChildTypes(type);
270-
child.n_children = NumericCast<int64_t>(child_types.size());
271-
root_holder.nested_children.emplace_back();
272-
root_holder.nested_children.back().resize(child_types.size());
273-
root_holder.nested_children_ptr.emplace_back();
274-
root_holder.nested_children_ptr.back().resize(child_types.size());
275-
for (idx_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
276-
root_holder.nested_children_ptr.back()[type_idx] = &root_holder.nested_children.back()[type_idx];
277-
}
278-
child.children = &root_holder.nested_children_ptr.back()[0];
279-
for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
280-
281-
InitializeChild(*child.children[type_idx], root_holder);
282-
283-
root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));
284-
285-
child.children[type_idx]->name = root_holder.owned_type_names.back().get();
286-
SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second, options, context);
287-
}
292+
SetArrowStructFormat(root_holder, child, type, options, context);
288293
break;
289294
}
290295
case LogicalTypeId::ARRAY: {

0 commit comments

Comments
 (0)