Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/duckdb/src/function/table/direct_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ void DirectFileReader::Scan(ClientContext &context, GlobalTableFunctionState &gl
}

auto files = state.file_list;

auto &regular_fs = FileSystem::GetFileSystem(context);
auto fs = CachingFileSystem::Get(context);
idx_t out_idx = 0;

Expand All @@ -65,6 +67,14 @@ void DirectFileReader::Scan(ClientContext &context, GlobalTableFunctionState &gl
flags |= FileFlags::FILE_FLAGS_DIRECT_IO;
}
file_handle = fs.OpenFile(QueryContext(context), file, flags);
} else {
// At least verify that the file exist
// The globbing behavior in remote filesystems can lead to files being listed that do not actually exist
if (FileSystem::IsRemoteFile(file.path) && !regular_fs.FileExists(file.path)) {
output.SetCardinality(0);
done = true;
return;
}
}

for (idx_t col_idx = 0; col_idx < state.column_ids.size(); col_idx++) {
Expand Down
66 changes: 65 additions & 1 deletion src/duckdb/src/function/table/read_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,43 @@

namespace duckdb {

namespace {

//------------------------------------------------------------------------------
// DirectMultiFileInfo
//------------------------------------------------------------------------------

template <class OP>
struct DirectMultiFileInfo : MultiFileReaderInterface {
static unique_ptr<MultiFileReaderInterface> CreateInterface(ClientContext &context);
unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
optional_ptr<TableFunctionInfo> info) override;
bool ParseCopyOption(ClientContext &context, const string &key, const vector<Value> &values,
BaseFileReaderOptions &options, vector<string> &expected_names,
vector<LogicalType> &expected_types) override;
bool ParseOption(ClientContext &context, const string &key, const Value &val, MultiFileOptions &file_options,
BaseFileReaderOptions &options) override;
unique_ptr<TableFunctionData> InitializeBindData(MultiFileBindData &multi_file_data,
unique_ptr<BaseFileReaderOptions> options) override;
void BindReader(ClientContext &context, vector<LogicalType> &return_types, vector<string> &names,
MultiFileBindData &bind_data) override;
optional_idx MaxThreads(const MultiFileBindData &bind_data_p, const MultiFileGlobalState &global_state,
FileExpandResult expand_result) override;
unique_ptr<GlobalTableFunctionState> InitializeGlobalState(ClientContext &context, MultiFileBindData &bind_data,
MultiFileGlobalState &global_state) override;
unique_ptr<LocalTableFunctionState> InitializeLocalState(ExecutionContext &, GlobalTableFunctionState &) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
BaseUnionData &union_data, const MultiFileBindData &bind_data_p) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
const OpenFileInfo &file, idx_t file_idx,
const MultiFileBindData &bind_data) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const OpenFileInfo &file,
BaseFileReaderOptions &options,
const MultiFileOptions &file_options) override;
unique_ptr<NodeStatistics> GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override;
FileGlobInput GetGlobInput() override;
};

template <class OP>
unique_ptr<MultiFileReaderInterface> DirectMultiFileInfo<OP>::CreateInterface(ClientContext &context) {
return make_uniq<DirectMultiFileInfo>();
Expand Down Expand Up @@ -132,14 +165,45 @@ FileGlobInput DirectMultiFileInfo<OP>::GetGlobInput() {
}

//------------------------------------------------------------------------------
// Register
// Operations
//------------------------------------------------------------------------------

struct ReadBlobOperation {
static constexpr const char *NAME = "read_blob";
static constexpr const char *FILE_TYPE = "blob";

static inline LogicalType TYPE() {
return LogicalType::BLOB;
}
};

struct ReadTextOperation {
static constexpr const char *NAME = "read_text";
static constexpr const char *FILE_TYPE = "text";

static inline LogicalType TYPE() {
return LogicalType::VARCHAR;
}
};

template <class OP>
static TableFunction GetFunction() {
MultiFileFunction<DirectMultiFileInfo<OP>> table_function(OP::NAME);
// Erase extra multi file reader options
table_function.named_parameters.erase("filename");
table_function.named_parameters.erase("hive_partitioning");
table_function.named_parameters.erase("union_by_name");
table_function.named_parameters.erase("hive_types");
table_function.named_parameters.erase("hive_types_autocast");
return table_function;
}

} // namespace

//------------------------------------------------------------------------------
// Register
//------------------------------------------------------------------------------

void ReadBlobFunction::RegisterFunction(BuiltinFunctions &set) {
auto scan_fun = GetFunction<ReadBlobOperation>();
set.AddFunction(MultiFileReader::CreateFunctionSet(scan_fun));
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "2-dev192"
#define DUCKDB_PATCH_VERSION "2-dev199"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 4
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.4.2-dev192"
#define DUCKDB_VERSION "v1.4.2-dev199"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "b68a1696de"
#define DUCKDB_SOURCE_ID "9ea6e07a29"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
49 changes: 0 additions & 49 deletions src/duckdb/src/include/duckdb/function/table/read_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,53 +31,4 @@ struct ReadFileGlobalState : public GlobalTableFunctionState {
bool requires_file_open = false;
};

struct ReadBlobOperation {
static constexpr const char *NAME = "read_blob";
static constexpr const char *FILE_TYPE = "blob";

static inline LogicalType TYPE() {
return LogicalType::BLOB;
}
};

struct ReadTextOperation {
static constexpr const char *NAME = "read_text";
static constexpr const char *FILE_TYPE = "text";

static inline LogicalType TYPE() {
return LogicalType::VARCHAR;
}
};

template <class OP>
struct DirectMultiFileInfo : MultiFileReaderInterface {
static unique_ptr<MultiFileReaderInterface> CreateInterface(ClientContext &context);
unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
optional_ptr<TableFunctionInfo> info) override;
bool ParseCopyOption(ClientContext &context, const string &key, const vector<Value> &values,
BaseFileReaderOptions &options, vector<string> &expected_names,
vector<LogicalType> &expected_types) override;
bool ParseOption(ClientContext &context, const string &key, const Value &val, MultiFileOptions &file_options,
BaseFileReaderOptions &options) override;
unique_ptr<TableFunctionData> InitializeBindData(MultiFileBindData &multi_file_data,
unique_ptr<BaseFileReaderOptions> options) override;
void BindReader(ClientContext &context, vector<LogicalType> &return_types, vector<string> &names,
MultiFileBindData &bind_data) override;
optional_idx MaxThreads(const MultiFileBindData &bind_data_p, const MultiFileGlobalState &global_state,
FileExpandResult expand_result) override;
unique_ptr<GlobalTableFunctionState> InitializeGlobalState(ClientContext &context, MultiFileBindData &bind_data,
MultiFileGlobalState &global_state) override;
unique_ptr<LocalTableFunctionState> InitializeLocalState(ExecutionContext &, GlobalTableFunctionState &) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
BaseUnionData &union_data, const MultiFileBindData &bind_data_p) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
const OpenFileInfo &file, idx_t file_idx,
const MultiFileBindData &bind_data) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const OpenFileInfo &file,
BaseFileReaderOptions &options,
const MultiFileOptions &file_options) override;
unique_ptr<NodeStatistics> GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override;
FileGlobInput GetGlobInput() override;
};

} // namespace duckdb
14 changes: 14 additions & 0 deletions src/duckdb/src/include/duckdb/logging/log_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class PhysicalOperator;
class AttachedDatabase;
class RowGroup;
struct DataTableInfo;
enum class MetricsType : uint8_t;

//! Log types provide some structure to the formats that the different log messages can have
//! For now, this holds a type that the VARCHAR value will be auto-cast into.
Expand Down Expand Up @@ -106,6 +107,19 @@ class PhysicalOperatorLogType : public LogType {
const vector<pair<string, string>> &info);
};

class MetricsLogType : public LogType {
public:
static constexpr const char *NAME = "Metrics";
static constexpr LogLevel LEVEL = LogLevel::LOG_INFO;

//! Construct the log type
MetricsLogType();

static LogicalType GetLogType();

static string ConstructLogMessage(const MetricsType &type, const Value &value);
};

class CheckpointLogType : public LogType {
public:
static constexpr const char *NAME = "Checkpoint";
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/include/duckdb/main/profiling_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class ProfilingInfo {

public:
string GetMetricAsString(const MetricsType metric) const;
void WriteMetricsToLog(ClientContext &context);
void WriteMetricsToJSON(duckdb_yyjson::yyjson_mut_doc *doc, duckdb_yyjson::yyjson_mut_val *destination);

public:
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/include/duckdb/main/query_profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class QueryProfiler {
static InsertionOrderPreservingMap<string> JSONSanitize(const InsertionOrderPreservingMap<string> &input);
static string JSONSanitize(const string &text);
static string DrawPadded(const string &str, idx_t width);
DUCKDB_API void ToLog() const;
DUCKDB_API string ToJSON() const;
DUCKDB_API void WriteToFile(const char *path, string &info) const;

Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/logging/log_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ void LogManager::RegisterDefaultLogTypes() {
RegisterLogType(make_uniq<HTTPLogType>());
RegisterLogType(make_uniq<QueryLogType>());
RegisterLogType(make_uniq<PhysicalOperatorLogType>());
RegisterLogType(make_uniq<MetricsLogType>());
}

} // namespace duckdb
24 changes: 24 additions & 0 deletions src/duckdb/src/logging/log_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ constexpr LogLevel FileSystemLogType::LEVEL;
constexpr LogLevel QueryLogType::LEVEL;
constexpr LogLevel HTTPLogType::LEVEL;
constexpr LogLevel PhysicalOperatorLogType::LEVEL;
constexpr LogLevel MetricsLogType::LEVEL;
constexpr LogLevel CheckpointLogType::LEVEL;

//===--------------------------------------------------------------------===//
Expand Down Expand Up @@ -147,6 +148,29 @@ string PhysicalOperatorLogType::ConstructLogMessage(const PhysicalOperator &phys

return Value::STRUCT(std::move(child_list)).ToString();
}

//===--------------------------------------------------------------------===//
// MetricsLogType
//===--------------------------------------------------------------------===//
MetricsLogType::MetricsLogType() : LogType(NAME, LEVEL, GetLogType()) {
}

LogicalType MetricsLogType::GetLogType() {
child_list_t<LogicalType> child_list = {
{"metric", LogicalType::VARCHAR},
{"value", LogicalType::VARCHAR},
};
return LogicalType::STRUCT(child_list);
}

string MetricsLogType::ConstructLogMessage(const MetricsType &metric, const Value &value) {
child_list_t<Value> child_list = {
{"metric", EnumUtil::ToString(metric)},
{"value", value.ToString()},
};
return Value::STRUCT(std::move(child_list)).ToString();
}

//===--------------------------------------------------------------------===//
// CheckpointLogType
//===--------------------------------------------------------------------===//
Expand Down
11 changes: 11 additions & 0 deletions src/duckdb/src/main/profiling_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "duckdb/common/enum_util.hpp"
#include "duckdb/main/query_profiler.hpp"
#include "duckdb/logging/log_manager.hpp"

#include "yyjson.hpp"

Expand Down Expand Up @@ -169,6 +170,16 @@ string ProfilingInfo::GetMetricAsString(const MetricsType metric) const {
return metrics.at(metric).ToString();
}

void ProfilingInfo::WriteMetricsToLog(ClientContext &context) {
auto &logger = Logger::Get(context);
if (logger.ShouldLog(MetricsLogType::NAME, MetricsLogType::LEVEL)) {
for (auto &metric : settings) {
logger.WriteLog(MetricsLogType::NAME, MetricsLogType::LEVEL,
MetricsLogType::ConstructLogMessage(metric, metrics[metric]));
}
}
}

void ProfilingInfo::WriteMetricsToJSON(yyjson_mut_doc *doc, yyjson_mut_val *dest) {
for (auto &metric : settings) {
auto metric_str = StringUtil::Lower(EnumUtil::ToString(metric));
Expand Down
16 changes: 16 additions & 0 deletions src/duckdb/src/main/query_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,9 @@ void QueryProfiler::EndQuery() {

guard.unlock();

// To log is inexpensive, whether to log or not depends on whether logging is active
ToLog();

if (emit_output) {
string tree = ToString();
auto save_location = GetSaveLocation();
Expand Down Expand Up @@ -797,6 +800,19 @@ static string StringifyAndFree(yyjson_mut_doc *doc, yyjson_mut_val *object) {
return result;
}

void QueryProfiler::ToLog() const {
lock_guard<std::mutex> guard(lock);

if (!root) {
// No root, not much to do
return;
}

auto &settings = root->GetProfilingInfo();

settings.WriteMetricsToLog(context);
}

string QueryProfiler::ToJSON() const {
lock_guard<std::mutex> guard(lock);
auto doc = yyjson_mut_doc_new(nullptr);
Expand Down
7 changes: 7 additions & 0 deletions src/duckdb/src/storage/compression/validity_uncompressed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,13 @@ void ValidityUncompressed::UnalignedScan(data_ptr_t input, idx_t input_size, idx
// otherwise the subsequent bitwise & will modify values outside of the range of values we want to alter
input_mask |= ValidityUncompressed::UPPER_MASKS[shift_amount];

if (pos == 0) {
// We also need to set the lower bits, which are to the left of the relevant bits (x), to 1
// These are the bits that are "behind" this scan window, and should not affect this scan
auto non_relevant_mask = ValidityUncompressed::LOWER_MASKS[result_idx];
input_mask |= non_relevant_mask;
}

// after this, we move to the next input_entry
offset = ValidityMask::BITS_PER_VALUE - input_idx;
input_entry++;
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/storage/table/array_column_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ void ArrayColumnData::Select(TransactionData transaction, idx_t vector_index, Co
// not consecutive - break
break;
}
end_idx = next_idx;
end_idx = next_idx + 1;
}
consecutive_ranges++;
}
Expand Down