Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ members = [
"vortex-bench",
"vortex-datafusion",
"vortex-duckdb",
"vortex-clickhouse",
"vortex-cuda",
"vortex-cuda/cub",
"vortex-cuda/macros",
Expand Down Expand Up @@ -280,6 +281,7 @@ vortex-zstd = { version = "0.1.0", path = "./encodings/zstd", default-features =

# No version constraints for unpublished crates.
vortex-bench = { path = "./vortex-bench", default-features = false }
vortex-clickhouse = { path = "./vortex-clickhouse" }
vortex-cuda = { path = "./vortex-cuda", default-features = false }
vortex-cuda-macros = { path = "./vortex-cuda/macros" }
vortex-duckdb = { path = "./vortex-duckdb", default-features = false }
Expand Down
49 changes: 49 additions & 0 deletions vortex-clickhouse/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
[package]
name = "vortex-clickhouse"
version = { workspace = true }
description = "Vortex ClickHouse extension"
homepage = { workspace = true }
repository = { workspace = true }
authors = { workspace = true }
license = { workspace = true }
publish = false
keywords = { workspace = true }
include = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
categories = { workspace = true }
readme = { workspace = true }

# The `links` key enables DEP_CLICKHOUSE_* environment variables in dependent build scripts.
# See: https://doc.rust-lang.org/cargo/reference/build-scripts.html#the-links-manifest-key
links = "clickhouse"

[lib]
name = "vortex_clickhouse"
path = "src/lib.rs"
crate-type = ["staticlib", "cdylib", "rlib"]

[dependencies]
async-fs = { workspace = true }
bitvec = { workspace = true }
futures = { workspace = true }
glob = { workspace = true }
itertools = { workspace = true }
num-traits = { workspace = true }
object_store = { workspace = true, features = ["aws", "gcp", "azure", "http"] }
parking_lot = { workspace = true }
paste = { workspace = true }
smol = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }
vortex = { workspace = true, features = ["files", "tokio", "object_store"] }
vortex-utils = { workspace = true, features = ["dashmap"] }

[dev-dependencies]
tempfile = { workspace = true }

[lints]
workspace = true

[build-dependencies]
walkdir = { workspace = true }
65 changes: 65 additions & 0 deletions vortex-clickhouse/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Vortex ClickHouse

ClickHouse format plugin for reading and writing [Vortex](https://github.com/spiraldb/vortex) files. Implemented as a Rust static library linked into ClickHouse via C++ FFI wrappers.

## Features

- Read Vortex files: `SELECT * FROM file('data.vortex', 'Vortex')`
- Write Vortex files: `INSERT INTO FUNCTION file('output.vortex', 'Vortex') SELECT ...`
- Automatic schema inference
- Predicate & projection pushdown

## Prerequisites

- **Ninja**: `brew install ninja` (macOS) | `apt-get install ninja-build` (Ubuntu)
- **CMake 3.20+**: `brew install cmake` (macOS) | `apt-get install cmake` (Ubuntu)
- **Rust 1.89+**
- **C++17 compatible compiler**: GCC or Clang

## Build Modes

### Default (Release)

```bash
cargo build -p vortex-clickhouse
```

### Debug Build

Set `VX_CLICKHOUSE_DEBUG=1` to opt into a ClickHouse debug build:

```bash
VX_CLICKHOUSE_DEBUG=1 cargo build -p vortex-clickhouse
```

## Environment Variables

| Variable | Effect |
| ---------------------- | ------------------------------------------------------------- |
| `VX_CLICKHOUSE_DEBUG` | Build ClickHouse in debug mode |
| `CLICKHOUSE_VERSION` | ClickHouse version to build against (default: latest release) |
| `CLICKHOUSE_SOURCE_DIR` | Path to ClickHouse source directory |

## Running Tests

```bash
# Default release build
cargo test -p vortex-clickhouse

# Debug build
VX_CLICKHOUSE_DEBUG=1 cargo test -p vortex-clickhouse
```

## Usage

```sql
-- Read from a Vortex file
SELECT * FROM file('data.vortex', 'Vortex');

-- Read with predicate pushdown
SELECT * FROM file('data.vortex', 'Vortex') WHERE id > 100;

-- Write query results to Vortex
INSERT INTO FUNCTION file('output.vortex', 'Vortex')
SELECT * FROM my_table;
```
71 changes: 71 additions & 0 deletions vortex-clickhouse/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![allow(clippy::unwrap_used)]
#![allow(clippy::expect_used)]
#![allow(clippy::panic)]

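//! Build script for vortex-clickhouse.
//!
//! Planned steps (not implemented yet):
//! 1. Download the ClickHouse source code (for headers) - optional
//! 2. Generate Rust bindings from the C++ headers (bindgen) - when headers are available
//!
//! What the script does for now:
//! 3. Writes a placeholder `src/cpp.rs`
//! 4. Tells Cargo to re-run it when the ClickHouse environment variables or the
//!    C/C++ sources under `cpp/` change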

use std::env;
use std::fs;
use std::path::PathBuf;

/// Build-script entry point.
///
/// Writes the placeholder FFI module to `src/cpp.rs` (only when its content
/// differs, so the file's mtime is not bumped needlessly) and prints the
/// `cargo:rerun-if-*` directives that decide when Cargo re-runs this script.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set or if `src/cpp.rs` cannot be written.
fn main() {
    // Once any `rerun-if-*` directive is printed, Cargo stops using its default
    // "any file in the package changed" rule, so the script has to list itself.
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rerun-if-env-changed=CLICKHOUSE_VERSION");
    println!("cargo:rerun-if-env-changed=CLICKHOUSE_SOURCE_DIR");
    // Documented in the README as the debug-build switch; registered here so that
    // toggling it invalidates the build-script output like the other variables.
    println!("cargo:rerun-if-env-changed=VX_CLICKHOUSE_DEBUG");

    let crate_dir = PathBuf::from(
        env::var("CARGO_MANIFEST_DIR").expect("Cargo sets CARGO_MANIFEST_DIR for build scripts"),
    );

    // For now, we'll create a minimal cpp.rs since we don't have ClickHouse headers yet
    // The full implementation will require ClickHouse headers
    let cpp_rs_content = r#"// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! FFI bindings for ClickHouse.
//!
//! This module is auto-generated by bindgen when ClickHouse headers are available.
//! For now, it contains placeholder definitions.

#![allow(dead_code)]
#![allow(non_camel_case_types)]
#![allow(non_upper_case_globals)]
#![allow(non_snake_case)]

// Placeholder types - will be replaced by bindgen-generated code
// when ClickHouse headers are available.

/// Result status for operations.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum vortex_ch_state {
Success = 0,
Error = 1,
}
"#;

    // Only touch the file when the content actually differs; a missing or
    // unreadable file reads as "" and is therefore (re)written.
    let cpp_rs_path = crate_dir.join("src/cpp.rs");
    let existing = fs::read_to_string(&cpp_rs_path).unwrap_or_default();
    if existing != cpp_rs_content {
        fs::write(&cpp_rs_path, cpp_rs_content).expect("Failed to write cpp.rs");
    }

    // Watch C/C++ source files for changes.
    //
    // The directory itself is registered as well: per-file entries only cover
    // files that existed when the script last ran, so a newly added source
    // would otherwise go unnoticed.
    let cpp_dir = crate_dir.join("cpp");
    println!("cargo:rerun-if-changed={}", cpp_dir.display());

    for entry in walkdir::WalkDir::new(&cpp_dir) {
        match entry {
            Ok(entry) => {
                let is_cpp_source = entry
                    .path()
                    .extension()
                    .is_some_and(|ext| ext == "cpp" || ext == "h" || ext == "hpp");
                if is_cpp_source {
                    println!("cargo:rerun-if-changed={}", entry.path().display());
                }
            }
            // Surface walk failures instead of silently dropping them; the build
            // still proceeds, matching the original best-effort behaviour.
            Err(err) => {
                println!(
                    "cargo:warning=failed to scan {}: {err}",
                    cpp_dir.display()
                );
            }
        }
    }
}
27 changes: 27 additions & 0 deletions vortex-clickhouse/cbindgen.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# cbindgen configuration for generating C header from Rust code.
# This exports Rust functions for C++ to call.

language = "C"
include_guard = "VORTEX_CLICKHOUSE_H"
autogen_warning = "/* Warning: this file is autogenerated by cbindgen. Do not edit manually. */"

# Include all exported Rust types
[export]
prefix = "vortex_"
include = []
exclude = []

[parse]
parse_deps = false
clean = false

[parse.expand]
crates = ["vortex-clickhouse"]

[fn]
# Put each function argument on its own line in the generated declarations
args = "vertical"

[struct]
derive_eq = true
derive_neq = true
26 changes: 26 additions & 0 deletions vortex-clickhouse/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
cmake_minimum_required(VERSION 3.20)

# This CMakeLists.txt is used when building the C++ components
# as part of ClickHouse's build system.

# Source files
set(VORTEX_CLICKHOUSE_SOURCES
    VortexBlockInputFormat.cpp
    VortexBlockOutputFormat.cpp
    registerFormats.cpp
)

# Headers
# Keep this list in sync with the #include lines of include/clickhouse_vx.h,
# which pulls in every component header listed below.
set(VORTEX_CLICKHOUSE_HEADERS
    include/clickhouse_vx.h
    include/clickhouse_vx/common.h
    include/clickhouse_vx/format.h
    include/clickhouse_vx/column.h
    include/clickhouse_vx/scanner.h
    include/clickhouse_vx/exporter.h
    include/clickhouse_vx/writer.h
)

# Include directories
include_directories(include)

# Note: When integrated into ClickHouse build system, this will be built
# as part of the main ClickHouse library. The vortex-clickhouse Rust crate
# will be linked as a static library.
53 changes: 53 additions & 0 deletions vortex-clickhouse/cpp/include/clickhouse_vx.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#pragma once

// Main header file for vortex-clickhouse C++ components.
//
// This header provides the C API for integrating Vortex with ClickHouse.
// Include this header in your ClickHouse plugin code.
//
// The API is organized into several components:
//
// - common.h: Basic types and handles
// - scanner.h: Reading Vortex files
// - exporter.h: Extracting data from Vortex arrays
// - writer.h: Writing Vortex files
// - column.h: Column type utilities
// - format.h: Format constants
//
// Example usage (reading):
//
// #include <clickhouse_vx.h>
//
// // Open a Vortex file
// VortexScanner* scanner = vortex_scanner_new("/path/to/data.vortex");
// if (!scanner) { return handleError(); }
//
// // Get schema
// size_t num_cols = vortex_scanner_num_columns(scanner);
// for (size_t i = 0; i < num_cols; i++) {
// const char* name = vortex_scanner_column_name(scanner, i);
// const char* type = vortex_scanner_column_type(scanner, i);
// // ... configure ClickHouse columns ...
// }
//
// // Read data
// while (vortex_scanner_has_more(scanner)) {
// VortexExporterHandle* batch = vortex_scanner_read_batch(scanner);
// while (vortex_exporter_has_more(batch)) {
// // Export to ClickHouse columns
// vortex_exporter_export(batch, buffer, max_rows);
// }
// vortex_exporter_free(batch);
// }
//
// vortex_scanner_free(scanner);

#include "clickhouse_vx/common.h"
#include "clickhouse_vx/format.h"
#include "clickhouse_vx/column.h"
#include "clickhouse_vx/scanner.h"
#include "clickhouse_vx/exporter.h"
#include "clickhouse_vx/writer.h"
Loading