Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion qdp/qdp-core/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::env;
use std::process::Command;

fn main() {
compile_protos();
configure_cuda_linkage();
}

fn compile_protos() {
// Use vendored protoc to avoid missing protoc in CI/dev environments
unsafe {
std::env::set_var("PROTOC", protoc_bin_vendored::protoc_bin_path().unwrap());
env::set_var("PROTOC", protoc_bin_vendored::protoc_bin_path().unwrap());
}

let mut config = prost_build::Config::new();
Expand All @@ -34,3 +42,54 @@ fn main() {

println!("cargo:rerun-if-changed=proto/tensor.proto");
}

/// Detect the CUDA Runtime toolkit and emit the appropriate link directives.
///
/// `qdp-core` declares CUDA Runtime API extern symbols in `src/gpu/cuda_ffi.rs`
/// (cudaHostAlloc, cudaMemGetInfo, cudaEventCreateWithFlags, ...). Those symbols
/// must be resolved at link time, which requires `libcudart` from the CUDA
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing cargo:rerun-if-env-changed=PATH. The whole decision hinges on nvcc-on-PATH, so installing CUDA after a stub build won't re-trigger the script until cargo clean. Same gap in qdp-kernels.

/// Toolkit. Previously the only `-lcudart` directive lived in `qdp-kernels`'
/// build script, and the `qdp_no_cuda` cfg it sets does not propagate
/// cross-crate. The result was confusing linker errors on systems that have
/// the NVIDIA driver but not the toolkit (e.g. PyTorch-only setups, where
/// PyTorch ships its own bundled cudart inside the wheel).
///
/// This function:
/// * emits `cargo:rustc-link-lib=cudart` and the appropriate
/// `cargo:rustc-link-search` path when nvcc is found, and
/// * emits `cargo:rustc-cfg=qdp_no_cuda` when it is not, gating the
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

output().is_ok() is true even if nvcc exits non-zero — a half-installed nvcc would set has_cuda = true and fall through to a link error. Suggest .map(|o| o.status.success()).unwrap_or(false). Same idiom in qdp-kernels/build.rs:177; fix both so they can't disagree.

/// extern block in `cuda_ffi.rs` so the crate still links (with stubs
/// that return a non-zero CUDA error at runtime).
fn configure_cuda_linkage() {
println!("cargo::rustc-check-cfg=cfg(qdp_no_cuda)");
println!("cargo:rerun-if-env-changed=QDP_NO_CUDA");
println!("cargo:rerun-if-env-changed=CUDA_PATH");

let force_no_cuda = env::var("QDP_NO_CUDA")
.map(|v| v == "1" || v.eq_ignore_ascii_case("true") || v.eq_ignore_ascii_case("yes"))
.unwrap_or(false);

let has_cuda = !force_no_cuda
&& Command::new("nvcc")
.arg("--version")
.output()
.is_ok();

if !has_cuda {
println!("cargo:rustc-cfg=qdp_no_cuda");
println!(
"cargo:warning=qdp-core: CUDA toolkit not found (nvcc not in PATH). \
Building with stub CUDA Runtime symbols; GPU functionality will be \
unavailable at runtime. Install the CUDA Toolkit to enable GPU support."
);
return;
}

let cuda_path = env::var("CUDA_PATH").unwrap_or_else(|_| "/usr/local/cuda".to_string());
println!("cargo:rustc-link-search=native={}/lib64", cuda_path);
println!("cargo:rustc-link-lib=cudart");

// On macOS, also check /usr/local/cuda/lib
#[cfg(target_os = "macos")]
println!("cargo:rustc-link-search=native={}/lib", cuda_path);
}
121 changes: 121 additions & 0 deletions qdp/qdp-core/src/gpu/cuda_ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@ pub(crate) const CUDA_SUCCESS: i32 = 0;
#[allow(dead_code)]
pub(crate) const CUDA_ERROR_NOT_READY: i32 = 34;

// CUDA Runtime API bindings.
//
// On systems with the CUDA Toolkit installed, `qdp-core`'s build script emits
// `cargo:rustc-link-lib=cudart` and the extern block below is used unchanged.
//
// On systems without the toolkit (e.g. driver-only Linux hosts, or macOS),
// the build script sets `qdp_no_cuda` and the stub block is used instead.
// Stubs match each extern's signature and return a non-zero sentinel
// (`QDP_CUDA_UNAVAILABLE`, mirroring qdp-kernels' "999" convention) so the
// crate links cleanly but any actual GPU call surfaces as a runtime error
// through the existing `if ret != 0 { Err(...) }` paths in the callers.

#[cfg(not(qdp_no_cuda))]
unsafe extern "C" {
pub(crate) fn cudaHostAlloc(pHost: *mut *mut c_void, size: usize, flags: u32) -> i32;
pub(crate) fn cudaFreeHost(ptr: *mut c_void) -> i32;
Expand Down Expand Up @@ -99,3 +112,111 @@ unsafe extern "C" {
/// Reference: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
pub(crate) fn cudaEventElapsedTime(ms: *mut f32, start: *mut c_void, end: *mut c_void) -> i32;
}

// ---------------------------------------------------------------------------
// Stub implementations when building without the CUDA toolkit (`qdp_no_cuda`).
//
// Wrapped in a private module so a single `#[allow(non_snake_case)]` covers
// all stub names (the originals are camelCase to match the real CUDA Runtime
// API; the `extern "C"` block above is exempt from this lint, but plain Rust
// fns are not).
// ---------------------------------------------------------------------------

#[cfg(qdp_no_cuda)]
#[allow(non_snake_case)]
mod no_cuda_stubs {
use super::CudaPointerAttributes;
use std::ffi::c_void;

/// Sentinel error code returned by stub CUDA Runtime calls.
///
/// Matches the "999" convention used by qdp-kernels' kernel-launcher stubs.
const QDP_CUDA_UNAVAILABLE: i32 = 999;

pub(crate) unsafe fn cudaHostAlloc(
_pHost: *mut *mut c_void,
_size: usize,
_flags: u32,
) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaFreeHost(_ptr: *mut c_void) -> i32 {
QDP_CUDA_UNAVAILABLE
}

#[allow(dead_code)]
pub(crate) unsafe fn cudaPointerGetAttributes(
_attributes: *mut CudaPointerAttributes,
_ptr: *const c_void,
) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaMemGetInfo(_free: *mut usize, _total: *mut usize) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaMemcpyAsync(
_dst: *mut c_void,
_src: *const c_void,
_count: usize,
_kind: u32,
_stream: *mut c_void,
) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaEventCreateWithFlags(_event: *mut *mut c_void, _flags: u32) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaEventRecord(_event: *mut c_void, _stream: *mut c_void) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaEventDestroy(_event: *mut c_void) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaStreamWaitEvent(
_stream: *mut c_void,
_event: *mut c_void,
_flags: u32,
) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaStreamSynchronize(_stream: *mut c_void) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaMemsetAsync(
_devPtr: *mut c_void,
_value: i32,
_count: usize,
_stream: *mut c_void,
) -> i32 {
QDP_CUDA_UNAVAILABLE
}

#[allow(dead_code)]
pub(crate) unsafe fn cudaEventQuery(_event: *mut c_void) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaEventSynchronize(_event: *mut c_void) -> i32 {
QDP_CUDA_UNAVAILABLE
}

pub(crate) unsafe fn cudaEventElapsedTime(
_ms: *mut f32,
_start: *mut c_void,
_end: *mut c_void,
) -> i32 {
QDP_CUDA_UNAVAILABLE
}
}

#[cfg(qdp_no_cuda)]
pub(crate) use no_cuda_stubs::*;
Loading