rapidsai · gforsyth · Mar 24, 2026 · Mar 19, 2026 · Mar 20, 2026 · Mar 24, 2026
@@ -13,8 +13,18 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
 LIBCUVS_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp)
 CUVS_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" cuvs --stable --cuda "$RAPIDS_CUDA_VERSION")")
 
-# echo to expand wildcard before adding `[extra]` requires for pip
+# generate constraints (possibly pinning to oldest support versions of dependencies)
+rapids-generate-pip-constraints test_python "${PIP_CONSTRAINT}"
+
+# notes:
+#
+#   * echo to expand wildcard before adding `[test]` requires for pip
+#   * just providing --constraint="${PIP_CONSTRAINT}" to be explicit, and because
+#     that environment variable is ignored if any other --constraint are passed via the CLI
+#
 rapids-pip-retry install \
+    --prefer-binary \
+    --constraint "${PIP_CONSTRAINT}" \
     "${LIBCUVS_WHEELHOUSE}"/libcuvs*.whl \
     "$(echo "${CUVS_WHEELHOUSE}"/cuvs*.whl)[test]"
 

@@ -134,9 +134,6 @@ outputs:
         - libcurand
         - libcusolver
         - libcusparse
-        - if: cuda_major == "13"
-          then:
-            - libnvjitlink
       ignore_run_exports:
         by_name:
           - cuda-cudart
@@ -150,9 +147,6 @@ outputs:
           - librmm
           - mkl
           - nccl
-          - if: cuda_major == "13"
-            then:
-              - libnvjitlink
     about:
       homepage: ${{ load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }}
       license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license }}
@@ -203,9 +197,6 @@ outputs:
         - libcurand
         - libcusolver
         - libcusparse
-        - if: cuda_major == "13"
-          then:
-            - libnvjitlink
       ignore_run_exports:
         by_name:
           - cuda-cudart
@@ -219,9 +210,6 @@ outputs:
           - librmm
           - mkl
           - nccl
-          - if: cuda_major == "13"
-            then:
-              - libnvjitlink
     about:
       homepage: ${{ load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }}
       license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license }}
@@ -270,9 +258,6 @@ outputs:
         - libcurand
         - libcusolver
         - libcusparse
-        - if: cuda_major == "13"
-          then:
-            - libnvjitlink
       ignore_run_exports:
         by_name:
           - cuda-cudart
@@ -283,9 +268,6 @@ outputs:
           - libcurand
           - libcusolver
           - libcusparse
-          - if: cuda_major == "13"
-            then:
-              - libnvjitlink
           - librmm
           - mkl
           - nccl
@@ -436,9 +418,6 @@ outputs:
         - libcurand
         - libcusolver
         - libcusparse
-        - if: cuda_major == "13"
-          then:
-            - libnvjitlink
       ignore_run_exports:
         by_name:
           - cuda-cudart
@@ -449,9 +428,6 @@ outputs:
           - libcurand
           - libcusolver
           - libcusparse
-          - if: cuda_major == "13"
-            then:
-              - libnvjitlink
           - librmm
           - mkl
           - nccl

@@ -664,6 +664,7 @@ if(NOT BUILD_CPU_ONLY)
     src/preprocessing/quantize/binary.cu
     src/preprocessing/quantize/pq.cu
     src/preprocessing/spectral/spectral_embedding.cu
+    src/preprocessing/pca/pca.cu
     src/selection/select_k_float_int64_t.cu
     src/selection/select_k_float_int32_t.cu
     src/selection/select_k_float_uint32_t.cu

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #pragma once
@@ -169,8 +169,12 @@ void hnsw_lib<T>::set_search_param(const search_param_base& param_, const void*
   auto param     = dynamic_cast<const search_param&>(param_);
   appr_alg_->ef_ = param.ef;
   num_threads_   = param.num_threads;
-  // bench_mode_ = param.metric_objective;
-  bench_mode_ = Mode::kLatency;  // TODO(achirkin): pass the benchmark mode in the algo parameters
+  if (cuvs::bench::benchmark_n_threads > 1) {
+    bench_mode_  = Mode::kThroughput;
+    num_threads_ = 1;  // Prevent nested parallelism (gbench threads + batch threads).
+  } else {
+    bench_mode_ = Mode::kLatency;
+  }
 
   // Create a pool if multiple query threads have been set and the pool hasn't been created already
   bool create_pool = (bench_mode_ == Mode::kLatency && num_threads_ > 1 && !thread_pool_);
@@ -181,6 +185,11 @@ template <typename T>
 void hnsw_lib<T>::search(
   const T* query, int batch_size, int k, algo_base::index_type* indices, float* distances) const
 {
+  if (batch_size == 1) {
+    get_search_knn_results(query, k, indices, distances);
+    return;
+  }
+
   auto f = [&](int i) {
     // hnsw can only handle a single vector at a time.
     get_search_knn_results(query + i * dim_, k, indices + i * k, distances + i * k);

@@ -0,0 +1,185 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/linalg/pca_types.hpp>
+
+namespace cuvs::preprocessing::pca {
+
+using solver = raft::linalg::solver;
+
+/**
+ * @brief Parameters for PCA decomposition. Ref:
+ * http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
+ */
+struct params {
+  /** @brief Number of components to keep. */
+  int n_components = 1;
+
+  /**
+   * @brief If false, data passed to fit are overwritten and running fit(X).transform(X) will
+   * not yield the expected results, use fit_transform(X) instead.
+   */
+  bool copy = true;
+
+  /**
+   * @brief When true (false by default) the components vectors are multiplied by the square
+   * root of n_samples and then divided by the singular values to ensure uncorrelated outputs with
+   * unit component-wise variances.
+   */
+  bool whiten = false;
+
+  /** @brief The solver algorithm to use. */
+  solver algorithm = solver::COV_EIG_DQ;
+
+  /**
+   * @brief Tolerance for singular values computed by svd_solver == 'arpack' or
+   * the Jacobi solver.
+   */
+  float tol = 0.0f;
+
+  /**
+   * @brief Number of iterations for the power method computed by the Jacobi solver.
+   */
+  int n_iterations = 15;
+};
+
+/**
+ * @defgroup pca PCA (Principal Component Analysis)
+ * @{
+ */
+
+/**
+ * @brief Perform PCA fit operation.
+ *
+ * Computes the principal components, explained variances, singular values, and column means
+ * from the input data.
+ *
+ * @code{.cpp}
+ * #include <raft/core/resources.hpp>
+ * #include <cuvs/preprocessing/pca.hpp>
+ *
+ * raft::resources handle;
+ *
+ * cuvs::preprocessing::pca::params params;
+ * params.n_components = 2;
+ *
+ * auto input = raft::make_device_matrix<float, int>(handle, n_rows, n_cols);
+ * // ... fill input ...
+ *
+ * auto components       = raft::make_device_matrix<float, int, raft::col_major>(
+ *     handle, params.n_components, n_cols);
+ * auto explained_var    = raft::make_device_vector<float, int>(handle, params.n_components);
+ * auto explained_var_ratio = raft::make_device_vector<float, int>(handle, params.n_components);
+ * auto singular_vals    = raft::make_device_vector<float, int>(handle, params.n_components);
+ * auto mu               = raft::make_device_vector<float, int>(handle, n_cols);
+ * auto noise_vars       = raft::make_device_scalar<float>(handle);
+ *
+ * cuvs::preprocessing::pca::fit(handle, params,
+ *     input.view(), components.view(), explained_var.view(),
+ *     explained_var_ratio.view(), singular_vals.view(), mu.view(), noise_vars.view());
+ * @endcode
+ *
+ * @param[in] handle raft resource handle
+ * @param[in] config PCA parameters
+ * @param[inout] input input data [n_rows x n_cols] (col-major). Modified temporarily.
+ * @param[out] components principal components [n_components x n_cols] (col-major)
+ * @param[out] explained_var explained variances [n_components]
+ * @param[out] explained_var_ratio explained variance ratios [n_components]
+ * @param[out] singular_vals singular values [n_components]
+ * @param[out] mu column means [n_cols]
+ * @param[out] noise_vars noise variance (scalar)
+ * @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false)
+ */
+void fit(raft::resources const& handle,
+         const params& config,
+         raft::device_matrix_view<float, int64_t, raft::col_major> input,
+         raft::device_matrix_view<float, int64_t, raft::col_major> components,
+         raft::device_vector_view<float, int64_t> explained_var,
+         raft::device_vector_view<float, int64_t> explained_var_ratio,
+         raft::device_vector_view<float, int64_t> singular_vals,
+         raft::device_vector_view<float, int64_t> mu,
+         raft::device_scalar_view<float, int64_t> noise_vars,
+         bool flip_signs_based_on_U = false);
+
+/**
+ * @brief Perform PCA fit and transform operations.
+ *
+ * Computes the principal components and transforms the input data into the eigenspace
+ * in a single operation.
+ *
+ * @param[in] handle raft resource handle
+ * @param[in] config PCA parameters
+ * @param[inout] input input data [n_rows x n_cols] (col-major). Modified temporarily.
+ * @param[out] trans_input transformed data [n_rows x n_components] (col-major)
+ * @param[out] components principal components [n_components x n_cols] (col-major)
+ * @param[out] explained_var explained variances [n_components]
+ * @param[out] explained_var_ratio explained variance ratios [n_components]
+ * @param[out] singular_vals singular values [n_components]
+ * @param[out] mu column means [n_cols]
+ * @param[out] noise_vars noise variance (scalar)
+ * @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false)
+ */
+void fit_transform(raft::resources const& handle,
+                   const params& config,
+                   raft::device_matrix_view<float, int64_t, raft::col_major> input,
+                   raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
+                   raft::device_matrix_view<float, int64_t, raft::col_major> components,
+                   raft::device_vector_view<float, int64_t> explained_var,
+                   raft::device_vector_view<float, int64_t> explained_var_ratio,
+                   raft::device_vector_view<float, int64_t> singular_vals,
+                   raft::device_vector_view<float, int64_t> mu,
+                   raft::device_scalar_view<float, int64_t> noise_vars,
+                   bool flip_signs_based_on_U = false);
+
+/**
+ * @brief Perform PCA transform operation.
+ *
+ * Transforms the input data into the eigenspace using previously computed principal components.
+ *
+ * @param[in] handle raft resource handle
+ * @param[in] config PCA parameters
+ * @param[inout] input data to transform [n_rows x n_cols] (col-major). Modified temporarily
+ * (mean-centered then restored).
+ * @param[in] components principal components [n_components x n_cols] (col-major)
+ * @param[in] singular_vals singular values [n_components]
+ * @param[in] mu column means [n_cols]
+ * @param[out] trans_input transformed data [n_rows x n_components] (col-major)
+ */
+void transform(raft::resources const& handle,
+               const params& config,
+               raft::device_matrix_view<float, int64_t, raft::col_major> input,
+               raft::device_matrix_view<float, int64_t, raft::col_major> components,
+               raft::device_vector_view<float, int64_t> singular_vals,
+               raft::device_vector_view<float, int64_t> mu,
+               raft::device_matrix_view<float, int64_t, raft::col_major> trans_input);
+
+/**
+ * @brief Perform PCA inverse transform operation.
+ *
+ * Transforms data from the eigenspace back to the original space.
+ *
+ * @param[in] handle raft resource handle
+ * @param[in] config PCA parameters
+ * @param[in] trans_input transformed data [n_rows x n_components] (col-major)
+ * @param[in] components principal components [n_components x n_cols] (col-major)
+ * @param[in] singular_vals singular values [n_components]
+ * @param[in] mu column means [n_cols]
+ * @param[out] output reconstructed data [n_rows x n_cols] (col-major)
+ */
+void inverse_transform(raft::resources const& handle,
+                       const params& config,
+                       raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
+                       raft::device_matrix_view<float, int64_t, raft::col_major> components,
+                       raft::device_vector_view<float, int64_t> singular_vals,
+                       raft::device_vector_view<float, int64_t> mu,
+                       raft::device_matrix_view<float, int64_t, raft::col_major> output);
+
+/** @} */  // end group pca
+
+}  // namespace cuvs::preprocessing::pca
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -875,7 +875,10 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock)
       uint32_t sample_offset = 0;
       if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; }
       assert(list_length == chunk_indices[probe_id] - sample_offset);
-      assert(sample_offset + list_length <= max_samples);
+      if constexpr (!kManageLocalTopK) {
+        // max_samples is zero/unused in the kManageLocalTopK mode
+        assert(sample_offset + list_length <= max_samples);
+      }
 
       constexpr int kUnroll        = raft::WarpSize / Veclen;
       constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize;

@@ -833,7 +833,10 @@ __device__ __forceinline__ void interleaved_scan_kernel_impl(const uint32_t quer
       uint32_t sample_offset = 0;
       if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; }
       assert(list_length == chunk_indices[probe_id] - sample_offset);
-      assert(sample_offset + list_length <= max_samples);
+      if constexpr (!kManageLocalTopK) {
+        // max_samples is zero/unused in the kManageLocalTopK mode
+        assert(sample_offset + list_length <= max_samples);
+      }
 
       constexpr int kUnroll        = raft::WarpSize / Veclen;
       constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize;