Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion ci/test_wheel_cuvs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,18 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
LIBCUVS_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp)
CUVS_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" cuvs --stable --cuda "$RAPIDS_CUDA_VERSION")")

# echo to expand wildcard before adding `[extra]` requires for pip
# generate constraints (possibly pinning to oldest supported versions of dependencies)
rapids-generate-pip-constraints test_python "${PIP_CONSTRAINT}"

# notes:
#
# * echo to expand wildcard before adding `[test]` requires for pip
# * just providing --constraint="${PIP_CONSTRAINT}" to be explicit, and because
# that environment variable is ignored if any other --constraint are passed via the CLI
#
rapids-pip-retry install \
--prefer-binary \
--constraint "${PIP_CONSTRAINT}" \
"${LIBCUVS_WHEELHOUSE}"/libcuvs*.whl \
"$(echo "${CUVS_WHEELHOUSE}"/cuvs*.whl)[test]"

Expand Down
24 changes: 0 additions & 24 deletions conda/recipes/libcuvs/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,6 @@ outputs:
- libcurand
- libcusolver
- libcusparse
- if: cuda_major == "13"
then:
- libnvjitlink
ignore_run_exports:
by_name:
- cuda-cudart
Expand All @@ -150,9 +147,6 @@ outputs:
- librmm
- mkl
- nccl
- if: cuda_major == "13"
then:
- libnvjitlink
about:
homepage: ${{ load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }}
license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license }}
Expand Down Expand Up @@ -203,9 +197,6 @@ outputs:
- libcurand
- libcusolver
- libcusparse
- if: cuda_major == "13"
then:
- libnvjitlink
ignore_run_exports:
by_name:
- cuda-cudart
Expand All @@ -219,9 +210,6 @@ outputs:
- librmm
- mkl
- nccl
- if: cuda_major == "13"
then:
- libnvjitlink
about:
homepage: ${{ load_from_file("python/libcuvs/pyproject.toml").project.urls.Homepage }}
license: ${{ load_from_file("python/libcuvs/pyproject.toml").project.license }}
Expand Down Expand Up @@ -270,9 +258,6 @@ outputs:
- libcurand
- libcusolver
- libcusparse
- if: cuda_major == "13"
then:
- libnvjitlink
ignore_run_exports:
by_name:
- cuda-cudart
Expand All @@ -283,9 +268,6 @@ outputs:
- libcurand
- libcusolver
- libcusparse
- if: cuda_major == "13"
then:
- libnvjitlink
- librmm
- mkl
- nccl
Expand Down Expand Up @@ -436,9 +418,6 @@ outputs:
- libcurand
- libcusolver
- libcusparse
- if: cuda_major == "13"
then:
- libnvjitlink
ignore_run_exports:
by_name:
- cuda-cudart
Expand All @@ -449,9 +428,6 @@ outputs:
- libcurand
- libcusolver
- libcusparse
- if: cuda_major == "13"
then:
- libnvjitlink
- librmm
- mkl
- nccl
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ if(NOT BUILD_CPU_ONLY)
src/preprocessing/quantize/binary.cu
src/preprocessing/quantize/pq.cu
src/preprocessing/spectral/spectral_embedding.cu
src/preprocessing/pca/pca.cu
src/selection/select_k_float_int64_t.cu
src/selection/select_k_float_int32_t.cu
src/selection/select_k_float_uint32_t.cu
Expand Down
15 changes: 12 additions & 3 deletions cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
Expand Down Expand Up @@ -169,8 +169,12 @@ void hnsw_lib<T>::set_search_param(const search_param_base& param_, const void*
auto param = dynamic_cast<const search_param&>(param_);
appr_alg_->ef_ = param.ef;
num_threads_ = param.num_threads;
// bench_mode_ = param.metric_objective;
bench_mode_ = Mode::kLatency; // TODO(achirkin): pass the benchmark mode in the algo parameters
if (cuvs::bench::benchmark_n_threads > 1) {
bench_mode_ = Mode::kThroughput;
num_threads_ = 1; // Prevent nested parallelism (gbench threads + batch threads).
} else {
bench_mode_ = Mode::kLatency;
}

// Create a pool if multiple query threads have been set and the pool hasn't been created already
bool create_pool = (bench_mode_ == Mode::kLatency && num_threads_ > 1 && !thread_pool_);
Expand All @@ -181,6 +185,11 @@ template <typename T>
void hnsw_lib<T>::search(
const T* query, int batch_size, int k, algo_base::index_type* indices, float* distances) const
{
if (batch_size == 1) {
get_search_knn_results(query, k, indices, distances);
return;
}

auto f = [&](int i) {
// hnsw can only handle a single vector at a time.
get_search_knn_results(query + i * dim_, k, indices + i * k, distances + i * k);
Expand Down
185 changes: 185 additions & 0 deletions cpp/include/cuvs/preprocessing/pca.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <raft/core/device_mdspan.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/pca_types.hpp>

namespace cuvs::preprocessing::pca {

using solver = raft::linalg::solver;

/**
 * @brief Configuration for a PCA decomposition.
 *
 * Field meanings mirror scikit-learn's PCA estimator:
 * http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
 */
struct params {
  /** @brief How many principal components are retained. */
  int n_components{1};

  /**
   * @brief Whether fit works on a copy of its input.
   *
   * With `false`, the data handed to fit is overwritten, so fit(X).transform(X) does not
   * produce the expected result; call fit_transform(X) in that case.
   */
  bool copy{true};

  /**
   * @brief Enables whitening (off by default).
   *
   * When set, the component vectors are multiplied by sqrt(n_samples) and divided by the
   * singular values, giving uncorrelated outputs with unit component-wise variance.
   */
  bool whiten{false};

  /** @brief Solver algorithm used for the decomposition. */
  solver algorithm{solver::COV_EIG_DQ};

  /**
   * @brief Tolerance applied to singular values computed by svd_solver == 'arpack' or by
   * the Jacobi solver.
   */
  float tol{0.0f};

  /**
   * @brief Iteration count of the power method used by the Jacobi solver.
   */
  int n_iterations{15};
};

/**
* @defgroup pca PCA (Principal Component Analysis)
* @{
*/

/**
* @brief Perform PCA fit operation.
*
* Computes the principal components, explained variances, singular values, and column means
* from the input data.
*
* Every matrix argument is a col-major view and every view is indexed with `int64_t`, so the
* buffers in the example below are allocated with that index type and layout.
*
* @code{.cpp}
* #include <raft/core/resources.hpp>
* #include <cuvs/preprocessing/pca.hpp>
*
* raft::resources handle;
*
* cuvs::preprocessing::pca::params params;
* params.n_components = 2;
*
* auto input = raft::make_device_matrix<float, int64_t, raft::col_major>(
* handle, n_rows, n_cols);
* // ... fill input ...
*
* auto components = raft::make_device_matrix<float, int64_t, raft::col_major>(
* handle, params.n_components, n_cols);
* auto explained_var = raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto explained_var_ratio =
* raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto singular_vals = raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto mu = raft::make_device_vector<float, int64_t>(handle, n_cols);
* auto noise_vars = raft::make_device_scalar<float, int64_t>(handle, 0.0f);
*
* cuvs::preprocessing::pca::fit(handle, params,
* input.view(), components.view(), explained_var.view(),
* explained_var_ratio.view(), singular_vals.view(), mu.view(), noise_vars.view());
* @endcode
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input input data [n_rows x n_cols] (col-major). Modified temporarily.
* @param[out] components principal components [n_components x n_cols] (col-major)
* @param[out] explained_var explained variances [n_components]
* @param[out] explained_var_ratio explained variance ratios [n_components]
* @param[out] singular_vals singular values [n_components]
* @param[out] mu column means [n_cols]
* @param[out] noise_vars noise variance (scalar)
* @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false).
* Defaults to false.
*/
void fit(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> explained_var,
raft::device_vector_view<float, int64_t> explained_var_ratio,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_scalar_view<float, int64_t> noise_vars,
bool flip_signs_based_on_U = false);

/**
* @brief Perform PCA fit and transform operations.
*
* Computes the principal components and transforms the input data into the eigenspace
* in a single operation.
*
* Takes the same arguments as fit() plus `trans_input`, which receives the projected data.
* Every matrix argument is a col-major view and every view is indexed with `int64_t`.
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input input data [n_rows x n_cols] (col-major). Modified temporarily.
* @param[out] trans_input transformed data [n_rows x n_components] (col-major)
* @param[out] components principal components [n_components x n_cols] (col-major)
* @param[out] explained_var explained variances [n_components]
* @param[out] explained_var_ratio explained variance ratios [n_components]
* @param[out] singular_vals singular values [n_components]
* @param[out] mu column means [n_cols]
* @param[out] noise_vars noise variance (scalar)
* @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false).
* Defaults to false.
*/
void fit_transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> explained_var,
raft::device_vector_view<float, int64_t> explained_var_ratio,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_scalar_view<float, int64_t> noise_vars,
bool flip_signs_based_on_U = false);

/**
* @brief Perform PCA transform operation.
*
* Transforms the input data into the eigenspace using previously computed principal components.
*
* The output view `trans_input` is the last parameter; everything before it is an input.
* Every matrix argument is a col-major view and every view is indexed with `int64_t`.
*
* NOTE(review): `components`, `singular_vals` and `mu` are documented as inputs but are taken as
* views over non-const `float` -- confirm whether the implementation ever writes to them.
* NOTE(review): `singular_vals` is presumably only consulted when `config.whiten` is set --
* confirm against the implementation.
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input data to transform [n_rows x n_cols] (col-major). Modified temporarily
* (mean-centered then restored).
* @param[in] components principal components [n_components x n_cols] (col-major)
* @param[in] singular_vals singular values [n_components]
* @param[in] mu column means [n_cols]
* @param[out] trans_input transformed data [n_rows x n_components] (col-major)
*/
void transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input);

/**
* @brief Perform PCA inverse transform operation.
*
* Transforms data from the eigenspace back to the original space.
*
* Shape-wise this is the reverse of transform(): it reads an [n_rows x n_components] matrix and
* writes an [n_rows x n_cols] matrix. Every matrix argument is a col-major view and every view
* is indexed with `int64_t`.
*
* NOTE(review): `trans_input`, `components`, `singular_vals` and `mu` are documented as inputs
* but are taken as views over non-const `float` -- confirm whether the implementation modifies
* any of them (even temporarily).
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[in] trans_input transformed data [n_rows x n_components] (col-major)
* @param[in] components principal components [n_components x n_cols] (col-major)
* @param[in] singular_vals singular values [n_components]
* @param[in] mu column means [n_cols]
* @param[out] output reconstructed data [n_rows x n_cols] (col-major)
*/
void inverse_transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_matrix_view<float, int64_t, raft::col_major> output);

/** @} */ // end group pca

} // namespace cuvs::preprocessing::pca
7 changes: 5 additions & 2 deletions cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -875,7 +875,10 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock)
uint32_t sample_offset = 0;
if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; }
assert(list_length == chunk_indices[probe_id] - sample_offset);
assert(sample_offset + list_length <= max_samples);
if constexpr (!kManageLocalTopK) {
// max_samples is zero/unused in the kManageLocalTopK mode
assert(sample_offset + list_length <= max_samples);
}

constexpr int kUnroll = raft::WarpSize / Veclen;
constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,10 @@ __device__ __forceinline__ void interleaved_scan_kernel_impl(const uint32_t quer
uint32_t sample_offset = 0;
if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; }
assert(list_length == chunk_indices[probe_id] - sample_offset);
assert(sample_offset + list_length <= max_samples);
if constexpr (!kManageLocalTopK) {
// max_samples is zero/unused in the kManageLocalTopK mode
assert(sample_offset + list_length <= max_samples);
}

constexpr int kUnroll = raft::WarpSize / Veclen;
constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize;
Expand Down
Loading
Loading