Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions diskann-benchmark/src/backend/exhaustive/product.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ mod imp {
5,
);

let offsets = diskann_providers::model::pq::calculate_chunk_offsets_auto(
data.ncols(),
input.num_pq_chunks.get(),
);
let dim = std::num::NonZeroUsize::new(data.ncols())
.ok_or_else(|| anyhow::anyhow!("data has zero columns"))?;
let offsets =
diskann_quantization::views::ChunkOffsets::from_dim(dim, input.num_pq_chunks)?;

let base = {
let threadpool = rayon::ThreadPoolBuilder::new()
Expand All @@ -97,7 +97,7 @@ mod imp {
threadpool.install(|| -> anyhow::Result<_> {
Ok(parameters.train(
data.as_view(),
diskann_quantization::views::ChunkOffsetsView::new(offsets.as_slice())?,
offsets.as_view(),
diskann_quantization::Parallelism::Rayon,
&diskann_quantization::random::StdRngBuilder::new(input.seed),
&diskann_quantization::cancel::DontCancel,
Expand All @@ -109,7 +109,7 @@ mod imp {
data.ncols(),
base.flatten().into(),
vec![0.0; data.ncols()].into(),
offsets.into(),
offsets.as_slice().into(),
)?;

let training_time: MicroSeconds = start.elapsed().into();
Expand Down
19 changes: 14 additions & 5 deletions diskann-disk/src/storage/quant/pq/pq_generation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@ use std::marker::PhantomData;
use diskann::{utils::VectorRepr, ANNError};
use diskann_providers::storage::{StorageReadProvider, StorageWriteProvider};
use diskann_providers::{
model::{
pq::{accum_row_inplace, generate_pq_pivots},
GeneratePivotArguments,
},
model::{pq::generate_pq_pivots, GeneratePivotArguments},
storage::PQStorage,
utils::{BridgeErr, RayonThreadPoolRef, Timer},
};
Expand Down Expand Up @@ -136,7 +133,19 @@ where
)
.bridge_err()?;

accum_row_inplace(full_pivot_data_mat.as_mut_view(), centroid.as_slice());
if full_pivot_data_mat.ncols() != centroid.len() {
return Err(ANNError::log_pq_error(format_args!(
"pivot data ncols {} does not match centroid length {}",
full_pivot_data_mat.ncols(),
centroid.len(),
)));
}

for row in full_pivot_data_mat.row_iter_mut() {
for (a, b) in std::iter::zip(row.iter_mut(), centroid.iter()) {
*a += *b;
}
}

let table = TransposedTable::from_parts(
full_pivot_data_mat.as_view(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ mod tests {
let c = provider.query_computer(&[-0.5, -0.5]).unwrap();
let expected: f32 = 1.5 * 1.5 * 2.0;
assert_eq!(
c.evaluate_similarity(&provider.get_vector_sync(3).unwrap()),
c.evaluate_similarity(provider.get_vector_sync(3).unwrap().as_slice()),
expected
);

Expand All @@ -362,14 +362,14 @@ mod tests {
assert_eq!(
d.evaluate_similarity(
provider.get_vector_sync(0).unwrap().as_slice(),
provider.get_vector_sync(3).unwrap().as_slice(),
provider.get_vector_sync(3).unwrap().as_slice()
),
2.0
);

let slice: &[f32] = &[-0.5, -0.5];
assert_eq!(
d.evaluate_similarity(slice, &provider.get_vector_sync(3).unwrap()),
d.evaluate_similarity(slice, provider.get_vector_sync(3).unwrap().as_slice()),
expected,
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,10 +444,7 @@ mod tests {
// Query Computer.
let c = provider.query_computer(&[-0.5, -0.5]).unwrap();
let expected: f32 = 1.5 * 1.5 * 2.0;
assert_eq!(
c.evaluate_similarity(&provider.get_vector_sync(3)),
expected
);
assert_eq!(c.evaluate_similarity(provider.get_vector_sync(3)), expected);

// Distance Computer.
let d = provider.distance_computer();
Expand Down
8 changes: 4 additions & 4 deletions diskann-providers/src/model/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ pub use configuration::IndexConfiguration;
pub mod pq;
pub use pq::{
FixedChunkPQTable, GeneratePivotArguments, MAX_PQ_TRAINING_SET_SIZE, NUM_KMEANS_REPS_PQ,
NUM_PQ_CENTROIDS, accum_row_inplace, calculate_chunk_offsets_auto, compute_pq_distance,
compute_pq_distance_for_pq_coordinates, direct_distance_impl, distance,
generate_pq_data_from_pivots_from_membuf, generate_pq_data_from_pivots_from_membuf_batch,
generate_pq_pivots, generate_pq_pivots_from_membuf,
NUM_PQ_CENTROIDS, compute_pq_distance, compute_pq_distance_for_pq_coordinates,
direct_distance_impl, distance, generate_pq_data_from_pivots_from_membuf,
generate_pq_data_from_pivots_from_membuf_batch, generate_pq_pivots,
generate_pq_pivots_from_membuf,
};

pub mod statistics;
Expand Down
19 changes: 0 additions & 19 deletions diskann-providers/src/model/pq/distance/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,25 +101,6 @@ where
}
}

impl<T> PreprocessedDistanceFunction<&Vec<u8>, f32> for QueryComputer<T>
where
T: Deref<Target = FixedChunkPQTable>,
{
fn evaluate_similarity(&self, changing: &Vec<u8>) -> f32 {
self.evaluate_similarity(changing.as_slice())
}
}

impl<T> PreprocessedDistanceFunction<&&[u8], f32> for QueryComputer<T>
where
T: Deref<Target = FixedChunkPQTable>,
{
fn evaluate_similarity(&self, changing: &&[u8]) -> f32 {
let changing: &[u8] = changing;
self.evaluate_similarity(changing)
}
}

/// Pre-dispatched distance functions for the `FixedChunkPQTable`.
#[derive(Debug)]
pub struct VTable {
Expand Down
10 changes: 8 additions & 2 deletions diskann-providers/src/model/pq/distance/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ use diskann_vector::{
use rand::{Rng, distr::Distribution};
use rand_distr::{Normal, Uniform};

use crate::model::{FixedChunkPQTable, pq::calculate_chunk_offsets_auto};
use crate::model::FixedChunkPQTable;
use diskann_quantization::views::ChunkOffsets;

/// We need a way to generate random queries.
///
Expand Down Expand Up @@ -130,7 +131,12 @@ pub(crate) fn generate_expected_vector(
/// * N + 1: The number of PQ Pivots
pub(crate) fn seed_pivot_table(config: TableConfig) -> FixedChunkPQTable {
// Get the chunk offsets for the selected dimension and bytes.
let offsets = calculate_chunk_offsets_auto(config.dim, config.pq_chunks);
let chunk_offsets = ChunkOffsets::from_dim(
std::num::NonZeroUsize::new(config.dim).unwrap(),
std::num::NonZeroUsize::new(config.pq_chunks).unwrap(),
)
.unwrap();
let offsets = chunk_offsets.as_slice();

// Create the pivot table following the schema described in the docstring.
let mut pivots = Vec::<f32>::new();
Expand Down
6 changes: 2 additions & 4 deletions diskann-providers/src/model/pq/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@ pub use fixed_chunk_pq_table::{

mod pq_construction;
pub use pq_construction::{
MAX_PQ_TRAINING_SET_SIZE, NUM_KMEANS_REPS_PQ, NUM_PQ_CENTROIDS, accum_row_inplace,
calculate_chunk_offsets, calculate_chunk_offsets_auto, generate_pq_data_from_pivots,
MAX_PQ_TRAINING_SET_SIZE, NUM_KMEANS_REPS_PQ, NUM_PQ_CENTROIDS, generate_pq_data_from_pivots,
generate_pq_data_from_pivots_from_membuf, generate_pq_data_from_pivots_from_membuf_batch,
generate_pq_pivots, generate_pq_pivots_from_membuf, get_chunk_from_training_data,
move_train_data_by_centroid,
generate_pq_pivots, generate_pq_pivots_from_membuf, move_train_data_by_centroid,
};
Comment thread
arkrishn94 marked this conversation as resolved.

/// all metadata of individual sub-component files is written in first 4KB for unified files
Expand Down
Loading
Loading