Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
7a6c10a
make CreatePreprocessorsContainerParams templated and move it to head…
meiravgri May 8, 2025
cc4281a
plan for the tests
meiravgri May 8, 2025
74885a3
Merge remote-tracking branch 'origin/main' into meiravg_fix_blob_copy…
meiravgri May 11, 2025
86a44a9
rename original_blob_size-> input_blob_size
meiravgri May 12, 2025
3e15e76
preprocessors now change the blob size
meiravgri May 12, 2025
1863722
fix test
meiravgri May 12, 2025
55837ba
fix tiered test
meiravgri May 12, 2025
b1699ad
add assert storage_blob == nullptr || input_blob_size == processed_by…
meiravgri May 17, 2025
6dc543d
enable assert only in debug
meiravgri May 17, 2025
3e673b7
use constexpr for blob size
meiravgri May 17, 2025
8967d40
small docs changes
meiravgri May 18, 2025
674b136
review fixes
meiravgri May 27, 2025
d529f5e
ש
meiravgri May 27, 2025
af11142
notes and changes
dor-forer May 28, 2025
5461b97
Merge branch 'main' of https://github.com/RedisAI/VectorSimilarity in…
dor-forer May 28, 2025
eacd40f
Added tests and changes to the PP
dor-forer May 29, 2025
adec86b
frmat
dor-forer May 29, 2025
31a0c7d
Fix and add tests
dor-forer Jun 3, 2025
59fb16d
added tests for coverege
dor-forer Jun 4, 2025
866d8cb
format
dor-forer Jun 4, 2025
ec4a3a7
Remove the tests
dor-forer Jun 4, 2025
985c2c8
Fix test
dor-forer Jun 4, 2025
b7aeb2d
change to input output type
dor-forer Jun 5, 2025
b1fad81
Merge branch 'main' of https://github.com/RedisAI/VectorSimilarity in…
dor-forer Jun 5, 2025
a8aee99
format
dor-forer Jun 5, 2025
9ad3793
Merge branch 'main' into dorer-preprocessor-SQ8
meiravgri Dec 25, 2025
0004b3a
mkae pp templated
meiravgri Dec 25, 2025
1c25f2c
fix include
meiravgri Dec 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/VecSim/spaces/computer/preprocessor_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,17 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *o

void *storage_blob = nullptr;
void *query_blob = nullptr;

// Use of separate variables for the storage_blob_size and query_blob_size, in case we need to
// change their sizes to different values.
size_t storage_blob_size = input_blob_size;
size_t query_blob_size = input_blob_size;

for (auto pp : preprocessors) {
if (!pp)
break;
pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment);
pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size,
this->alignment);
}
// At least one blob was allocated.

Expand Down
177 changes: 176 additions & 1 deletion src/VecSim/spaces/computer/preprocessors.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@

#pragma once

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>

#include "VecSim/memory/vecsim_base.h"
#include "VecSim/spaces/spaces.h"
Expand All @@ -23,8 +25,12 @@ class PreprocessorInterface : public VecsimBaseObject {
: VecsimBaseObject(allocator) {}
// Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
// are the same size.
// Use the overload below for different sizes.
virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &input_blob_size, unsigned char alignment) const = 0;
virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &storage_blob_size, size_t &query_blob_size,
unsigned char alignment) const = 0;
virtual void preprocessForStorage(const void *original_blob, void *&storage_blob,
size_t &input_blob_size) const = 0;
virtual void preprocessQuery(const void *original_blob, void *&query_blob,
Expand All @@ -42,6 +48,20 @@ class CosinePreprocessor : public PreprocessorInterface {
: PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
dim(dim), processed_bytes_count(processed_bytes_count) {}

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &storage_blob_size, size_t &query_blob_size,
unsigned char alignment) const override {
// This assert verifies that that the current use of this function is for blobs of the same
// size, which is the case for the Cosine preprocessor. If we ever need to support different
// sizes for storage and query blobs, we can remove the assert and implement the logic to
// handle different sizes.
assert(storage_blob_size == query_blob_size);

preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
// Ensure both blobs have the same size after processing.
query_blob_size = storage_blob_size;
}

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &input_blob_size, unsigned char alignment) const override {
// This assert verifies that if a blob was allocated by a previous preprocessor, its
Expand Down Expand Up @@ -119,3 +139,158 @@ class CosinePreprocessor : public PreprocessorInterface {
const size_t dim;
const size_t processed_bytes_count;
};

/*
* QuantPreprocessor is a preprocessor that quantizes storage vectors from DataType to a
* lower precision representation using OUTPUT_TYPE (uint8_t).
* Query vectors remain as DataType for asymmetric distance computation.
*
* The quantized storage blob contains the quantized values along with metadata (min value and
* scaling factor) in a single contiguous blob. The quantization is done by finding the minimum and
* maximum values of the input vector, and then scaling the values to fit in the range of [0, 255].
*
* The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE) + 2 * sizeof(DataType)
*/
template <typename DataType>
class QuantPreprocessor : public PreprocessorInterface {
    using OUTPUT_TYPE = uint8_t;

public:
    // Constructor for backward compatibility (single blob size).
    // The storage blob holds `dim` quantized values followed by two DataType metadata values
    // (min value and delta).
    QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
        : PreprocessorInterface(allocator), dim(dim),
          storage_bytes_count(dim * sizeof(OUTPUT_TYPE) + 2 * sizeof(DataType)) {}

    /**
     * Quantizes `dim` DataType values from `input` into `quantized`, then appends the metadata
     * (min value, delta) right after the quantized values.
     *
     * `quantized` must have room for storage_bytes_count bytes.
     *
     * In-place use (`input` and `quantized` pointing to the same buffer) is supported: the
     * min/max scan finishes before anything is written, byte i is only written after element i
     * (which starts at byte offset i * sizeof(DataType) >= i) has been read, and the metadata is
     * written after the whole input has been consumed.
     */
    void quantize(const DataType *input, OUTPUT_TYPE *quantized) const {
        assert(input && quantized);
        // Find min and max values.
        auto [min_val, max_val] = find_min_max(input);

        // Calculate the scaling factor. A constant vector (diff == 0) uses delta = 1 to avoid a
        // division by zero; every element is then quantized to 0.
        const DataType diff = (max_val - min_val);
        const DataType delta = (diff == DataType{0}) ? DataType{1} : diff / DataType{255};
        const DataType inv_delta = DataType{1} / delta;

        // Quantize the values (ascending order is required for the in-place case).
        for (size_t i = 0; i < this->dim; i++) {
            quantized[i] = static_cast<OUTPUT_TYPE>(std::round((input[i] - min_val) * inv_delta));
        }

        // Store min_val and delta right after the quantized values. The destination address
        // (quantized + dim) is not guaranteed to be aligned for DataType (e.g. when dim is not a
        // multiple of alignof(DataType)), so copy the bytes instead of storing through a
        // reinterpret_cast'ed pointer.
        const DataType metadata[2] = {min_val, delta};
        std::memcpy(quantized + this->dim, metadata, sizeof(metadata));
    }

    /**
     * Single-size overload, kept for backward compatibility.
     *
     * Delegates to the two-size overload. On return, input_blob_size holds the size of the
     * quantized storage blob (storage_bytes_count).
     */
    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                    size_t &input_blob_size, unsigned char alignment) const override {
        // Use a local for the query size so the same reference is not passed for both size
        // parameters. The two-size overload does not modify the query size.
        size_t query_blob_size = input_blob_size;
        preprocess(original_blob, storage_blob, query_blob, input_blob_size, query_blob_size,
                   alignment);
    }

    /**
     * Quantizes the storage blob (DataType → OUTPUT_TYPE) while leaving the query blob unchanged.
     *
     * Storage vectors are quantized, while query vectors remain as DataType for asymmetric
     * distance computation.
     *
     * On return, storage_blob points to a quantized blob and storage_blob_size is set to
     * storage_bytes_count.
     *
     * Note: query_blob and query_blob_size are not modified, nor allocated by this function.
     */
    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                    size_t &storage_blob_size, size_t &query_blob_size,
                    unsigned char alignment) const override {
        if (!storage_blob) {
            // CASE 1: no storage blob yet - allocate one and quantize directly from the
            // original data.
            storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
            quantize(static_cast<const DataType *>(original_blob),
                     static_cast<OUTPUT_TYPE *>(storage_blob));
        } else if (storage_blob == query_blob) {
            // CASE 2A: storage and query share the same memory. The query must remain DataType,
            // so quantize from the shared blob into a freshly allocated storage blob. The shared
            // blob is not freed here since query_blob still refers to it.
            void *new_storage =
                this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
            quantize(static_cast<const DataType *>(query_blob),
                     static_cast<OUTPUT_TYPE *>(new_storage));
            storage_blob = new_storage;
        } else {
            // CASE 2B: a separate storage blob already exists - quantize its content.
            quantizeExistingBlob(storage_blob, storage_blob_size, [this, alignment] {
                return this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
            });
        }

        storage_blob_size = this->storage_bytes_count;
    }

    /**
     * Produces the quantized storage blob.
     *
     * If `blob` is null, a new blob is allocated and filled by quantizing `original_blob`.
     * Otherwise the existing blob's content is quantized (in place when it is large enough, or
     * into a replacement allocation when it is not), consistent with how preprocess() treats an
     * existing storage blob.
     *
     * On return, input_blob_size is set to storage_bytes_count.
     */
    void preprocessForStorage(const void *original_blob, void *&blob,
                              size_t &input_blob_size) const override {
        if (!blob) {
            blob = this->allocator->allocate(storage_bytes_count);
            quantize(static_cast<const DataType *>(original_blob),
                     static_cast<OUTPUT_TYPE *>(blob));
        } else {
            // NOTE(review): assumes an already allocated blob holds `dim` DataType values written
            // by an earlier preprocessor - confirm against the container's call sequence.
            quantizeExistingBlob(blob, input_blob_size, [this] {
                return this->allocator->allocate(this->storage_bytes_count);
            });
        }

        input_blob_size = storage_bytes_count;
    }

    void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,
                         unsigned char alignment) const override {
        // No-op: queries remain as original DataType
    }

    /**
     * Quantizes `original_blob` in place. The buffer must be at least storage_bytes_count bytes
     * long (checked by assert only).
     */
    void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
        assert(original_blob);
        assert(input_blob_size >= storage_bytes_count &&
               "Input buffer too small for in-place quantization");

        quantize(static_cast<const DataType *>(original_blob),
                 static_cast<OUTPUT_TYPE *>(original_blob));
    }

private:
    // Returns the {min, max} of the `dim` values starting at `input`.
    std::pair<DataType, DataType> find_min_max(const DataType *input) const {
        // With an empty range minmax_element returns `input` itself, which must not be
        // dereferenced.
        assert(dim > 0);
        auto [min_it, max_it] = std::minmax_element(input, input + dim);
        return {*min_it, *max_it};
    }

    // Quantizes the content of an existing blob that is not shared with the query.
    // If `blob_size` is smaller than storage_bytes_count, the quantized data is written to a new
    // buffer obtained from `alloc()`, the old buffer is freed and `blob` is updated; otherwise
    // the blob is quantized in place.
    template <typename AllocFunc>
    void quantizeExistingBlob(void *&blob, size_t blob_size, AllocFunc &&alloc) const {
        if (blob_size < this->storage_bytes_count) {
            void *resized = alloc();
            quantize(static_cast<const DataType *>(blob), static_cast<OUTPUT_TYPE *>(resized));
            this->allocator->free_allocation(blob);
            blob = resized;
        } else {
            quantize(static_cast<const DataType *>(blob), static_cast<OUTPUT_TYPE *>(blob));
        }
    }

    const size_t dim;
    const size_t storage_bytes_count;
};
Loading
Loading