Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions diskann-garnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,16 @@ added:

- `XB8`: When specifying vector input type, you can use `XB8` instead of `FP32`
to specify binary data in uint8 format, one byte per dimension.
- `SB8`: When specifying vector input type, you can use `SB8` instead of `FP32`
to specify binary data in int8 (signed) format, one byte per dimension.
- `XPREQ8`: This is a pseudo-quantizer that specifies the vector data will be
stored as full precision data in uint8 format.
- `Q8`: This is a pseudo-quantizer that specifies the vector data will be
stored as full precision data in int8 (signed) format.

Generally you will use `XB8` with `XPREQ8` to input and store uint8 vectors and
`FP32` with `NOQUANT` to input and store f32 vectors.
Generally you will use `XB8` with `XPREQ8` to input and store uint8 vectors,
`SB8` with `Q8` to input and store int8 vectors, and `FP32` with `NOQUANT` to
input and store f32 vectors.

Support for binary and scalar quantization is coming, along with support for
customizing the distance metric.
Expand Down
52 changes: 51 additions & 1 deletion diskann-garnet/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ pub enum VectorValueType {
Invalid = 0,
FP32,
XB8,
SB8,
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -245,6 +246,21 @@ pub unsafe extern "C" fn create_index(
ptr::null()
}
}
VectorQuantType::Q8 => {
if let Ok(index) = create_index_impl::<i8>(
quant_type,
config,
dim as usize,
metric_type,
max_degree as usize,
callbacks,
context,
) {
Arc::into_raw(index).cast::<c_void>()
} else {
ptr::null()
}
}
VectorQuantType::NoQuant => {
if let Ok(index) = create_index_impl::<f32>(
quant_type,
Expand Down Expand Up @@ -309,7 +325,7 @@ fn interpret_vector<'a>(
) -> Option<PolyCow<'a>> {
let vector_len_bytes = match vector_value_type {
VectorValueType::FP32 => vector_len * 4,
VectorValueType::XB8 => vector_len,
VectorValueType::XB8 | VectorValueType::SB8 => vector_len,
VectorValueType::Invalid => return None,
};

Expand All @@ -332,6 +348,20 @@ fn interpret_vector<'a>(
}
PolyCow::from(bp)
}
VectorQuantType::Q8 => {
let mut bp = if let Ok(bp) = Poly::broadcast(0u8, vector_len, AlignToEight) {
bp
} else {
return None;
};
for (idx, e) in bp.iter_mut().enumerate() {
let el_size = mem::size_of::<f32>();
*e = f32::from_le_bytes(
v[idx * el_size..(idx + 1) * el_size].try_into().unwrap(),
) as i8 as u8;
}
PolyCow::from(bp)
}
VectorQuantType::NoQuant if v.as_ptr().align_offset(4) == 0 => {
// pointer is correctly aligned to interpret as f32
PolyCow::from(v)
Expand Down Expand Up @@ -370,6 +400,26 @@ fn interpret_vector<'a>(
}
_ => return None,
},
VectorValueType::SB8 => match quant_type {
VectorQuantType::Q8 => PolyCow::from(v),
VectorQuantType::NoQuant => {
let mut fp = if let Ok(p) =
Poly::broadcast(0u8, vector_len_bytes * mem::size_of::<f32>(), AlignToEight)
{
p
} else {
return None;
};
for (fe, be) in bytemuck::cast_slice_mut::<u8, f32>(&mut fp)
.iter_mut()
.zip(v)
{
*fe = (*be as i8) as f32;
}
PolyCow::from(fp)
}
_ => return None,
},
};

Some(v)
Expand Down
271 changes: 271 additions & 0 deletions garnet-sb8-support.patch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
# Garnet SB8 (Signed Int8) Support — Manual Patch Instructions
#
# These changes add VectorValueType.SB8 and wire it through the Garnet server
# to match the DiskANN Rust FFI changes (VectorValueType::SB8 = 3, VectorQuantType::Q8).
#
# 4 files to edit:
#
# ══════════════════════════════════════════════════════════════════════
# 1. libs/server/Storage/Session/MainStore/VectorStoreOps.cs
# ══════════════════════════════════════════════════════════════════════
#
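# In the VectorValueType enum, add SB8 immediately after the XB8 member.
# Position matters: the enum value is passed across the FFI boundary, so SB8
# must keep the same numeric value as the Rust side (VectorValueType::SB8 = 3).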
#
# FIND:
# /// <summary>
# /// Bytes (8 bit).
# /// </summary>
# XB8,
# }
#
# REPLACE WITH:
# /// <summary>
# /// Bytes (8 bit).
# /// </summary>
# XB8,
#
# /// <summary>
# /// Signed bytes (int8, [-128, 127]).
# /// </summary>
# SB8,
# }
#
#
# ══════════════════════════════════════════════════════════════════════
# 2. libs/server/Resp/Vector/RespServerSessionVectors.cs
# ══════════════════════════════════════════════════════════════════════
#
# --- 2a. In NetworkVADD, after the "XB8" parsing block, add an "SB8" block.
#
# FIND (the XB8 block in VADD, around line 115):
# else if (parseState.GetArgSliceByRef(curIx).Span.EqualsUpperCaseSpanIgnoringCase("XB8"u8))
# {
# curIx++;
# if (curIx >= parseState.Count)
# {
# return AbortWithWrongNumberOfArguments("VADD");
# }
#
# var asBytes = parseState.GetArgSliceByRef(curIx).Span;
# curIx++;
#
# valueType = VectorValueType.XB8;
# values = asBytes;
# }
#
# REPLACE WITH:
# else if (parseState.GetArgSliceByRef(curIx).Span.EqualsUpperCaseSpanIgnoringCase("XB8"u8))
# {
# curIx++;
# if (curIx >= parseState.Count)
# {
# return AbortWithWrongNumberOfArguments("VADD");
# }
#
# var asBytes = parseState.GetArgSliceByRef(curIx).Span;
# curIx++;
#
# valueType = VectorValueType.XB8;
# values = asBytes;
# }
# else if (parseState.GetArgSliceByRef(curIx).Span.EqualsUpperCaseSpanIgnoringCase("SB8"u8))
# {
# curIx++;
# if (curIx >= parseState.Count)
# {
# return AbortWithWrongNumberOfArguments("VADD");
# }
#
# var asBytes = parseState.GetArgSliceByRef(curIx).Span;
# curIx++;
#
# valueType = VectorValueType.SB8;
# values = asBytes;
# }
#
#
# --- 2b. In the quant guard (around line 349), allow Q8 through:
#
# FIND:
# if (quantType != VectorQuantType.XPreQ8 && quantType != VectorQuantType.NoQuant)
#
# REPLACE WITH:
# if (quantType != VectorQuantType.XPreQ8 && quantType != VectorQuantType.NoQuant && quantType != VectorQuantType.Q8)
#
#
# --- 2c. In NetworkVSIM, after the "XB8" parsing block, add an "SB8" block.
#
# FIND (the XB8 block in VSIM, around line 502):
# else if (kind.Span.EqualsUpperCaseSpanIgnoringCase("XB8"u8))
# {
# if (curIx >= parseState.Count)
# {
# return AbortWithWrongNumberOfArguments("VSIM");
# }
#
# var asBytes = parseState.GetArgSliceByRef(curIx).Span;
#
# valueType = VectorValueType.XB8;
# values = asBytes;
# curIx++;
# }
#
# REPLACE WITH:
# else if (kind.Span.EqualsUpperCaseSpanIgnoringCase("XB8"u8))
# {
# if (curIx >= parseState.Count)
# {
# return AbortWithWrongNumberOfArguments("VSIM");
# }
#
# var asBytes = parseState.GetArgSliceByRef(curIx).Span;
#
# valueType = VectorValueType.XB8;
# values = asBytes;
# curIx++;
# }
# else if (kind.Span.EqualsUpperCaseSpanIgnoringCase("SB8"u8))
# {
# if (curIx >= parseState.Count)
# {
# return AbortWithWrongNumberOfArguments("VSIM");
# }
#
# var asBytes = parseState.GetArgSliceByRef(curIx).Span;
#
# valueType = VectorValueType.SB8;
# values = asBytes;
# curIx++;
# }
#
#
# ══════════════════════════════════════════════════════════════════════
# 3. libs/server/Resp/Vector/DiskANNService.cs
# ══════════════════════════════════════════════════════════════════════
#
# --- 3a. In the Insert method, add SB8 arm after XB8 (around line 79):
#
# FIND:
# else if (vectorType == VectorValueType.XB8)
# {
# vector_len = vector.Length;
# }
# else
# {
# throw new NotImplementedException($"{vectorType}");
# }
#
# REPLACE WITH:
# else if (vectorType == VectorValueType.XB8)
# {
# vector_len = vector.Length;
# }
# else if (vectorType == VectorValueType.SB8)
# {
# vector_len = vector.Length;
# }
# else
# {
# throw new NotImplementedException($"{vectorType}");
# }
#
#
# --- 3b. In SearchVector method, same pattern (around line 117):
#
# FIND:
# else if (vectorType == VectorValueType.XB8)
# {
# vector_len = vector.Length;
# }
# else
# {
# throw new NotImplementedException($"{vectorType}");
# }
#
# REPLACE WITH:
# else if (vectorType == VectorValueType.XB8)
# {
# vector_len = vector.Length;
# }
# else if (vectorType == VectorValueType.SB8)
# {
# vector_len = vector.Length;
# }
# else
# {
# throw new NotImplementedException($"{vectorType}");
# }
#
#
# ══════════════════════════════════════════════════════════════════════
# 4. libs/server/Resp/Vector/VectorManager.cs
# ══════════════════════════════════════════════════════════════════════
#
# --- 4a. In CalculateValueDimensions (around line 945), add SB8:
#
# FIND:
# else if (valueType == VectorValueType.XB8)
# {
# return (uint)(values.Length);
# }
# else
# {
# throw new NotImplementedException($"{valueType}");
# }
#
# REPLACE WITH:
# else if (valueType == VectorValueType.XB8)
# {
# return (uint)(values.Length);
# }
# else if (valueType == VectorValueType.SB8)
# {
# return (uint)(values.Length);
# }
# else
# {
# throw new NotImplementedException($"{valueType}");
# }
#
#
# --- 4b. In TryGetEmbedding (around line 920), add Q8 dequant before the
# throw NotImplementedException:
#
# FIND:
# else if (quantType == VectorQuantType.XPreQ8)
# {
# for (var i = 0; i < asBytes.Length; i++)
# {
# into[i] = from[i];
# }
# }
# else
# {
# // TODO: Handle Q8 and BIN as they are implemented
# throw new NotImplementedException($"Unexpected quantization: {quantType}");
# }
#
# REPLACE WITH:
# else if (quantType == VectorQuantType.XPreQ8)
# {
# for (var i = 0; i < asBytes.Length; i++)
# {
# into[i] = from[i];
# }
# }
# else if (quantType == VectorQuantType.Q8)
# {
# // Q8 stores signed bytes; reinterpret each byte as sbyte, then convert to float
# for (var i = 0; i < asBytes.Length; i++)
# {
# into[i] = (float)(sbyte)from[i];
# }
# }
# else
# {
# // TODO: Handle BIN as it is implemented
# throw new NotImplementedException($"Unexpected quantization: {quantType}");
# }
#
# ══════════════════════════════════════════════════════════════════════
# End of patch instructions.
# ══════════════════════════════════════════════════════════════════════
Loading