Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 70 additions & 1 deletion crates/edit/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@

use std::hint::black_box;
use std::io::Cursor;
use std::ops::Range;
use std::{mem, vec};

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use edit::helpers::*;
use edit::simd::MemsetSafe;
use edit::{buffer, hash, oklab, simd, unicode};
use serde::Deserialize;
use stdext::arena;
use stdext::{arena, varint};

/// A single deserializable patch record: two `usize` values followed by a `String`.
// NOTE(review): the meaning of the fields is not visible in this hunk. Presumably they are
// (position, number of deleted units, inserted text) of one editing-trace step - confirm.
#[derive(Deserialize)]
pub struct EditingTracePatch(pub usize, pub usize, pub String);
Expand Down Expand Up @@ -227,6 +228,73 @@ fn bench_unicode(c: &mut Criterion) {
});
}

fn bench_varint(c: &mut Criterion) {
const BUFFER_SIZE: usize = MEBI;

let mut buffer = Vec::with_capacity(BUFFER_SIZE + 16);

// Knuth's MMIX LCG
let mut rng_state = 1442695040888963407u64;
let mut rng = || {
rng_state = rng_state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
rng_state as u32
};

// Bitmask with Rejection (as used by Apple)
let mut rng_state = 1442695040888963407u64;
let mut rng_range = |range: Range<u32>| {
let range_size = range.len() as u32;
let mask = range_size.next_power_of_two() - 1;
loop {
rng_state =
rng_state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
let value = rng_state as u32 & mask;
if value < range_size {
return range.start.wrapping_add(value);
}
}
};

loop {
// Generate values according to a non-uniform distribution.
// The distribution roughly corresponds to what LSH encounters.
let value = match rng() {
// ~35%: <=7 bits
0..1503238553 => rng_range(0..0x7F),
// ~40%: <=14 bits
1503238553..3221225472 => rng_range(0x80..0x3FFF),
// ~20%: <=21 bits
3221225472..4026531840 => rng_range(0x4000..0x1FFFFF),
// ~5%: u32::MAX
_ => (1 << 28) - 1,
};

buffer.extend(varint::encode(value));

if buffer.len() > BUFFER_SIZE {
break;
}
}

// As per the varint::decode() safety requirements, we need 8 bytes of padding.
// We pre-allocated `buffer` with extra capacity, so we technically fulfill that.
// _Technically_, however, we also make Rust unhappy, because it's uninitialized memory.
// It's just that I really really don't care about any such antics. It's memory.

c.benchmark_group("varint").bench_function("decode", |b| {
let mut off = 0;

b.iter(|| {
let (val, len) = unsafe { varint::decode(buffer.as_ptr().add(off)) };
black_box(val);
off += len;
if off >= buffer.len() {
off = 0;
}
});
});
}

fn bench(c: &mut Criterion) {
arena::init(128 * MEBI).unwrap();

Expand All @@ -238,6 +306,7 @@ fn bench(c: &mut Criterion) {
bench_simd_memset::<u32>(c);
bench_simd_memset::<u8>(c);
bench_unicode(c);
bench_varint(c);
}

criterion_group!(benches, bench);
Expand Down
1 change: 1 addition & 0 deletions crates/stdext/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

pub mod arena;
pub mod sys;
pub mod varint;

mod helpers;
pub use helpers::*;
140 changes: 140 additions & 0 deletions crates/stdext/src/varint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

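//! Variable-length encoding and decoding of unsigned integers of up to 28 bits (0 to 268435455),
//! plus a compact 1-byte encoding for `u32::MAX`. Values in between are not representable;
//! [`encode`] stores them as `u32::MAX`.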
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I don't understand - it's not a u32 encoding, it's a u28 encoding with a special case for u32::MAX and a pretty significant gap between 268435455 and 4294967295

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's fair. Perhaps I should move this into the lsh project now that I made it a library. 🤔 The reason it's an "u28" is because lsh really doesn't need values >2^28, while an efficient compression for a >2^28 value is still useful (it's used for setting the input offset to max. when matching a .*).

//! `u32::MAX` is a common value in Microsoft Edit's syntax highlighter bytecode.
//!
//! # Format
//!
//! ```text
//! 0-127 ( 7 bits): xxxxxxx0
//! 128-16383 (14 bits): xxxxxx01 yyyyyyyx
//! 16384-2097151 (21 bits): xxxxx011 yyyyyyxx zzzzzzzy
//! 2097152-268435455 (28 bits): xxxx0111 yyyyyxxx zzzzzzyy wwwwwwwz
//! 4294967295 (32 bits): ....1111
//! ```
//!
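//! The least significant bits of the first byte indicate the length as a unary prefix, similar to a UTF-8 lead
//! byte (but stored in the low bits): N trailing 1-bits followed by a 0-bit mean that N more bytes follow, and
//! four trailing 1-bits mark the `u32::MAX` special case. The remaining bits store
//! the value, in little-endian order. Little endian was chosen, as most architectures today use that.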
//!
//! On x86, `tzcnt` (= `trailing_ones()` = what we need) has the benefit that its encoding is identical to `rep bsf`.
//! Older CPUs without BMI1 will ignore the `rep` prefix and use `bsf`, while modern CPUs will use the faster `tzcnt`.
//! So not just can we drop the need for `bswap` on x86, but we also speed up the bit count calculation.
//! This makes this encoding faster than LEB128, Google Varint, and others.

/// Encodes `val` into its variable-length byte representation.
///
/// Values below 2^28 occupy 1 to 4 bytes, depending on their magnitude.
/// Every other value, including `u32::MAX`, is stored as the single byte `0xff`,
/// which decodes as `u32::MAX`. The encoding is therefore lossy for `2^28..u32::MAX`.
pub fn encode(val: u32) -> Vec<u8> {
    let mut out = Vec::with_capacity(5);

    // Number of bytes that follow the lead byte. This is also the
    // number of 1-bits in the length tag at the bottom of the lead byte.
    let extra: usize = if val < 0x80 {
        0
    } else if val < 0x4000 {
        1
    } else if val < 0x20_0000 {
        2
    } else if val < 0x1000_0000 {
        3
    } else {
        // Doesn't fit into 28 bits: emit the `u32::MAX` marker.
        out.push(0xff);
        return out;
    };

    // The tag is `extra` 1-bits terminated by a 0-bit, so the value moves up by `extra + 1` bits.
    let tag = (1u32 << extra) - 1;
    let packed = (val << (extra + 1)) | tag;

    // Only the low `extra + 1` bytes of the little-endian word carry information.
    out.extend(packed.to_le_bytes().iter().take(extra + 1));
    out
}

/// Decodes a single varint starting at `data`.
///
/// Returns the decoded value and the number of bytes its encoding occupies (1 to 4).
/// A lead byte whose four least significant bits are all set decodes as `(u32::MAX, 1)`,
/// no matter what the remaining bits are. This doubles as a simple form of error correction.
///
/// # Safety
///
/// The caller must ensure that `data..data+4` is valid memory.
/// It doesn't need to be a valid value, but it must be readable.
pub unsafe fn decode(data: *const u8) -> (u32, usize) {
    // SAFETY: The caller guarantees that 4 bytes are readable at `data`,
    // and `read_unaligned` has no alignment requirement.
    let val = u32::from_le(unsafe { (data as *const u32).read_unaligned() });

    // The number of trailing 1-bits equals the number of bytes following the lead byte.
    let ones = val.trailing_ones();
    let len = ones + 1;

    // Give LLVM a helping hand for x86 CPUs with BMI1. It's not smart enough to figure out that `bextr` can
    // be used here. To be fair, it's not faster, so maybe that's why. It is _a lot_ more compact, however.
    // `bextr` only looks at the low 8 bits of start/length and clamps at bit 31, so `len > 4` is harmless.
    //
    // SAFETY: The `cfg` guarantees that BMI1 is statically enabled for this build.
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi1"))]
    let res = unsafe { std::arch::x86_64::_bextr_u32(val, len, 7 * len) };

    // This is where you'd put more architecture-specific optimizations.

    // Portable fallback:
    // 1. Shift out the bytes we read but don't need.
    // 2. Shift back down and remove the trailing 0/10/110/1110 length bits.
    //
    // For inputs such as [0xff, 0xff, 0xff, 0xff], `len` exceeds 4 and both shift amounts fall
    // outside of 0..32. We must not guard against that with a branch up here: the inputs are
    // relatively random, so it would mispredict. The wrapping operations make the computation
    // well-defined in every build configuration (debug, release, `overflow-checks`): the
    // subtraction wraps and the shift amount is masked to 5 bits, which is what the hardware
    // shift does anyway. The resulting garbage is discarded below.
    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi1")))]
    let res = val
        .wrapping_shl(32u32.wrapping_sub(8 * len))
        .wrapping_shr(32u32.wrapping_sub(7 * len));

    // If the lead byte indicates >28 bits, assume `u32::MAX`.
    // Coming after the shifts, this is a plain select and can be compiled to conditional moves.
    if len > 4 { (u32::MAX, 1) } else { (res, len as usize) }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes `bytes` from a zero-padded 4-byte buffer.
    ///
    /// `decode()` always reads 4 bytes, but an encoding may be as short as 1 byte.
    /// Copying into a fixed-size buffer ensures that every byte read is initialized.
    fn decode_padded(bytes: &[u8]) -> (u32, usize) {
        let mut buf = [0u8; 4];
        buf[..bytes.len()].copy_from_slice(bytes);
        // SAFETY: `buf` consists of 4 initialized, readable bytes.
        unsafe { decode(buf.as_ptr()) }
    }

    #[test]
    fn test_encode_decode_roundtrip() {
        // Test various boundary values
        let test_values = [
            0u32,
            1,
            123,
            127, // Max 1 byte
            128, // Min 2 bytes
            1234,
            16383,     // Max 2 bytes
            16384,     // Min 3 bytes
            2097151,   // Max 3 bytes
            2097152,   // Min 4 bytes
            268435455, // Max 4 bytes
            u32::MAX,  // Special case
        ];

        for &val in &test_values {
            let encoded = encode(val);
            let (decoded, len) = decode_padded(&encoded);
            assert_eq!(
                decoded, val,
                "Failed roundtrip for value {} (encoded as {:02X?})",
                val, encoded
            );
            assert_eq!(
                len,
                encoded.len(),
                "Length mismatch for value {} (encoded as {:02X?})",
                val,
                encoded
            );
        }
    }

    #[test]
    fn test_out_of_range_values_saturate() {
        // Values between 2^28 and u32::MAX have no encoding of their own
        // and are stored using the `u32::MAX` marker.
        for &val in &[268435456u32, 0x8000_0000, u32::MAX - 1] {
            let encoded = encode(val);
            assert_eq!(encoded, [0xff], "Unexpected encoding for value {}", val);
            assert_eq!(decode_padded(&encoded), (u32::MAX, 1));
        }
    }

    #[test]
    fn test_specific_encodings() {
        // Test specific byte patterns. The bytes after each encoding are
        // arbitrary filler and must not influence the result.
        // SAFETY: Every array below consists of 4 initialized, readable bytes.
        unsafe {
            assert_eq!((0, 1), decode([0, 0xbb, 0xcc, 0xdd].as_ptr()));
            assert_eq!((123, 1), decode([0xf6, 0xbb, 0xcc, 0xdd].as_ptr()));
            assert_eq!((1234, 2), decode([0x49, 0x13, 0xcc, 0xdd].as_ptr()));
            assert_eq!((u32::MAX, 1), decode([0xff, 0xbb, 0xcc, 0xdd].as_ptr()));
        }
    }
}