Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@ default = ["std"]
std = []
serde = ["serde/alloc"]
bytes = []
simd = []

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }


[[bench]]
name = "cheetah"
harness = false

[[bench]]
name = "simd"
harness = false
20 changes: 14 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ CheetahString is a versatile string type that goes beyond the standard library's
- **⚡ Performance Focused**
- Optimized for common string operations
- Reduced memory allocations via intelligent internal representation
- Optional SIMD acceleration for string matching operations (x86_64 SSE2)
- Benchmarked against standard library types

- **🛡️ Safe & Correct**
Expand All @@ -44,20 +45,21 @@ Add this to your `Cargo.toml`:

```toml
[dependencies]
cheetah-string = "0.1"
cheetah-string = "1.0.0"
```

### Optional Features

```toml
[dependencies]
cheetah-string = { version = "0.1", features = ["bytes", "serde"] }
cheetah-string = { version = "1.0.0", features = ["bytes", "serde", "simd"] }
```

Available features:
- `std` (default): Enable standard library support
- `bytes`: Integration with the `bytes` crate
- `serde`: Serialization support via serde
- `simd`: SIMD-accelerated string operations (x86_64 SSE2)

## 🚀 Quick Start

Expand All @@ -74,8 +76,10 @@ let small = CheetahString::from("short"); // Stored inline!

// String operations
let s = CheetahString::from("Hello, World!");
assert!(s.starts_with("Hello"));
assert!(s.starts_with("Hello")); // Supports &str
assert!(s.starts_with('H')); // Also supports char
assert!(s.contains("World"));
assert!(s.contains('W'));
assert_eq!(s.to_lowercase(), "hello, world!");

// Concatenation
Expand All @@ -101,10 +105,14 @@ CheetahString is designed with performance in mind:
- **Small String Optimization (SSO)**: Strings up to 23 bytes are stored inline without heap allocation
- **Efficient Sharing**: Large strings use `Arc<str>` for cheap cloning
- **Optimized Operations**: Common operations like concatenation have fast-path implementations
- **SIMD Acceleration** (with `simd` feature): String matching operations (`starts_with`, `ends_with`, `contains`, `find`, equality comparisons) are accelerated using SSE2 SIMD instructions on x86_64 platforms. The implementation automatically falls back to scalar code for small inputs or when SIMD is not available.

Run benchmarks:
```bash
cargo bench

# With SIMD feature
cargo bench --features simd
```

## 🔍 Internal Representation
Expand All @@ -131,7 +139,7 @@ CheetahString intelligently chooses the most efficient storage:

### Query Methods
- `len()`, `is_empty()`, `as_str()`, `as_bytes()`
- `starts_with()`, `ends_with()`, `contains()`
- `starts_with()`, `ends_with()`, `contains()` - Support both `&str` and `char` patterns
- `find()`, `rfind()`

### Transformation
Expand All @@ -141,8 +149,8 @@ CheetahString intelligently chooses the most efficient storage:
- `substring()`, `repeat()`

### Iteration
- `chars()` - Iterate over characters
- `split()` - Split by pattern
- `chars()` - Iterate over characters (double-ended iterator)
- `split()` - Split by pattern (supports `&str` and `char`)
- `lines()` - Iterate over lines

### Mutation
Expand Down
164 changes: 164 additions & 0 deletions benches/simd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
use cheetah_string::CheetahString;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};

/// Benchmarks `==` between two `CheetahString` values for inputs of 16 to 4096 bytes.
///
/// Two cases are registered per size:
/// - `equal`: both operands are `size` repetitions of `'a'`.
/// - `not_equal`: the right-hand operand has the same length but ends in `'b'`,
///   so the operands differ only in their final byte.
fn bench_equality(c: &mut Criterion) {
    const SIZES: [usize; 8] = [16, 32, 64, 128, 256, 512, 1024, 4096];

    let mut group = c.benchmark_group("equality");

    for size in SIZES {
        let lhs = CheetahString::from("a".repeat(size));
        let same = CheetahString::from("a".repeat(size));
        let differs_at_end = CheetahString::from(format!("{}b", "a".repeat(size - 1)));

        group.throughput(Throughput::Bytes(size as u64));

        // Registration order (equal, then not_equal) is part of the report layout.
        for (label, rhs) in [("equal", &same), ("not_equal", &differs_at_end)] {
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(&lhs) == black_box(rhs))
            });
        }
    }

    group.finish();
}

/// Benchmarks `CheetahString::starts_with` for haystacks of 16 to 4096 bytes.
///
/// The haystack is `size` repetitions of `'a'`. Each needle is half that length:
/// - `match`: all `'a'`, so it is a prefix of the haystack.
/// - `no_match`: all `'b'`, so it differs from the haystack at the first byte.
fn bench_starts_with(c: &mut Criterion) {
    const SIZES: [usize; 8] = [16, 32, 64, 128, 256, 512, 1024, 4096];

    let mut group = c.benchmark_group("starts_with");

    for size in SIZES {
        let haystack = CheetahString::from("a".repeat(size));
        let prefix_hit = "a".repeat(size / 2);
        let prefix_miss = "b".repeat(size / 2);

        group.throughput(Throughput::Bytes(size as u64));

        // Registration order (match, then no_match) is part of the report layout.
        for (label, needle) in [("match", &prefix_hit), ("no_match", &prefix_miss)] {
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(&haystack).starts_with(black_box(needle)))
            });
        }
    }

    group.finish();
}

/// Benchmarks `CheetahString::ends_with` for haystacks of 16 to 4096 bytes.
///
/// The haystack is `size` repetitions of `'a'`. Each needle is half that length:
/// - `match`: all `'a'`, so it is a suffix of the haystack.
/// - `no_match`: all `'b'`, so no byte of it matches the haystack.
fn bench_ends_with(c: &mut Criterion) {
    const SIZES: [usize; 8] = [16, 32, 64, 128, 256, 512, 1024, 4096];

    let mut group = c.benchmark_group("ends_with");

    for size in SIZES {
        let haystack = CheetahString::from("a".repeat(size));
        let suffix_hit = "a".repeat(size / 2);
        let suffix_miss = "b".repeat(size / 2);

        group.throughput(Throughput::Bytes(size as u64));

        // Registration order (match, then no_match) is part of the report layout.
        for (label, needle) in [("match", &suffix_hit), ("no_match", &suffix_miss)] {
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(&haystack).ends_with(black_box(needle)))
            });
        }
    }

    group.finish();
}

/// Benchmarks `CheetahString::contains` with single-character `&str` needles.
///
/// For each nominal `size` the haystack is `size / 2` bytes of `'a'`, one `'x'`
/// marker, then another `size / 2` bytes of `'a'`:
/// - `match`: searches for `"x"`, which sits in the middle of the haystack.
/// - `no_match`: searches for `"z"`, which never occurs.
///
/// The marker makes the haystack `size + 1` bytes long, so the throughput is
/// reported from the actual byte length rather than the nominal `size`.
/// Benchmark ids still use the nominal `size`, so existing reports stay comparable.
fn bench_contains(c: &mut Criterion) {
    let mut group = c.benchmark_group("contains");

    for size in [16, 32, 64, 128, 256, 512, 1024, 4096] {
        let text = format!("{}x{}", "a".repeat(size / 2), "a".repeat(size / 2));
        // Measure the length before the string is moved into the CheetahString.
        let haystack_bytes = text.len() as u64;
        let haystack = CheetahString::from(text);
        let needle_match = "x";
        let needle_no_match = "z";

        group.throughput(Throughput::Bytes(haystack_bytes));

        group.bench_with_input(BenchmarkId::new("match", size), &size, |b, _| {
            b.iter(|| black_box(&haystack).contains(black_box(needle_match)))
        });

        group.bench_with_input(BenchmarkId::new("no_match", size), &size, |b, _| {
            b.iter(|| black_box(&haystack).contains(black_box(needle_no_match)))
        });
    }

    group.finish();
}

/// Benchmarks `CheetahString::find` with single-character `&str` needles.
///
/// For each nominal `size` the haystack is `size / 2` bytes of `'a'`, one `'x'`
/// marker, then another `size / 2` bytes of `'a'`:
/// - `match`: searches for `"x"`, which sits in the middle of the haystack.
/// - `no_match`: searches for `"z"`, which never occurs.
///
/// The marker makes the haystack `size + 1` bytes long, so the throughput is
/// reported from the actual byte length rather than the nominal `size`.
/// Benchmark ids still use the nominal `size`, so existing reports stay comparable.
fn bench_find(c: &mut Criterion) {
    let mut group = c.benchmark_group("find");

    for size in [16, 32, 64, 128, 256, 512, 1024, 4096] {
        let text = format!("{}x{}", "a".repeat(size / 2), "a".repeat(size / 2));
        // Measure the length before the string is moved into the CheetahString.
        let haystack_bytes = text.len() as u64;
        let haystack = CheetahString::from(text);
        let needle_match = "x";
        let needle_no_match = "z";

        group.throughput(Throughput::Bytes(haystack_bytes));

        group.bench_with_input(BenchmarkId::new("match", size), &size, |b, _| {
            b.iter(|| black_box(&haystack).find(black_box(needle_match)))
        });

        group.bench_with_input(BenchmarkId::new("no_match", size), &size, |b, _| {
            b.iter(|| black_box(&haystack).find(black_box(needle_no_match)))
        });
    }

    group.finish();
}

/// Benchmarks short chains of `starts_with` / `contains` checks on fixed sample inputs.
///
/// Three benchmarks are registered in the `realistic` group, in this order:
/// - `url_parsing`: scheme prefix check plus two substring checks on a URL.
/// - `log_filtering`: timestamp prefix check plus two substring checks on a log line.
/// - `content_type_check`: prefix check plus one substring check on a content-type value.
///
/// Each check is joined with `&&`, so later checks only run when earlier ones succeed.
fn bench_realistic_workload(c: &mut Criterion) {
    // Sample inputs for the three scenarios.
    let sample_url =
        CheetahString::from("https://api.example.com/v1/users/12345?filter=active&sort=name");
    let sample_log =
        CheetahString::from("[2024-01-01 12:00:00] INFO: Processing request for user_id=12345");
    let sample_content_type = CheetahString::from("application/json; charset=utf-8");

    let mut group = c.benchmark_group("realistic");

    // Simulate URL parsing
    group.bench_function("url_parsing", |bencher| {
        bencher.iter(|| {
            black_box(&sample_url).starts_with("https://")
                && black_box(&sample_url).contains("api")
                && black_box(&sample_url).contains("users")
        })
    });

    // Simulate log filtering
    group.bench_function("log_filtering", |bencher| {
        bencher.iter(|| {
            black_box(&sample_log).starts_with("[2024")
                && black_box(&sample_log).contains("INFO")
                && black_box(&sample_log).contains("user_id")
        })
    });

    // Simulate content type checking
    group.bench_function("content_type_check", |bencher| {
        bencher.iter(|| {
            black_box(&sample_content_type).starts_with("application/")
                && black_box(&sample_content_type).contains("json")
        })
    });

    group.finish();
}

// Collect every benchmark function in this file into the `benches` group, in the
// order listed, and hand that group to `criterion_main!` as the bench target's
// entry point.
criterion_group!(
benches,
bench_equality,
bench_starts_with,
bench_ends_with,
bench_contains,
bench_find,
bench_realistic_workload
);
criterion_main!(benches);
102 changes: 102 additions & 0 deletions examples/simd_demo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Example demonstrating SIMD-accelerated string operations in CheetahString
// Run with: cargo run --example simd_demo --features simd

use cheetah_string::CheetahString;

/// Prints a walkthrough of `CheetahString` matching operations to stdout.
///
/// Covers equality, `starts_with`, `ends_with`, `contains` and `find`, followed by
/// two small scenarios (URL classification and log-line counting).
fn main() {
    println!("CheetahString SIMD Demo");
    println!("=======================\n");

    // Example 1: Equality comparison
    println!("1. Equality Comparison:");
    let s1 = CheetahString::from("Hello, World! This is a SIMD-accelerated string comparison.");
    let s2 = CheetahString::from("Hello, World! This is a SIMD-accelerated string comparison.");
    let s3 = CheetahString::from("Hello, World! This is a different string.");

    let matches_copy = s1 == s2; // true (uses SIMD for long strings)
    let matches_other = s1 == s3; // false
    println!(" s1 == s2: {}", matches_copy);
    println!(" s1 == s3: {}\n", matches_other);

    // Example 2: starts_with
    println!("2. String Prefix Matching:");
    let url = CheetahString::from("https://api.example.com/v1/users/12345?filter=active&sort=name");
    println!(" URL: {}", url);
    let has_https = url.starts_with("https://");
    println!(" Starts with 'https://': {}", has_https);
    let has_http = url.starts_with("http://");
    println!(" Starts with 'http://': {}\n", has_http);

    // Example 3: ends_with
    println!("3. String Suffix Matching:");
    let filename = CheetahString::from("document.pdf");
    println!(" Filename: {}", filename);
    let is_pdf = filename.ends_with(".pdf");
    println!(" Ends with '.pdf': {}", is_pdf);
    let is_txt = filename.ends_with(".txt");
    println!(" Ends with '.txt': {}\n", is_txt);

    // Example 4: contains
    println!("4. Substring Search:");
    let log_entry = CheetahString::from(
        "[2024-01-01 12:00:00] INFO: Processing request for user_id=12345 from ip=192.168.1.100",
    );
    println!(" Log entry: {}", log_entry);
    let has_info = log_entry.contains("INFO");
    println!(" Contains 'INFO': {}", has_info);
    let has_error = log_entry.contains("ERROR");
    println!(" Contains 'ERROR': {}", has_error);
    let has_user_id = log_entry.contains("user_id");
    println!(" Contains 'user_id': {}\n", has_user_id);

    // Example 5: find
    println!("5. Pattern Finding:");
    let text = CheetahString::from("The quick brown fox jumps over the lazy dog");
    println!(" Text: {}", text);
    if let Some(offset) = text.find("fox") {
        println!(" Found 'fox' at position: {}", offset);
    }
    if let Some(offset) = text.find("lazy") {
        println!(" Found 'lazy' at position: {}", offset);
    }
    match text.find("cat") {
        None => println!(" 'cat' not found\n"),
        Some(_) => {}
    }

    // Example 6: Real-world use case - URL validation
    println!("6. Real-world Use Case - URL Validation:");
    const SAMPLE_URLS: [&str; 3] = [
        "https://secure.example.com/api/v1/data",
        "http://insecure.example.com/page",
        "ftp://files.example.com/download",
    ];

    for candidate in SAMPLE_URLS {
        let parsed = CheetahString::from(candidate);
        let is_secure = parsed.starts_with("https://");
        let is_api = parsed.contains("/api/");
        println!(
            " URL: {} - Secure: {}, API endpoint: {}",
            candidate, is_secure, is_api
        );
    }
    println!();

    // Example 7: Performance-sensitive pattern matching
    println!("7. Log Processing Example:");
    const SAMPLE_LOGS: [&str; 4] = [
        "[2024-01-01 10:00:00] ERROR: Database connection failed",
        "[2024-01-01 10:01:00] INFO: Retrying connection...",
        "[2024-01-01 10:02:00] INFO: Connection established",
        "[2024-01-01 10:03:00] WARN: High memory usage detected",
    ];

    let mut error_count = 0;
    let mut warning_count = 0;

    for line in SAMPLE_LOGS {
        let entry = CheetahString::from(line);
        // ERROR takes precedence: WARN is only checked when ERROR is absent.
        if entry.contains("ERROR") {
            error_count += 1;
            println!(" Error found: {}", line);
        } else if entry.contains("WARN") {
            warning_count += 1;
            println!(" Warning found: {}", line);
        }
    }

    println!(
        "\n Summary: {} errors, {} warnings",
        error_count, warning_count
    );
    println!("\nNote: When compiled with --features simd, these operations use SSE2 SIMD");
    println!(" instructions for improved performance on longer strings (>= 16 bytes).");
}
Loading
Loading