Commit 97eb4e8
(improvement) deserializers: use direct PyUnicode_DecodeUTF8/ASCII from C buffer pointer
Replace the two-step to_bytes(buf).decode('utf8') pattern in DesUTF8Type and DesAsciiType with direct CPython C API calls (PyUnicode_DecodeUTF8 and PyUnicode_DecodeASCII). This eliminates an intermediate bytes object allocation per text cell: the old code created a Python bytes object from the C buffer pointer via to_bytes(buf), then immediately decoded it to str and discarded the bytes. Text (UTF8Type/VarcharType) is the most common CQL column type, so this optimization applies to the majority of cells in typical workloads.

Benchmark results (Cython row parsing pipeline, median times):

| Scenario                           | Before (original) | After (direct decode) | Speedup |
|------------------------------------|------------------:|----------------------:|--------:|
| UTF8 1 row x 1 col short (11 B)    |            565 ns |                454 ns |   1.24x |
| UTF8 1 row x 10 col short          |          1,594 ns |              1,023 ns |   1.56x |
| UTF8 100 rows x 5 col medium       |         61,396 ns |             28,766 ns |   2.13x |
| UTF8 1000 rows x 5 col medium      |        547,145 ns |            290,361 ns |   1.88x |
| UTF8 100 rows x 5 col long (200 B) |         57,940 ns |             35,680 ns |   1.62x |
| UTF8 100 rows x 5 col multibyte    |        125,149 ns |            103,370 ns |   1.21x |
| ASCII 100 rows x 5 col medium      |         41,608 ns |             35,817 ns |   1.16x |
| ASCII 1000 rows x 5 col medium     |        416,350 ns |            374,341 ns |   1.11x |
| Mixed 100 rows 3 text + 2 int      |         44,646 ns |             31,189 ns |   1.43x |

All existing unit tests pass (62 type tests, 116 total across key suites).
1 parent 9c53d78 commit 97eb4e8
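
As a rough pure-Python analogue of what this change removes (illustrative only, not part of the commit): bytes(mv).decode() first copies the buffer into a bytes object, mirroring the old to_bytes(buf).decode() path, while str(mv, "utf8") decodes straight from the buffer, as PyUnicode_DecodeUTF8 now does in the Cython path.

import timeit

data = b"Hello, this is a test string for benchmarking!"
mv = memoryview(data)

# Old pattern: copy the buffer into bytes, then decode and discard the copy.
copy_then_decode = timeit.timeit(lambda: bytes(mv).decode("utf8"), number=500_000)
# New pattern: decode directly from the buffer, no intermediate bytes object.
direct_decode = timeit.timeit(lambda: str(mv, "utf8"), number=500_000)

print(f"copy+decode: {copy_then_decode:.3f}s  direct: {direct_decode:.3f}s")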

3 files changed

Lines changed: 475 additions & 3 deletions

benchmarks/utf8_decode_benchmark.py

Lines changed: 327 additions & 0 deletions

@@ -0,0 +1,327 @@
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Benchmarks for UTF-8 and ASCII deserialization in the Cython row parser.

This optimization replaces the two-step to_bytes(buf).decode('utf8') with
a direct PyUnicode_DecodeUTF8(buf.ptr, buf.size, NULL) call, eliminating
an intermediate bytes object allocation per text cell.

Requires: pip install pytest-benchmark

Run with: pytest benchmarks/utf8_decode_benchmark.py -v --benchmark-sort=name
Compare before/after by running on master vs this branch.

Correctness tests live in tests/unit/cython/test_deserializers.py.
"""

import struct
import pytest

from cassandra.obj_parser import ListParser
from cassandra.bytesio import BytesIOReader
from cassandra.parsing import ParseDesc
from cassandra.deserializers import make_deserializers
from cassandra.cqltypes import UTF8Type, AsciiType, Int32Type
from cassandra.policies import ColDesc


def _build_text_rows_buffer(num_rows, num_cols, text_data):
    """Build a binary buffer representing num_rows x num_cols of text data.

    Format: [int32 row_count] [row1] [row2] ...
    Each row: [cell1] [cell2] ...
    Each cell: [int32 length] [data bytes]
    """
    parts = [struct.pack(">i", num_rows)]
    cell = struct.pack(">i", len(text_data)) + text_data
    row = cell * num_cols
    parts.append(row * num_rows)
    return b"".join(parts)


def _build_mixed_rows_buffer(num_rows, text_data, int_value=42):
    """Build a buffer with mixed columns: 3 text + 2 int32."""
    parts = [struct.pack(">i", num_rows)]
    text_cell = struct.pack(">i", len(text_data)) + text_data
    int_cell = struct.pack(">i", 4) + struct.pack(">i", int_value)
    row = text_cell + text_cell + text_cell + int_cell + int_cell
    parts.append(row * num_rows)
    return b"".join(parts)


def _make_text_desc(num_cols, protocol_version=4):
    """Create a ParseDesc for num_cols text columns."""
    coltypes = [UTF8Type] * num_cols
    colnames = [f"col{i}" for i in range(num_cols)]
    coldescs = [ColDesc("ks", "tbl", f"col{i}") for i in range(num_cols)]
    desers = make_deserializers(coltypes)
    return ParseDesc(colnames, coltypes, None, coldescs, desers, protocol_version)


def _make_ascii_desc(num_cols, protocol_version=4):
    """Create a ParseDesc for num_cols ASCII columns."""
    coltypes = [AsciiType] * num_cols
    colnames = [f"col{i}" for i in range(num_cols)]
    coldescs = [ColDesc("ks", "tbl", f"col{i}") for i in range(num_cols)]
    desers = make_deserializers(coltypes)
    return ParseDesc(colnames, coltypes, None, coldescs, desers, protocol_version)


def _make_mixed_desc(protocol_version=4):
    """Create a ParseDesc for 3 text + 2 int32 columns."""
    coltypes = [UTF8Type, UTF8Type, UTF8Type, Int32Type, Int32Type]
    colnames = ["text0", "text1", "text2", "int0", "int1"]
    coldescs = [ColDesc("ks", "tbl", n) for n in colnames]
    desers = make_deserializers(coltypes)
    return ParseDesc(colnames, coltypes, None, coldescs, desers, protocol_version)


# ---------------------------------------------------------------------------
# Cython pipeline benchmarks — UTF-8
# ---------------------------------------------------------------------------


class TestUTF8CythonPipeline:
    """Benchmark the full Cython row parsing pipeline with UTF-8 text columns.

    These benchmarks measure the end-to-end cost of parsing result sets
    through the optimized Cython path. The optimization replaces
    to_bytes(buf).decode('utf8') with PyUnicode_DecodeUTF8(buf.ptr, buf.size, NULL),
    eliminating one intermediate bytes allocation per text cell.
    """

    def test_bench_utf8_1row_1col_short(self, benchmark):
        """1 row x 1 col, short string (11 bytes) — isolates per-call overhead."""
        text = b"hello world"
        buf = _build_text_rows_buffer(1, 1, text)
        desc = _make_text_desc(1)
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 1
        assert result[0][0] == "hello world"

    def test_bench_utf8_1row_10col_short(self, benchmark):
        """1 row x 10 cols, short strings — measures per-column overhead."""
        text = b"hello world"
        buf = _build_text_rows_buffer(1, 10, text)
        desc = _make_text_desc(10)
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 1
        assert len(result[0]) == 10

    def test_bench_utf8_100rows_5col_medium(self, benchmark):
        """100 rows x 5 cols, medium string (46 bytes) — typical workload."""
        text = b"Hello, this is a test string for benchmarking!"
        buf = _build_text_rows_buffer(100, 5, text)
        desc = _make_text_desc(5)
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 100
        assert result[0][0] == text.decode("utf8")

    def test_bench_utf8_1000rows_5col_medium(self, benchmark):
        """1000 rows x 5 cols, medium string — high-throughput scenario."""
        text = b"Hello, this is a test string for benchmarking!"
        buf = _build_text_rows_buffer(1000, 5, text)
        desc = _make_text_desc(5)
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 1000

    def test_bench_utf8_100rows_5col_long(self, benchmark):
        """100 rows x 5 cols, long string (200 bytes) — larger values."""
        text = b"A" * 200
        buf = _build_text_rows_buffer(100, 5, text)
        desc = _make_text_desc(5)
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 100
        assert result[0][0] == "A" * 200

    def test_bench_utf8_100rows_5col_multibyte(self, benchmark):
        """100 rows x 5 cols, multibyte UTF-8 string — tests non-ASCII."""
        text = "Héllo wörld! こんにちは 🌍".encode("utf-8")
        buf = _build_text_rows_buffer(100, 5, text)
        desc = _make_text_desc(5)
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 100
        assert result[0][0] == text.decode("utf-8")


# ---------------------------------------------------------------------------
# Cython pipeline benchmarks — ASCII
# ---------------------------------------------------------------------------


class TestASCIICythonPipeline:
    """Benchmark the Cython row parsing pipeline with ASCII text columns."""

    def test_bench_ascii_100rows_5col_medium(self, benchmark):
        """100 rows x 5 cols, medium ASCII string."""
        text = b"Hello, this is a test ASCII string for benchmarking!"
        buf = _build_text_rows_buffer(100, 5, text)
        desc = _make_ascii_desc(5)
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 100
        assert result[0][0] == text.decode("ascii")

    def test_bench_ascii_1000rows_5col_medium(self, benchmark):
        """1000 rows x 5 cols, medium ASCII string."""
        text = b"Hello, this is a test ASCII string for benchmarking!"
        buf = _build_text_rows_buffer(1000, 5, text)
        desc = _make_ascii_desc(5)
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 1000


# ---------------------------------------------------------------------------
# Mixed columns benchmark
# ---------------------------------------------------------------------------


class TestMixedColumnsPipeline:
    """Benchmark with mixed column types (text + int) for realism."""

    def test_bench_mixed_100rows_3text_2int(self, benchmark):
        """100 rows x (3 text + 2 int) — realistic mixed schema."""
        text = b"Hello, this is a test string for benchmarking!"
        buf = _build_mixed_rows_buffer(100, text)
        desc = _make_mixed_desc()
        parser = ListParser()

        def parse():
            reader = BytesIOReader(buf)
            return parser.parse_rows(reader, desc)

        result = benchmark(parse)
        assert len(result) == 100
        assert result[0][0] == text.decode("utf8")
        assert result[0][3] == 42


# ---------------------------------------------------------------------------
# Python-level reference (bytes.decode) for comparison
# ---------------------------------------------------------------------------


class TestPythonDecodeReference:
    """Python-level microbenchmark showing the overhead of creating
    intermediate bytes objects before decode, which is what the
    original Cython code did (to_bytes(buf).decode('utf8')).

    These benchmarks isolate the bytes-creation overhead that the
    PyUnicode_DecodeUTF8 optimization eliminates.
    """

    def test_bench_python_bytes_decode_short(self, benchmark):
        """Python reference: bytes.decode('utf8') for 500 short strings."""
        data = b"hello world"

        def decode_loop():
            result = None
            for _ in range(500):
                result = data.decode("utf8")
            return result

        result = benchmark(decode_loop)
        assert result == "hello world"

    def test_bench_python_copy_then_decode_short(self, benchmark):
        """Python reference: bytes(data).decode('utf8') for 500 short strings.
        This simulates the old to_bytes(buf).decode() pattern, where
        to_bytes() creates a new bytes object from the C buffer."""
        data = b"hello world"
        mv = memoryview(data)

        def decode_loop():
            result = None
            for _ in range(500):
                copied = bytes(mv)  # simulates to_bytes(buf)
                result = copied.decode("utf8")
            return result

        result = benchmark(decode_loop)
        assert result == "hello world"

    def test_bench_python_bytes_decode_medium(self, benchmark):
        """Python reference: bytes.decode('utf8') for 500 medium strings."""
        data = b"Hello, this is a test string for benchmarking!"

        def decode_loop():
            result = None
            for _ in range(500):
                result = data.decode("utf8")
            return result

        result = benchmark(decode_loop)

    def test_bench_python_copy_then_decode_medium(self, benchmark):
        """Python reference: bytes(memoryview).decode('utf8') for 500 medium strings."""
        data = b"Hello, this is a test string for benchmarking!"
        mv = memoryview(data)

        def decode_loop():
            result = None
            for _ in range(500):
                copied = bytes(mv)  # simulates to_bytes(buf)
                result = copied.decode("utf8")
            return result

        result = benchmark(decode_loop)
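
As a quick sanity check of the wire layout the helper functions above build (an illustrative snippet, not part of the committed file):

import struct

# 1 row, 1 column, one 2-byte cell "hi":
# [int32 row_count] [int32 cell_length] [data bytes]
buf = struct.pack(">i", 1) + struct.pack(">i", 2) + b"hi"
assert buf == b"\x00\x00\x00\x01\x00\x00\x00\x02hi"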

cassandra/deserializers.pyx

Lines changed: 3 additions & 3 deletions
@@ -14,6 +14,7 @@
 
 
 from libc.stdint cimport int32_t, uint16_t
+from cpython.unicode cimport PyUnicode_DecodeASCII, PyUnicode_DecodeUTF8
 
 include 'cython_marshal.pyx'
 from cassandra.buffer cimport Buffer, to_bytes, slice_buffer
@@ -88,7 +89,7 @@ cdef class DesAsciiType(Deserializer):
     cdef deserialize(self, Buffer *buf, int protocol_version):
         if buf.size == 0:
             return ""
-        return to_bytes(buf).decode('ascii')
+        return PyUnicode_DecodeASCII(buf.ptr, buf.size, NULL)
 
 
 cdef class DesFloatType(Deserializer):
@@ -173,8 +174,7 @@ cdef class DesUTF8Type(Deserializer):
     cdef deserialize(self, Buffer *buf, int protocol_version):
         if buf.size == 0:
             return ""
-        cdef val = to_bytes(buf)
-        return val.decode('utf8')
+        return PyUnicode_DecodeUTF8(buf.ptr, buf.size, NULL)
 
 
 cdef class DesVarcharType(DesUTF8Type):
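
One behavioral note: a NULL errors argument to PyUnicode_DecodeUTF8/PyUnicode_DecodeASCII selects CPython's default strict error handling, so the direct-decode path raises UnicodeDecodeError on malformed input just as the old to_bytes(buf).decode(...) path did. A pure-Python illustration of that equivalence:

# Strict decoding in both the old and new paths: malformed input raises.
bad = b"\xff\xfe not valid utf-8"
try:
    bad.decode("utf8")  # what the old code path effectively did
except UnicodeDecodeError as exc:
    print("strict decode raises:", exc.reason)  # e.g. 'invalid start byte'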
