Skip to content

Commit c6c0584

Browse files
committed
(improvement) query: add Cython-aware serializer path in BoundStatement.bind()
When Cython serializers (from cassandra.serializers) are available and no column encryption policy is active, BoundStatement.bind() now uses pre-built Serializer objects cached on the PreparedStatement instead of calling cqltype classmethods. This avoids per-value Python method dispatch overhead and enables the ~30x vector serialization speedup from the Cython serializers module. The bind loop is split into three paths: 1. Column encryption policy path (unchanged behavior) 2. Cython serializers path (new fast path) 3. Plain Python path (no CE, no Cython -- removes per-value ColDesc/CE check) Depends on PR scylladb#748 (Cython serializers module) and PR scylladb#630 (CE-policy bind split).
1 parent e2dda9b commit c6c0584

2 files changed

Lines changed: 215 additions & 20 deletions

File tree

cassandra/query.py

Lines changed: 95 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@
3333
from cassandra.protocol import _UNSET_VALUE
3434
from cassandra.util import OrderedDict, _sanitize_identifiers
3535

36+
try:
37+
from cassandra.serializers import make_serializers as _cython_make_serializers
38+
_HAVE_CYTHON_SERIALIZERS = True
39+
except ImportError:
40+
_HAVE_CYTHON_SERIALIZERS = False
41+
3642
import logging
3743
log = logging.getLogger(__name__)
3844

@@ -474,6 +480,30 @@ def __init__(self, column_metadata, query_id, routing_key_indexes, query,
474480
self.is_idempotent = False
475481
self._is_lwt = is_lwt
476482

483+
@property
484+
def _serializers(self):
485+
"""Lazily create and cache Cython serializers for column types.
486+
487+
Returns a list of Serializer objects if Cython serializers are available
488+
and there is no column encryption policy, otherwise returns None.
489+
490+
The column_encryption_policy check is performed on every access (not
491+
cached) so that serializers are correctly bypassed if a policy is set
492+
after construction.
493+
"""
494+
if self.column_encryption_policy:
495+
return None
496+
try:
497+
return self.__serializers
498+
except AttributeError:
499+
pass
500+
if _HAVE_CYTHON_SERIALIZERS and self.column_metadata:
501+
self.__serializers = _cython_make_serializers(
502+
[col.type for col in self.column_metadata])
503+
else:
504+
self.__serializers = None
505+
return self.__serializers
506+
477507
@classmethod
478508
def from_message(cls, query_id, column_metadata, pk_indexes, cluster_metadata,
479509
query, prepared_keyspace, protocol_version, result_metadata,
@@ -532,6 +562,14 @@ def __str__(self):
532562
__repr__ = __str__
533563

534564

565+
def _raise_bind_serialize_error(col_spec, value, exc):
566+
"""Wrap serialization errors with column context for all bind loop paths."""
567+
actual_type = type(value)
568+
message = ('Received an argument of invalid type for column "%s". '
569+
'Expected: %s, Got: %s; (%s)' % (col_spec.name, col_spec.type, actual_type, exc))
570+
raise TypeError(message)
571+
572+
535573
class BoundStatement(Statement):
536574
"""
537575
A prepared statement that has been bound to a particular set of values.
@@ -636,28 +674,65 @@ def bind(self, values):
636674

637675
self.raw_values = values
638676
self.values = []
639-
for value, col_spec in zip(values, col_meta):
640-
if value is None:
641-
self.values.append(None)
642-
elif value is UNSET_VALUE:
643-
if proto_version >= 4:
644-
self._append_unset_value()
677+
if ce_policy:
678+
# Column encryption path: check each column for CE policy
679+
for value, col_spec in zip(values, col_meta):
680+
if value is None:
681+
self.values.append(None)
682+
elif value is UNSET_VALUE:
683+
if proto_version >= 4:
684+
self._append_unset_value()
685+
else:
686+
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
645687
else:
646-
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
688+
try:
689+
col_desc = ColDesc(col_spec.keyspace_name, col_spec.table_name, col_spec.name)
690+
uses_ce = ce_policy.contains_column(col_desc)
691+
if uses_ce:
692+
col_type = ce_policy.column_type(col_desc)
693+
col_bytes = col_type.serialize(value, proto_version)
694+
col_bytes = ce_policy.encrypt(col_desc, col_bytes)
695+
else:
696+
col_bytes = col_spec.type.serialize(value, proto_version)
697+
self.values.append(col_bytes)
698+
# OverflowError: Cython int32/float casts may raise on out-of-range values
699+
except (TypeError, struct.error, OverflowError) as exc:
700+
_raise_bind_serialize_error(col_spec, value, exc)
701+
else:
702+
# Fast path: no column encryption, use Cython serializers if available
703+
serializers = self.prepared_statement._serializers
704+
if serializers is not None:
705+
for ser, value, col_spec in zip(serializers, values, col_meta):
706+
if value is None:
707+
self.values.append(None)
708+
elif value is UNSET_VALUE:
709+
if proto_version >= 4:
710+
self._append_unset_value()
711+
else:
712+
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
713+
else:
714+
try:
715+
col_bytes = ser.serialize(value, proto_version)
716+
self.values.append(col_bytes)
717+
# OverflowError: Cython int32/float casts may raise on out-of-range values
718+
except (TypeError, struct.error, OverflowError) as exc:
719+
_raise_bind_serialize_error(col_spec, value, exc)
647720
else:
648-
try:
649-
col_desc = ColDesc(col_spec.keyspace_name, col_spec.table_name, col_spec.name)
650-
uses_ce = ce_policy and ce_policy.contains_column(col_desc)
651-
col_type = ce_policy.column_type(col_desc) if uses_ce else col_spec.type
652-
col_bytes = col_type.serialize(value, proto_version)
653-
if uses_ce:
654-
col_bytes = ce_policy.encrypt(col_desc, col_bytes)
655-
self.values.append(col_bytes)
656-
except (TypeError, struct.error) as exc:
657-
actual_type = type(value)
658-
message = ('Received an argument of invalid type for column "%s". '
659-
'Expected: %s, Got: %s; (%s)' % (col_spec.name, col_spec.type, actual_type, exc))
660-
raise TypeError(message)
721+
for value, col_spec in zip(values, col_meta):
722+
if value is None:
723+
self.values.append(None)
724+
elif value is UNSET_VALUE:
725+
if proto_version >= 4:
726+
self._append_unset_value()
727+
else:
728+
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
729+
else:
730+
try:
731+
col_bytes = col_spec.type.serialize(value, proto_version)
732+
self.values.append(col_bytes)
733+
# OverflowError: Cython int32/float casts may raise on out-of-range values
734+
except (TypeError, struct.error, OverflowError) as exc:
735+
_raise_bind_serialize_error(col_spec, value, exc)
661736

662737
if proto_version >= 4:
663738
diff = col_meta_len - len(self.values)

tests/unit/test_parameter_binding.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,123 @@ def test_unset_value(self):
216216

217217
class BoundStatementTestV5(BoundStatementTestV4):
218218
protocol_version = 5
219+
220+
221+
class StubSerializer:
222+
"""Stub that mimics a Cython Serializer object for testing the fast path."""
223+
224+
def __init__(self, cqltype):
225+
self.cqltype = cqltype
226+
227+
def serialize(self, value, protocol_version):
228+
return self.cqltype.serialize(value, protocol_version)
229+
230+
231+
class OverflowSerializer:
232+
"""Stub that raises OverflowError, mimicking Cython <int32_t> cast overflow."""
233+
234+
def __init__(self, cqltype):
235+
self.cqltype = cqltype
236+
237+
def serialize(self, value, protocol_version):
238+
raise OverflowError('value too large to convert to int32_t')
239+
240+
241+
class CythonBindPathTest(unittest.TestCase):
242+
"""Tests for the Cython serializer fast path in BoundStatement.bind().
243+
244+
These tests inject stub serializers via the PreparedStatement's cached
245+
__serializers attribute to exercise the Cython bind branch without
246+
requiring compiled Cython.
247+
"""
248+
249+
protocol_version = 4
250+
251+
def _make_prepared(self, column_metadata, serializers=None):
252+
"""Create a PreparedStatement and inject serializers into its cache."""
253+
prepared = PreparedStatement(column_metadata=column_metadata,
254+
query_id=None,
255+
routing_key_indexes=[],
256+
query=None,
257+
keyspace='keyspace',
258+
protocol_version=self.protocol_version,
259+
result_metadata=None,
260+
result_metadata_id=None)
261+
# Inject directly into the name-mangled cache attribute used by
262+
# the _serializers property, bypassing the lazy initialization.
263+
prepared._PreparedStatement__serializers = serializers
264+
return prepared
265+
266+
def test_cython_path_normal_serialization(self):
267+
"""Cython fast path produces the same result as the plain Python path."""
268+
column_metadata = [ColumnMetadata('keyspace', 'cf', 'c0', Int32Type),
269+
ColumnMetadata('keyspace', 'cf', 'c1', Int32Type)]
270+
serializers = [StubSerializer(Int32Type), StubSerializer(Int32Type)]
271+
prepared = self._make_prepared(column_metadata, serializers)
272+
273+
bound = BoundStatement(prepared_statement=prepared)
274+
bound.bind((42, -1))
275+
assert bound.values == [Int32Type.serialize(42, self.protocol_version),
276+
Int32Type.serialize(-1, self.protocol_version)]
277+
278+
def test_cython_path_none_value(self):
279+
"""None values pass through the Cython path without serialization."""
280+
column_metadata = [ColumnMetadata('keyspace', 'cf', 'c0', Int32Type)]
281+
serializers = [StubSerializer(Int32Type)]
282+
prepared = self._make_prepared(column_metadata, serializers)
283+
284+
bound = BoundStatement(prepared_statement=prepared)
285+
bound.bind((None,))
286+
assert bound.values == [None]
287+
288+
def test_cython_path_unset_value(self):
289+
"""UNSET_VALUE is handled correctly in the Cython fast path (v4+)."""
290+
column_metadata = [ColumnMetadata('keyspace', 'cf', 'c0', Int32Type),
291+
ColumnMetadata('keyspace', 'cf', 'c1', Int32Type)]
292+
serializers = [StubSerializer(Int32Type), StubSerializer(Int32Type)]
293+
prepared = self._make_prepared(column_metadata, serializers)
294+
295+
bound = BoundStatement(prepared_statement=prepared)
296+
bound.bind((42, UNSET_VALUE))
297+
assert bound.values[0] == Int32Type.serialize(42, self.protocol_version)
298+
assert bound.values[1] == UNSET_VALUE
299+
300+
def test_cython_path_overflow_error_wrapped(self):
301+
"""OverflowError from Cython cast is caught and wrapped with column context."""
302+
column_metadata = [ColumnMetadata('keyspace', 'cf', 'v0', Int32Type)]
303+
serializers = [OverflowSerializer(Int32Type)]
304+
prepared = self._make_prepared(column_metadata, serializers)
305+
306+
bound = BoundStatement(prepared_statement=prepared)
307+
with pytest.raises(TypeError) as exc:
308+
bound.bind((2**31,))
309+
msg = str(exc.value)
310+
assert 'v0' in msg
311+
assert 'Int32Type' in msg
312+
assert 'int' in msg
313+
314+
def test_cython_path_type_error_wrapped(self):
315+
"""TypeError from serializer is caught and wrapped with column context."""
316+
column_metadata = [ColumnMetadata('keyspace', 'cf', 'v0', Int32Type)]
317+
serializers = [StubSerializer(Int32Type)]
318+
prepared = self._make_prepared(column_metadata, serializers)
319+
320+
bound = BoundStatement(prepared_statement=prepared)
321+
with pytest.raises(TypeError) as exc:
322+
bound.bind(('not_an_int',))
323+
msg = str(exc.value)
324+
assert 'v0' in msg
325+
assert 'Int32Type' in msg
326+
327+
def test_plain_path_overflow_error_wrapped(self):
328+
"""OverflowError in the plain Python path is also caught and wrapped."""
329+
column_metadata = [ColumnMetadata('keyspace', 'cf', 'v0', Int32Type)]
330+
# Force the plain Python path (no Cython serializers)
331+
prepared = self._make_prepared(column_metadata, serializers=None)
332+
333+
bound = BoundStatement(prepared_statement=prepared)
334+
with pytest.raises(TypeError) as exc:
335+
bound.bind((2**31,))
336+
msg = str(exc.value)
337+
assert 'v0' in msg
338+
assert 'Int32Type' in msg

0 commit comments

Comments
 (0)