Skip to content

Commit ee3c279

Browse files
committed
(fix) row_parser: restrict ParseDesc cache to prepared statements and clear on shutdown
Fix unbounded cache growth for non-prepared queries by splitting the cache logic into two paths: - _get_or_build_parse_desc: cached path, used only when column_metadata comes from result_metadata (prepared statements, whose metadata list has a stable id()). - _build_parse_desc: uncached path, used for inline metadata from non-prepared queries, which create a fresh list on every execution. Also add a clear_parse_desc_cache() call in Cluster.shutdown() to release cached ParseDesc entries (and the type objects they reference) when the cluster is torn down. Additional minor fixes: - Move the <tuple> cast after the None check to avoid casting None. - Expand the comments documenting thread-safety guarantees and cache scope.
1 parent eb3ae98 commit ee3c279

2 files changed

Lines changed: 49 additions & 12 deletions

File tree

cassandra/cluster.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@
114114
except ImportError:
115115
from cassandra.util import WeakSet # NOQA
116116

117+
try:
118+
from cassandra.row_parser import clear_parse_desc_cache as _clear_parse_desc_cache
119+
except ImportError:
120+
_clear_parse_desc_cache = None
121+
117122
def _is_gevent_monkey_patched():
118123
if 'gevent.monkey' not in sys.modules:
119124
return False
@@ -1800,6 +1805,9 @@ def shutdown(self):
18001805

18011806
_discard_cluster_shutdown(self)
18021807

1808+
if _clear_parse_desc_cache is not None:
1809+
_clear_parse_desc_cache()
1810+
18031811
def __enter__(self):
18041812
return self
18051813

cassandra/row_parser.pyx

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,40 +21,61 @@ include "ioutils.pyx"
2121

2222
# Cache for ParseDesc objects keyed by id(column_metadata).
2323
# For prepared statements, result_metadata is stored on PreparedStatement
24-
# and reused across executions, so id() is stable.
24+
# and reused across executions, so id() is stable. The cache is only
25+
# populated on the prepared-statement path (where column_metadata comes from
26+
# result_metadata); inline metadata from non-prepared queries is always fresh
27+
# and must not be cached to avoid unbounded growth.
28+
#
2529
# Cache value: (column_metadata_ref, column_encryption_policy_ref,
2630
# protocol_version, desc, column_names, column_types)
31+
#
32+
# Thread safety: individual dict operations are atomic under CPython's GIL
33+
# and under free-threaded builds (PEP 703). This cache relies on that
34+
# guarantee; no additional locking is needed.
2735
cdef dict _parse_desc_cache = {}
2836

2937
cdef inline object _get_or_build_parse_desc(object column_metadata, object column_encryption_policy, int protocol_version):
30-
"""Look up or build a ParseDesc for the given column_metadata."""
38+
"""Look up or build a ParseDesc for the given column_metadata (cached path)."""
3139
cdef object cache_key = id(column_metadata)
32-
cdef tuple cached = <tuple>_parse_desc_cache.get(cache_key)
40+
cdef object cached_or_none = _parse_desc_cache.get(cache_key)
3341

34-
if cached is not None:
35-
# Verify identity — the object at this id must be the same list
42+
if cached_or_none is not None:
43+
# Verify identity -- the object at this id must be the same list
3644
# and session-level settings must match
45+
cached = <tuple>cached_or_none
3746
if (cached[0] is column_metadata and
3847
cached[1] is column_encryption_policy and
3948
cached[2] == protocol_version):
4049
return cached # hit
4150

42-
# Cache miss — build everything
51+
# Cache miss -- build everything
4352
cdef list column_names = [md[2] for md in column_metadata]
4453
cdef list column_types = [md[3] for md in column_metadata]
4554
cdef object desc = ParseDesc(
4655
column_names, column_types, column_encryption_policy,
4756
[ColDesc(md[0], md[1], md[2]) for md in column_metadata],
4857
make_deserializers(column_types), protocol_version)
4958

50-
cached = (column_metadata, column_encryption_policy, protocol_version,
51-
desc, column_names, column_types)
52-
_parse_desc_cache[cache_key] = cached
53-
return cached
59+
cdef tuple cached_entry = (column_metadata, column_encryption_policy,
60+
protocol_version, desc, column_names, column_types)
61+
_parse_desc_cache[cache_key] = cached_entry
62+
return cached_entry
63+
64+
65+
cdef inline object _build_parse_desc(object column_metadata, object column_encryption_policy, int protocol_version):
66+
"""Build a ParseDesc without caching (for non-prepared inline metadata)."""
67+
cdef list column_names = [md[2] for md in column_metadata]
68+
cdef list column_types = [md[3] for md in column_metadata]
69+
cdef object desc = ParseDesc(
70+
column_names, column_types, column_encryption_policy,
71+
[ColDesc(md[0], md[1], md[2]) for md in column_metadata],
72+
make_deserializers(column_types), protocol_version)
73+
return (column_metadata, column_encryption_policy, protocol_version,
74+
desc, column_names, column_types)
5475

5576

5677
def clear_parse_desc_cache():
57-
"""Clear the ParseDesc cache. Exposed for testing."""
78+
"""Clear the ParseDesc cache. Exposed for testing and Cluster.shutdown()."""
5879
_parse_desc_cache.clear()
5980

6081

@@ -68,7 +89,15 @@ def make_recv_results_rows(ColumnParser colparser):
6889

6990
column_metadata = self.column_metadata or result_metadata
7091

71-
cached = _get_or_build_parse_desc(column_metadata, column_encryption_policy, protocol_version)
92+
# Only use the cache for prepared statements (self.column_metadata is
93+
# None, so column_metadata comes from result_metadata which is a
94+
# stable list stored on PreparedStatement). Inline metadata from
95+
# non-prepared queries creates a fresh list every time and would
96+
# cause unbounded cache growth.
97+
if self.column_metadata is None and result_metadata is not None:
98+
cached = _get_or_build_parse_desc(column_metadata, column_encryption_policy, protocol_version)
99+
else:
100+
cached = _build_parse_desc(column_metadata, column_encryption_policy, protocol_version)
72101
self.column_names = cached[4]
73102
self.column_types = cached[5]
74103
desc = cached[3]

0 commit comments

Comments
 (0)