Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 49 additions & 39 deletions cassandra/tablets.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,53 @@
import sys
from operator import attrgetter
from threading import Lock
from typing import Optional
from uuid import UUID

# C-accelerated attrgetter avoids per-call lambda allocation overhead
_get_first_token = attrgetter("first_token")
_get_last_token = attrgetter("last_token")

# On Python >= 3.10, bisect.bisect_left supports the key= parameter and is
# implemented in C -- roughly 2.5-3.5x faster than the pure-Python fallback.
# Keep the fallback for Python < 3.10 (which lacks key= support).
if sys.version_info >= (3, 10):
from bisect import bisect_left
else:

def bisect_left(a, x, lo=0, hi=None, *, key=None):
"""Return the index where to insert item x in list a, assuming a is sorted.

The return value i is such that all e in a[:i] have e < x, and all e in
a[i:] have e >= x. So if x already appears in the list, a.insert(i, x) will
insert just before the leftmost x already there.

Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""

if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
# Note, the comparison uses "<" to match the
# __lt__() logic in list.sort() and in heapq.
if key is None:
while lo < hi:
mid = (lo + hi) // 2
if a[mid] < x:
lo = mid + 1
else:
hi = mid
return lo
while lo < hi:
mid = (lo + hi) // 2
if key(a[mid]) < x:
lo = mid + 1
else:
hi = mid
return lo


class Tablet(object):
"""
Expand Down Expand Up @@ -57,7 +103,7 @@ def get_tablet_for_key(self, keyspace, table, t):
if not tablet:
return None

id = bisect_left(tablet, t.value, key=lambda tablet: tablet.last_token)
id = bisect_left(tablet, t.value, key=_get_last_token)
if id < len(tablet) and t.value > tablet[id].first_token:
return tablet[id]
return None
Expand Down Expand Up @@ -94,12 +140,12 @@ def add_tablet(self, keyspace, table, tablet):
tablets_for_table = self._tablets.setdefault((keyspace, table), [])

# find first overlapping range
start = bisect_left(tablets_for_table, tablet.first_token, key=lambda t: t.first_token)
start = bisect_left(tablets_for_table, tablet.first_token, key=_get_first_token)
if start > 0 and tablets_for_table[start - 1].last_token > tablet.first_token:
start = start - 1

# find last overlapping range
end = bisect_left(tablets_for_table, tablet.last_token, key=lambda t: t.last_token)
end = bisect_left(tablets_for_table, tablet.last_token, key=_get_last_token)
if end < len(tablets_for_table) and tablets_for_table[end].first_token >= tablet.last_token:
end = end - 1

Expand All @@ -108,39 +154,3 @@ def add_tablet(self, keyspace, table, tablet):

tablets_for_table.insert(start, tablet)


# bisect.bisect_left implementation from Python 3.11, needed untill support for
# Python < 3.10 is dropped, it is needed to use `key` to extract last_token from
# Tablet list - better solution performance-wise than materialize list of last_tokens
def bisect_left(a, x, lo=0, hi=None, *, key=None):
"""Return the index where to insert item x in list a, assuming a is sorted.

The return value i is such that all e in a[:i] have e < x, and all e in
a[i:] have e >= x. So if x already appears in the list, a.insert(i, x) will
insert just before the leftmost x already there.

Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""

if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
# Note, the comparison uses "<" to match the
# __lt__() logic in list.sort() and in heapq.
if key is None:
while lo < hi:
mid = (lo + hi) // 2
if a[mid] < x:
lo = mid + 1
else:
hi = mid
return
while lo < hi:
mid = (lo + hi) // 2
if key(a[mid]) < x:
lo = mid + 1
else:
hi = mid
return lo
107 changes: 107 additions & 0 deletions tests/unit/test_tablets.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,110 @@ def test_add_tablet_intersecting_with_last(self):

self.compare_ranges(tablets_list, [(-8611686018427387905, -7917529027641081857),
(-5011686018427387905, -2987529027641081857)])


class BisectLeftFallbackTest(unittest.TestCase):
"""Tests for the pure-Python bisect_left fallback.

On Python >= 3.10 the stdlib C implementation is used, but we keep a
fallback for older interpreters. The original fallback had a bug: the
key=None branch executed ``return`` (returning None) instead of
``return lo``. These tests exercise the fallback directly regardless
of Python version.
"""

def _get_fallback(self):
"""Import the module and grab the fallback even on Python >= 3.10."""
import importlib, types, sys as _sys

# Re-execute the module body with a fake sys.version_info < 3.10
# so the else-branch is taken and the fallback is defined.
source_path = "cassandra/tablets.py"
with open(source_path) as f:
source = f.read()

fake_mod = types.ModuleType("_tablets_fallback")
fake_mod.__file__ = source_path

# Patch sys.version_info to (3, 9) so the else branch runs
original_vi = _sys.version_info
_sys.version_info = (3, 9, 0, "final", 0)
try:
exec(compile(source, source_path, "exec"), fake_mod.__dict__)
finally:
_sys.version_info = original_vi

return fake_mod.bisect_left

def test_key_none_returns_int(self):
"""The key=None branch must return an int, not None (bug fix)."""
bisect_left = self._get_fallback()
result = bisect_left([1, 3, 5, 7], 4)
self.assertEqual(result, 2)

def test_key_none_empty_list(self):
bisect_left = self._get_fallback()
self.assertEqual(bisect_left([], 42), 0)

def test_key_none_insert_at_start(self):
bisect_left = self._get_fallback()
self.assertEqual(bisect_left([10, 20, 30], 5), 0)

def test_key_none_insert_at_end(self):
bisect_left = self._get_fallback()
self.assertEqual(bisect_left([10, 20, 30], 40), 3)

def test_key_none_exact_match(self):
bisect_left = self._get_fallback()
# bisect_left returns leftmost position
self.assertEqual(bisect_left([10, 20, 20, 30], 20), 1)

def test_with_key_function(self):
"""key= branch should still work correctly."""
bisect_left = self._get_fallback()
pairs = [(1, "a"), (3, "b"), (5, "c"), (7, "d")]
idx = bisect_left(pairs, 4, key=lambda p: p[0])
self.assertEqual(idx, 2)

def test_lo_negative_raises(self):
bisect_left = self._get_fallback()
with self.assertRaises(ValueError):
bisect_left([1, 2, 3], 2, lo=-1)


class GetTabletForKeyTest(unittest.TestCase):
"""Tests for Tablets.get_tablet_for_key."""

def test_found(self):
t1 = Tablet(0, 100, [("host1", 0)])
t2 = Tablet(100, 200, [("host2", 0)])
t3 = Tablet(200, 300, [("host3", 0)])
tablets = Tablets({("ks", "tb"): [t1, t2, t3]})

class Token:
def __init__(self, v):
self.value = v

result = tablets.get_tablet_for_key("ks", "tb", Token(150))
self.assertIs(result, t2)

def test_not_found_empty(self):
tablets = Tablets({})

class Token:
def __init__(self, v):
self.value = v

self.assertIsNone(tablets.get_tablet_for_key("ks", "tb", Token(50)))

def test_not_found_outside_range(self):
t1 = Tablet(100, 200, [("host1", 0)])
tablets = Tablets({("ks", "tb"): [t1]})

class Token:
def __init__(self, v):
self.value = v

# Token value 50 is not > first_token (100) of the tablet whose
# last_token (200) is >= 50, so no match.
self.assertIsNone(tablets.get_tablet_for_key("ks", "tb", Token(50)))
Loading