scylladb · dkropachev · Jan 31, 2026 · Jan 31, 2026
diff --git a/docs/scylla-specific.rst b/docs/scylla-specific.rst
@@ -111,6 +111,40 @@ New Error Types
         raise
 
 
+Paging Differences
+------------------
+
+ScyllaDB has a built-in 1MB page size limit that Cassandra does not have. This means that even if you set a high ``fetch_size`` (e.g., 10000 rows), ScyllaDB may return fewer rows per page if the total response size exceeds 1MB.
+
+This behavior is particularly noticeable when:
+
+* Working with wide tables (many columns)
+* Using ``NumpyProtocolHandler`` where you want large arrays per page
+* Storing large values in columns (blobs, long strings, etc.)
+
+For example, with a table containing 1000 columns, you might receive only 30-50 rows per page even with ``fetch_size=10000``.
+
+**Workaround:** If you need to receive more rows per page (up to ScyllaDB's 1MB limit), set ``default_fetch_size`` to ``None``:
+
+.. code:: python
+
+    from cassandra.cluster import Cluster
+    from cassandra.protocol import NumpyProtocolHandler
+    from cassandra.query import tuple_factory
+
+    cluster = Cluster()
+    session = cluster.connect(keyspace="mykeyspace")
+    session.row_factory = tuple_factory
+    session.client_protocol_handler = NumpyProtocolHandler
+    session.default_fetch_size = None  # Let ScyllaDB control page sizes
+
+    results = session.execute("SELECT * FROM wide_table")
+
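+With ``default_fetch_size = None``, the driver won't request a specific page size, allowing ScyllaDB to fill pages up to its 1MB limit. This results in larger arrays when using ``NumpyProtocolHandler``. Note that the driver documents ``fetch_size=None`` as disabling automatic paging, so check memory use before applying this setting to queries with very large result sets.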
+
+For more details on paging, see :ref:`query-paging`.
+
+
 Tablet Awareness
 ----------------
 

diff --git a/tests/integration/standard/test_cython_protocol_handlers.py b/tests/integration/standard/test_cython_protocol_handlers.py
@@ -207,6 +207,120 @@ def verify_iterator_data(results):
     return count
 
 
+class NumpyWideTableTest(unittest.TestCase):
+    """
+    Test NumpyProtocolHandler with wide tables (many columns).
+
+    ScyllaDB has a built-in 1MB page size limit that can cause fewer rows
+    per page than requested when working with wide tables. Both tests read
+    the same fixture table and check that every row comes back, whatever
+    number of pages the result is split into.
+
+    See: https://github.com/scylladb/python-driver/issues/65
+    """
+
+    KEYSPACE = "test_wide_table"
+    N_COLUMNS = 200  # Number of int columns (plus primary key columns)
+    N_ROWS = 100
+
+    @classmethod
+    def setUpClass(cls):
+        """Connect to the cluster and build the wide fixture table."""
+        cls.cluster = TestCluster()
+        try:
+            cls.session = cls.cluster.connect()
+            cls._create_wide_table()
+        except Exception:
+            # unittest does not call tearDownClass when setUpClass raises,
+            # so the cluster must be released here or it would leak.
+            cls.cluster.shutdown()
+            raise
+
+    @classmethod
+    def _create_wide_table(cls):
+        """Create the keyspace and table, then insert N_ROWS rows."""
+        cls.session.execute(
+            "CREATE KEYSPACE IF NOT EXISTS {0} WITH replication = "
+            "{{ 'class' : 'SimpleStrategy', 'replication_factor': '1'}}".format(cls.KEYSPACE))
+        cls.session.set_keyspace(cls.KEYSPACE)
+
+        col_names = ["pk", "ck"] + ["col{0}".format(i) for i in range(cls.N_COLUMNS)]
+        # IF NOT EXISTS mirrors the keyspace statement, so a keyspace left
+        # behind by an interrupted run does not make setup fail.
+        cls.session.execute(
+            "CREATE TABLE IF NOT EXISTS wide_table ({0}, PRIMARY KEY (pk, ck))".format(
+                ", ".join("{0} int".format(name) for name in col_names)),
+            timeout=120
+        )
+
+        insert_cql = "INSERT INTO wide_table ({0}) VALUES ({1})".format(
+            ", ".join(col_names), ", ".join(["%s"] * len(col_names))
+        )
+        for row_idx in range(cls.N_ROWS):
+            # pk is always 0, so every row goes to one partition keyed by ck.
+            values = [0, row_idx] + [row_idx * 1000 + i for i in range(cls.N_COLUMNS)]
+            cls.session.execute(insert_cql, values, timeout=120)
+
+    @classmethod
+    def tearDownClass(cls):
+        """Pass the keyspace, session and cluster to the shared cleanup helper."""
+        drop_keyspace_shutdown_cluster(cls.KEYSPACE, cls.session, cls.cluster)
+
+    def _assert_all_rows_returned(self, fetch_size):
+        """
+        Read wide_table with NumpyProtocolHandler and check the row total.
+
+        A dedicated cluster and session are opened for the read, so the
+        class-level session used by setUpClass is left untouched.
+
+        :param fetch_size: value assigned to ``session.default_fetch_size``.
+        """
+        cluster = TestCluster(
+            execution_profiles={EXEC_PROFILE_DEFAULT: ExecutionProfile(row_factory=tuple_factory)}
+        )
+        # Registered up front so the cluster is shut down even when the
+        # query or the assertion below raises.
+        self.addCleanup(cluster.shutdown)
+        session = cluster.connect(keyspace=self.KEYSPACE)
+        session.client_protocol_handler = NumpyProtocolHandler
+        session.default_fetch_size = fetch_size
+
+        total_rows = 0
+        page_count = 0
+        for page in session.execute("SELECT * FROM wide_table"):
+            page_count += 1
+            # Row count of a page is the length of its 'pk' column array.
+            pk_values = page.get('pk')
+            if pk_values is not None:
+                total_rows += len(pk_values)
+
+        self.assertEqual(total_rows, self.N_ROWS,
+                         "Expected {0} rows total, got {1} across {2} pages".format(
+                             self.N_ROWS, total_rows, page_count))
+
+    @notprotocolv1
+    @numpytest
+    def test_numpy_wide_table_paging(self):
+        """
+        Test that NumpyProtocolHandler works with wide tables.
+
+        With ScyllaDB's 1MB page size limit, wide tables may return fewer
+        rows per page than the fetch_size requests. This test verifies
+        that all data is still returned correctly across multiple pages.
+        """
+        self._assert_all_rows_returned(fetch_size=1000)  # Request many rows per page
+
+    @notprotocolv1
+    @numpytest
+    def test_numpy_wide_table_no_fetch_size(self):
+        """
+        Test that setting fetch_size=None allows ScyllaDB to control page sizes.
+
+        This is the recommended workaround for getting larger pages with wide tables.
+        """
+        self._assert_all_rows_returned(fetch_size=None)  # Let server control page sizes
+
+
 class NumpyNullTest(BasicSharedKeyspaceUnitTestCase):
 
     @classmethod