Emit UserWarning when max_workers exceeds cap, revert cap to 3

Abel Milash · claude · Abel Milash · commit 2601bd4a31b9 · 2026-04-13T12:29:09.000-07:00
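Replace silent capping in _dispatch_chunks with an explicit UserWarning
so callers are informed when their max_workers value is reduced. Revert
_MAX_WORKERS to 3 (in this diff the value line is unchanged; only its
trailing comment drops the word "silently"). Remove two internal
implementation comments from _upsert_multiple and reword the cache
comments in _bulk_fetch_picklists. Update tests to assert the warning is
emitted when the cap is exceeded and is not emitted at exactly the cap.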

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/src/PowerPlatform/Dataverse/data/_odata.py b/src/PowerPlatform/Dataverse/data/_odata.py
@@ -58,7 +58,7 @@
 _DEFAULT_EXPECTED_STATUSES: tuple[int, ...] = (200, 201, 202, 204)
 _MULTIPLE_BATCH_SIZE = 1000
 # Concurrent chunk dispatch settings
-_MAX_WORKERS = 3                # maximum concurrent worker threads; values above this are silently capped
+_MAX_WORKERS = 3                # maximum concurrent worker threads; values above this are capped
 _CHUNK_RETRY_LIMIT = 3          # max retries per chunk on transient errors
 _CHUNK_RETRY_DEFAULT_WAIT = 60  # seconds to wait when Retry-After header is absent
 _CHUNK_RETRY_JITTER_MAX = 5     # seconds of random jitter added to Retry-After to desynchronise workers
@@ -67,8 +67,8 @@
 def _dispatch_chunks(fn: Callable, chunks: List, max_workers: int) -> List:
     """Dispatch ``fn(chunk)`` for each chunk, sequentially or concurrently.
 
-    ``max_workers`` is silently capped to ``_MAX_WORKERS`` (3) so callers
-    that pass a larger value are not penalised with an error.
+    If ``max_workers`` exceeds ``_MAX_WORKERS`` (3) a :class:`UserWarning` is
+    issued and the value is capped.
 
     When ``max_workers == 1`` or there is only one chunk, runs sequentially
     with no thread overhead.  When ``max_workers > 1`` and there are multiple
@@ -85,10 +85,16 @@ def _dispatch_chunks(fn: Callable, chunks: List, max_workers: int) -> List:
 
     :param fn: Callable that accepts a single chunk and returns a result.
     :param chunks: List of chunks to process.
-    :param max_workers: Maximum number of concurrent worker threads (capped to ``_MAX_WORKERS``).
+    :param max_workers: Maximum number of concurrent worker threads.
     :return: List of results in chunk submission order.
     """
-    max_workers = min(max_workers, _MAX_WORKERS)
+    if max_workers > _MAX_WORKERS:
+        warnings.warn(
+            f"max_workers={max_workers} exceeds the maximum of {_MAX_WORKERS}; capping to {_MAX_WORKERS}.",
+            UserWarning,
+            stacklevel=2,
+        )
+        max_workers = _MAX_WORKERS
 
     def _execute_with_retry(chunk):
         for attempt in range(_CHUNK_RETRY_LIMIT + 1):
@@ -585,18 +591,11 @@ def _upsert_multiple(
            When input exceeds ``_MULTIPLE_BATCH_SIZE`` records, the operation is
            split into multiple requests and is **not atomic** across batches.
         """
-        # Validation uses ValueError (not ValidationError) because this is a
-        # caller-facing precondition check, not a service error.  The batch path
-        # (_build_upsert_multiple) raises ValidationError for the same conditions
-        # because batch errors carry structured subcodes.
         if len(alternate_keys) != len(records):
             raise ValueError(
                 f"alternate_keys and records must have the same length " f"({len(alternate_keys)} != {len(records)})"
             )
         logical_name = table_schema_name.lower()
-        # Pre-process all targets before chunking so that validation (key
-        # conflicts, label conversion) runs eagerly.  This means all records
-        # are held in memory at once, which is acceptable for typical workloads.
         targets: List[Dict[str, Any]] = []
         for alt_key, record in zip(alternate_keys, records):
             alt_key_lower = self._lowercase_keys(alt_key)
@@ -1386,15 +1385,15 @@ def _bulk_fetch_picklists(self, table_schema_name: str) -> None:
         """
         table_key = self._normalize_cache_key(table_schema_name)
         now = time.time()
-        # Fast path — lock-free read for the warm-cache case (common in sequential and
+        # Lock-free read for the warm-cache case (common in sequential and
         # subsequent concurrent calls once the cache is populated).
         table_entry = self._picklist_label_cache.get(table_key)
         if isinstance(table_entry, dict) and (now - table_entry.get("ts", 0)) < self._picklist_cache_ttl_seconds:
             return
 
-        # Slow path — serialise concurrent cold-start fetches so only one thread
-        # makes the metadata HTTP call.  Re-check inside the lock (double-checked
-        # locking) in case another thread populated the cache while we waited.
+        # Serialise concurrent cold-start fetches so only one thread makes the
+        # metadata HTTP call.  Re-check inside the lock (double-checked locking)
+        # in case another thread populated the cache while we waited.
         with self._picklist_cache_lock:
             now = time.time()
             table_entry = self._picklist_label_cache.get(table_key)
diff --git a/tests/unit/data/test_multiple_chunking.py b/tests/unit/data/test_multiple_chunking.py
@@ -714,7 +714,8 @@ def fn(_):
             call_count[0] += 1
             return "result"
 
-        results = self._dispatch(fn, [["only"]], max_workers=4)
+        with self.assertWarns(UserWarning):
+            results = self._dispatch(fn, [["only"]], max_workers=4)
         self.assertEqual(results, ["result"])
         self.assertEqual(call_count[0], 1)
 
@@ -809,7 +810,8 @@ def test_max_workers_above_cap_is_capped(self):
             "PowerPlatform.Dataverse.data._odata.ThreadPoolExecutor",
             wraps=ThreadPoolExecutor,
         ) as mock_pool:
-            results = self._dispatch(lambda c: c, chunks, max_workers=_MAX_WORKERS + 100)
+            with self.assertWarns(UserWarning):
+                results = self._dispatch(lambda c: c, chunks, max_workers=_MAX_WORKERS + 100)
 
         mock_pool.assert_called_once_with(max_workers=_MAX_WORKERS)
         self.assertEqual(results, chunks)
@@ -1088,30 +1090,36 @@ def test_create_true_max_workers_accepted(self):
 
 
 class TestDispatchChunksCap(unittest.TestCase):
-    """_dispatch_chunks silently caps max_workers to _MAX_WORKERS."""
+    """_dispatch_chunks caps max_workers to _MAX_WORKERS and emits a UserWarning."""
 
     def setUp(self):
         from PowerPlatform.Dataverse.data._odata import _dispatch_chunks, _MAX_WORKERS
 
         self._dispatch = _dispatch_chunks
         self._cap = _MAX_WORKERS
 
-    def test_above_cap_is_capped(self):
-        """max_workers above _MAX_WORKERS is silently capped; no error raised."""
+    def test_above_cap_emits_warning(self):
+        """max_workers above _MAX_WORKERS emits a UserWarning and still returns results."""
         called = []
 
         def fn(chunk):
             called.append(chunk)
             return chunk
 
-        # 2 chunks with max_workers above cap — should not raise
-        result = self._dispatch(fn, ["a", "b"], max_workers=self._cap + 10)
+        with self.assertWarns(UserWarning) as cm:
+            result = self._dispatch(fn, ["a", "b"], max_workers=self._cap + 10)
+
         self.assertEqual(result, ["a", "b"])
         self.assertEqual(called, ["a", "b"])
-
-    def test_exactly_at_cap_is_accepted(self):
-        """max_workers == _MAX_WORKERS dispatches concurrently without capping."""
-        results = self._dispatch(lambda c: c, ["x", "y"], max_workers=self._cap)
+        self.assertIn(str(self._cap + 10), str(cm.warning))
+        self.assertIn(str(self._cap), str(cm.warning))
+
+    def test_exactly_at_cap_no_warning(self):
+        """max_workers == _MAX_WORKERS dispatches without capping or warning."""
+        import warnings as _warnings
+        with _warnings.catch_warnings():
+            _warnings.simplefilter("error")
+            results = self._dispatch(lambda c: c, ["x", "y"], max_workers=self._cap)
         self.assertEqual(results, ["x", "y"])
 
     def test_max_workers_1_is_accepted(self):