Add max_workers concurrency support to bulk create/update/upsert

Abel Milash · Abel Milash · commit 62c0ab610b93 · 2026-04-11T18:15:15.000-07:00
diff --git a/.claude/skills/dataverse-sdk-use/SKILL.md b/.claude/skills/dataverse-sdk-use/SKILL.md
@@ -25,7 +25,7 @@ Use the PowerPlatform Dataverse Client Python SDK to interact with Microsoft Dat
 - `client.batch` -- batch multiple operations into a single HTTP request
 
 ### Bulk Operations
-The SDK supports Dataverse's native bulk operations: Pass lists to `create()`, `update()`, or `upsert()` for automatic bulk processing; for `delete()`, set `use_bulk_delete=True`. Lists exceeding 1,000 records are automatically split into 1,000-record chunks — no manual pre-splitting needed. By default chunks are dispatched sequentially; pass `max_workers=N` (recommended: 3–4) to dispatch chunks concurrently via threads. Operations across chunks are **not atomic**: a failure mid-way may leave earlier chunks applied. Callers that require atomicity should limit their input to ≤ 1,000 records.
+The SDK supports Dataverse's native bulk operations: Pass lists to `create()`, `update()`, or `upsert()` for automatic bulk processing; for `delete()`, pass a list and set `use_bulk_delete=True` to use the bulk delete operation. Lists exceeding 1,000 records are automatically split into 1,000-record chunks — no manual pre-splitting needed. By default chunks are dispatched sequentially; pass `max_workers=N` (max: 3, default: 1; larger values are clamped to 3) to dispatch chunks concurrently via threads. Operations across chunks are **not atomic**: a failure mid-way may leave earlier chunks applied. Callers that require atomicity should limit their input to ≤ 1,000 records.
 
 ### Paging
 - Control page size with `page_size` parameter
@@ -465,12 +465,11 @@ except ValidationError as e:
 ### Performance Optimization
 
 1. **Use bulk operations** - Pass lists to create/update/delete for automatic optimization
-2. **Use `max_workers`** - Pass `max_workers=3` (or 4) to dispatch 1,000-record chunks concurrently; safe for large datasets where throughput matters more than strict sequential ordering. Dataverse throttles concurrent requests server-side, so values above 4 rarely help and may trigger 429 rate-limiting
-3. **Specify select fields** - Limit returned columns to reduce payload size
-4. **Control page size** - Use `top` and `page_size` parameters appropriately
-5. **Reuse client instances** - Don't create new clients for each operation
-6. **Use production credentials** - ClientSecretCredential or CertificateCredential for unattended operations
-7. **Error handling** - Implement retry logic for transient errors (`e.is_transient`)
+2. **Specify select fields** - Limit returned columns to reduce payload size
+3. **Control page size** - Use `top` and `page_size` parameters appropriately
+4. **Reuse client instances** - Don't create new clients for each operation
+5. **Use production credentials** - ClientSecretCredential or CertificateCredential for unattended operations
+6. **Error handling** - Implement retry logic for transient errors (`e.is_transient`)
 7. **Always include customization prefix** for custom tables/columns
 8. **Use lowercase for column names, match `$metadata` for navigation properties** - Column names in `$select`/`$filter`/record payloads use lowercase LogicalNames. Navigation properties in `$expand` and `@odata.bind` keys are case-sensitive and must match the entity's `$metadata` (PascalCase for custom lookups like `new_CustomerId`, lowercase for system lookups like `parentaccountid`)
 9. **Test in non-production environments** first
diff --git a/README.md b/README.md
@@ -185,15 +185,14 @@ client.records.update("account", ids, {"industry": "Technology"})
 # Bulk delete
 client.records.delete("account", ids, use_bulk_delete=True)
 
-# Concurrent chunk dispatch — max_workers sends chunks in parallel via threads
-# Recommended for large datasets when latency matters more than strict ordering
+# Concurrent chunk dispatch — max_workers sends chunks in parallel via threads (max: 3, default: 1)
 ids = client.records.create("account", payloads, max_workers=3)
 client.records.update("account", ids, {"industry": "Technology"}, max_workers=3)
 ```
 
 > **Large batches**: Lists exceeding 1,000 records are automatically split into 1,000-record
 > chunks — no manual pre-splitting needed. By default chunks are dispatched sequentially.
-> Pass `max_workers=N` (recommended: 3–4) to send chunks concurrently via threads — useful
+> Pass `max_workers=N` (max: 3, default: 1) to send chunks concurrently via threads — useful
 > when throughput matters more than strict sequential ordering. Note that chunked operations
 > are **not atomic**: a failure mid-way may leave earlier chunks applied. Callers that
 > require atomicity should limit their input to ≤ 1,000 records.
diff --git a/src/PowerPlatform/Dataverse/claude_skill/dataverse-sdk-use/SKILL.md b/src/PowerPlatform/Dataverse/claude_skill/dataverse-sdk-use/SKILL.md
@@ -25,7 +25,7 @@ Use the PowerPlatform Dataverse Client Python SDK to interact with Microsoft Dat
 - `client.batch` -- batch multiple operations into a single HTTP request
 
 ### Bulk Operations
-The SDK supports Dataverse's native bulk operations: Pass lists to `create()`, `update()`, or `upsert()` for automatic bulk processing; for `delete()`, set `use_bulk_delete=True`. Lists exceeding 1,000 records are automatically split into 1,000-record chunks — no manual pre-splitting needed. By default chunks are dispatched sequentially; pass `max_workers=N` (recommended: 3–4) to dispatch chunks concurrently via threads. Operations across chunks are **not atomic**: a failure mid-way may leave earlier chunks applied. Callers that require atomicity should limit their input to ≤ 1,000 records.
+The SDK supports Dataverse's native bulk operations: Pass lists to `create()`, `update()`, or `upsert()` for automatic bulk processing; for `delete()`, pass a list and set `use_bulk_delete=True` to use the bulk delete operation. Lists exceeding 1,000 records are automatically split into 1,000-record chunks — no manual pre-splitting needed. By default chunks are dispatched sequentially; pass `max_workers=N` (max: 3, default: 1; larger values are clamped to 3) to dispatch chunks concurrently via threads. Operations across chunks are **not atomic**: a failure mid-way may leave earlier chunks applied. Callers that require atomicity should limit their input to ≤ 1,000 records.
 
 ### Paging
 - Control page size with `page_size` parameter
diff --git a/src/PowerPlatform/Dataverse/data/_odata.py b/src/PowerPlatform/Dataverse/data/_odata.py
@@ -57,47 +57,56 @@
 _CALL_SCOPE_CORRELATION_ID: ContextVar[Optional[str]] = ContextVar("_CALL_SCOPE_CORRELATION_ID", default=None)
 _DEFAULT_EXPECTED_STATUSES: tuple[int, ...] = (200, 201, 202, 204)
 _MULTIPLE_BATCH_SIZE = 1000
-# Concurrent chunk dispatch: 429 retry settings
-_CHUNK_RETRY_LIMIT = 3          # max retries per chunk on 429
+# Concurrent chunk dispatch settings
+_MAX_WORKERS = 3                # maximum concurrent worker threads; values above this are silently clamped
+_CHUNK_RETRY_LIMIT = 3          # max retries per chunk on transient errors
 _CHUNK_RETRY_DEFAULT_WAIT = 60  # seconds to wait when Retry-After header is absent
 _CHUNK_RETRY_JITTER_MAX = 5     # seconds of random jitter added to Retry-After to desynchronise workers
 
 
 def _dispatch_chunks(fn: Callable, chunks: List, max_workers: int) -> List:
     """Dispatch ``fn(chunk)`` for each chunk, sequentially or concurrently.
 
+    ``max_workers`` is silently clamped to ``_MAX_WORKERS`` (3) so callers
+    that pass a larger value are not penalised with an error.
+
     When ``max_workers == 1`` or there is only one chunk, runs sequentially
     with no thread overhead.  When ``max_workers > 1`` and there are multiple
-    chunks, submits all chunks to a :class:`~concurrent.futures.ThreadPoolExecutor`
-    and collects results in submission order (preserving chunk ordering).
+    chunks, submits all chunks to a :class:`~concurrent.futures.ThreadPoolExecutor`.
+    Results are collected by iterating the futures list in submission order —
+    ``futures[i].result()`` blocks until chunk *i* finishes, so the returned
+    list is always in chunk-submission order regardless of thread completion order.
 
-    On HTTP 429 (rate limit) each worker retries up to ``_CHUNK_RETRY_LIMIT``
-    times, sleeping for the ``Retry-After`` duration (falling back to
-    ``_CHUNK_RETRY_DEFAULT_WAIT`` seconds) plus a random jitter of up to
-    ``_CHUNK_RETRY_JITTER_MAX`` seconds to desynchronise concurrent retries.
+    On transient HTTP errors (429, 502, 503, 504) each worker retries up to
+    ``_CHUNK_RETRY_LIMIT`` times, sleeping for the ``Retry-After`` duration
+    (falling back to ``_CHUNK_RETRY_DEFAULT_WAIT`` seconds) plus a random jitter
+    of up to ``_CHUNK_RETRY_JITTER_MAX`` seconds to desynchronise concurrent
+    retries.  The sequential path applies the same retry logic.
 
     :param fn: Callable that accepts a single chunk and returns a result.
     :param chunks: List of chunks to process.
-    :param max_workers: Maximum number of concurrent worker threads.
+    :param max_workers: Maximum number of concurrent worker threads (clamped to ``_MAX_WORKERS``).
     :return: List of results in chunk submission order.
     """
-    if max_workers == 1 or len(chunks) <= 1:
-        return [fn(chunk) for chunk in chunks]
+    max_workers = min(max_workers, _MAX_WORKERS)
 
-    def _with_backoff(chunk):
+    def _execute_with_retry(chunk):
         for attempt in range(_CHUNK_RETRY_LIMIT + 1):
             try:
                 return fn(chunk)
             except HttpError as exc:
-                if exc.status_code == 429 and attempt < _CHUNK_RETRY_LIMIT:
-                    wait = (exc.details.get("retry_after") or _CHUNK_RETRY_DEFAULT_WAIT)
+                if exc.is_transient and attempt < _CHUNK_RETRY_LIMIT:
+                    wait = float(exc.details.get("retry_after") or _CHUNK_RETRY_DEFAULT_WAIT)
                     wait += random.uniform(0, _CHUNK_RETRY_JITTER_MAX)
                     time.sleep(wait)
                 else:
                     raise
 
+    if max_workers == 1 or len(chunks) <= 1:
+        return [_execute_with_retry(chunk) for chunk in chunks]
+
     with ThreadPoolExecutor(max_workers=max_workers) as pool:
-        futures = [pool.submit(_with_backoff, chunk) for chunk in chunks]
+        futures = [pool.submit(_execute_with_retry, chunk) for chunk in chunks]
         return [f.result() for f in futures]
 
 
@@ -248,7 +257,7 @@ def __init__(
         self._logical_primaryid_cache: dict[str, str] = {}
         self._picklist_label_cache: dict[str, dict] = {}
         self._picklist_cache_ttl_seconds = 3600  # 1 hour TTL
-        self._picklist_cache_lock = threading.Lock()  # serialises cold-start fetches under concurrent workers
+        self._picklist_cache_lock = threading.Lock()  # prevents concurrent threads from making duplicate picklist metadata fetches on cold start
 
     @contextmanager
     def _call_scope(self):
@@ -431,23 +440,21 @@ def _create_multiple(
             ``1`` (default) dispatches sequentially.
         :type max_workers: ``int``
 
-        :return: List of created record GUIDs in chunk-submission order
-            (may be empty if response lacks IDs).
+        :return: List of created record GUIDs (may be empty if response lacks IDs).
         :rtype: ``list[str]``
 
         .. note::
            Logical type stamping: if any payload omits ``@odata.type`` the client injects ``Microsoft.Dynamics.CRM.<table_logical_name>``. If all payloads already include ``@odata.type`` no modification occurs.
 
         .. warning::
-           When input exceeds ``_MULTIPLE_BATCH_SIZE`` records, the operation is
-           split into multiple requests and is **not atomic**. If a later batch
-           fails, earlier batches are already committed. Callers that require
-           atomicity should limit input to ``<= _MULTIPLE_BATCH_SIZE`` records.
+           When input exceeds ``_MULTIPLE_BATCH_SIZE`` records, the operation is split into multiple requests
+           and is **not atomic**. If a later batch fails, earlier batches are already committed. Callers
+           that require atomicity should limit input to ``<= _MULTIPLE_BATCH_SIZE`` records.
         """
         if not all(isinstance(r, dict) for r in records):
             raise TypeError("All items for multi-create must be dicts")
 
-        def _send(chunk: List[Dict[str, Any]]) -> List[str]:
+        def _execute_chunk(chunk: List[Dict[str, Any]]) -> List[str]:
             r = self._execute_raw(self._build_create_multiple(entity_set, table_schema_name, chunk))
             try:
                 body = r.json() if r.text else {}
@@ -463,6 +470,7 @@ def _send(chunk: List[Dict[str, Any]]) -> List[str]:
                 out: List[str] = []
                 for item in value:
                     if isinstance(item, dict):
+                        # Heuristic: look for a property ending with 'id'
                         for k, v in item.items():
                             if isinstance(k, str) and k.lower().endswith("id") and isinstance(v, str) and len(v) >= 32:
                                 out.append(v)
@@ -471,7 +479,7 @@ def _send(chunk: List[Dict[str, Any]]) -> List[str]:
             return []
 
         chunks = [records[i : i + _MULTIPLE_BATCH_SIZE] for i in range(0, len(records), _MULTIPLE_BATCH_SIZE)]
-        results = _dispatch_chunks(_send, chunks, max_workers)
+        results = _dispatch_chunks(_execute_chunk, chunks, max_workers)
         return [guid for batch_ids in results for guid in batch_ids]
 
     def _build_alternate_key_str(self, alternate_key: Dict[str, Any]) -> str:
@@ -596,11 +604,11 @@ def _upsert_multiple(
 
         url = f"{self.api}/{entity_set}/Microsoft.Dynamics.CRM.UpsertMultiple"
 
-        def _send(chunk):
+        def _execute_chunk(chunk):
             self._request("post", url, json={"Targets": chunk}, expected=(200, 201, 204))
 
         chunks = [targets[i : i + _MULTIPLE_BATCH_SIZE] for i in range(0, len(targets), _MULTIPLE_BATCH_SIZE)]
-        _dispatch_chunks(_send, chunks, max_workers)
+        _dispatch_chunks(_execute_chunk, chunks, max_workers)
 
     # --- Derived helpers for high-level client ergonomics ---
     def _primary_id_attr(self, table_schema_name: str) -> str:
@@ -757,11 +765,11 @@ def _update_multiple(
         if not isinstance(records, list) or not records or not all(isinstance(r, dict) for r in records):
             raise TypeError("records must be a non-empty list[dict]")
 
-        def _send(chunk):
+        def _execute_chunk(chunk):
             self._execute_raw(self._build_update_multiple_from_records(entity_set, table_schema_name, chunk))
 
         chunks = [records[i : i + _MULTIPLE_BATCH_SIZE] for i in range(0, len(records), _MULTIPLE_BATCH_SIZE)]
-        _dispatch_chunks(_send, chunks, max_workers)
+        _dispatch_chunks(_execute_chunk, chunks, max_workers)
         return None
 
     def _delete(self, table_schema_name: str, key: str) -> None:
diff --git a/src/PowerPlatform/Dataverse/operations/records.py b/src/PowerPlatform/Dataverse/operations/records.py
@@ -100,7 +100,7 @@ def create(
                 print(f"Created {len(guids)} accounts")
         """
         if not isinstance(max_workers, int) or max_workers < 1:
-            raise ValueError("max_workers must be a positive integer")
+            raise ValueError("max_workers must be a positive integer (1–3; values above 3 are clamped to 3)")
         with self._client._scoped_odata() as od:
             entity_set = od._entity_set_from_schema_name(table)
             if isinstance(data, dict):
@@ -171,7 +171,7 @@ def update(
                 )
         """
         if not isinstance(max_workers, int) or max_workers < 1:
-            raise ValueError("max_workers must be a positive integer")
+            raise ValueError("max_workers must be a positive integer (1–3; values above 3 are clamped to 3)")
         with self._client._scoped_odata() as od:
             if isinstance(ids, str):
                 if not isinstance(changes, dict):
@@ -566,7 +566,7 @@ def upsert(self, table: str, items: List[Union[UpsertItem, Dict[str, Any]]], *,
             ``{"accountnumber": "ACC-001", "address1_postalcode": "98052"}``.
         """
         if not isinstance(max_workers, int) or max_workers < 1:
-            raise ValueError("max_workers must be a positive integer")
+            raise ValueError("max_workers must be a positive integer (1–3; values above 3 are clamped to 3)")
         if not isinstance(items, list) or not items:
             raise TypeError("items must be a non-empty list of UpsertItem or dicts")
         normalized: List[UpsertItem] = []
diff --git a/tests/unit/data/test_multiple_chunking.py b/tests/unit/data/test_multiple_chunking.py