Skip to content

Commit 59139b4

Browse files
author
Saurabh Badenkal
committed
Add batch edge case tests, DataFrame integration, and spec compliance fixes
Spec compliance and correctness:
- Skip empty changesets in _resolve_all instead of producing invalid multipart
- Extract content-id from non-changeset response parts (was passing None)

Edge case tests (40 new tests in test_batch_edge_cases.py):
- Empty changeset handling (skipped silently)
- Changeset error/rollback response parsing
- Content-ID in standalone and changeset response parts
- Mixed batch: changeset writes + standalone GETs
- Multiple changesets with globally unique content IDs
- Batch size limit counting across changesets
- Top-level batch error handling (JSON, non-JSON, empty body)
- Batch without continue-on-error (first failure stops)
- Batch with continue-on-error (mixed success/failure)
- OData multipart serialization compliance (CRLF, boundaries, headers)
- BatchResult computed properties edge cases
- Multipart response parsing edge cases (REQ_ID header, GUID formats)
- Content-ID reference format and usage ($n in @odata.bind, update, delete)
- Intent validation for unknown types
- Batch boundary format validation

DataFrame + Batch integration:
- New BatchDataFrameOperations class (batch.dataframe namespace)
- batch.dataframe.create(table, df) -- DataFrame rows to CreateMultiple
- batch.dataframe.update(table, df, id_column) -- DataFrame to updates
- batch.dataframe.delete(table, ids_series) -- pandas Series to deletes
- 18 new tests in test_batch_dataframe.py covering all operations

Total: 579 tests passing (58 new tests added)
1 parent 537b198 commit 59139b4

4 files changed

Lines changed: 1196 additions & 2 deletions

File tree

src/PowerPlatform/Dataverse/data/_batch.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,9 @@ def _resolve_all(self, items: List[Any]) -> List[Union[_RawRequest, _ChangeSetBa
289289
result: List[Union[_RawRequest, _ChangeSetBatchItem]] = []
290290
for item in items:
291291
if isinstance(item, _ChangeSet):
292+
if not item.operations:
293+
# Empty changeset — nothing to send; skip silently.
294+
continue
292295
cs_requests = [self._resolve_one(op) for op in item.operations]
293296
result.append(_ChangeSetBatchItem(requests=cs_requests))
294297
else:
@@ -574,7 +577,7 @@ def _parse_batch_response(self, response: Any) -> BatchResult:
574577
if item is not None:
575578
responses.append(item)
576579
else:
577-
item = _parse_http_response_part(part_body, content_id=None)
580+
item = _parse_http_response_part(part_body, content_id=part_headers.get("content-id"))
578581
if item is not None:
579582
responses.append(item)
580583
return BatchResult(responses=responses)

src/PowerPlatform/Dataverse/operations/batch.py

Lines changed: 177 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
99

10+
import pandas as pd
11+
1012
from ..core.errors import ValidationError
1113
from ..data._batch import (
1214
_BatchClient,
@@ -540,6 +542,178 @@ def sql(self, sql: str) -> None:
540542
self._batch._items.append(_QuerySql(sql=sql.strip()))
541543

542544

545+
# ---------------------------------------------------------------------------
546+
# DataFrame batch operations
547+
# ---------------------------------------------------------------------------
548+
549+
550+
class BatchDataFrameOperations:
    """DataFrame-oriented wrappers for batch record operations.

    Provides :meth:`create`, :meth:`update`, and :meth:`delete` that accept
    ``pandas.DataFrame`` / ``pandas.Series`` inputs and convert them to standard
    dicts before enqueueing on the batch. This lets data-science callers feed
    DataFrames directly into a batch without manual conversion.

    Every method only *enqueues* work by delegating to ``batch.records``;
    nothing is sent until the batch itself is executed.

    Accessed via ``batch.dataframe``.

    Example::

        import pandas as pd

        batch = client.batch.new()
        df = pd.DataFrame([
            {"name": "Contoso", "telephone1": "555-0100"},
            {"name": "Fabrikam", "telephone1": "555-0200"},
        ])
        batch.dataframe.create("account", df)
        result = batch.execute()
    """

    def __init__(self, batch: "BatchRequest") -> None:
        self._batch = batch

    @staticmethod
    def _validated_ids(ids: pd.Series, error_prefix: str) -> List[str]:
        """Return the stripped IDs of ``ids``, rejecting any unusable value.

        Shared by :meth:`update` and :meth:`delete` so both apply the same
        rule: every ID must be a non-empty (after stripping) string.

        :param ids: Series holding the candidate record GUIDs.
        :type ids: ~pandas.Series
        :param error_prefix: Leading text of the error message; the list of
            offending index labels is appended to it.
        :type error_prefix: :class:`str`

        :return: The IDs with surrounding whitespace removed, in input order.
        :rtype: :class:`list` of :class:`str`

        :raises ValueError: If any value is not a string or is blank.
        """
        values = ids.tolist()
        # Index.tolist() yields native Python scalars, so the labels render
        # as e.g. ``[20]`` in the message instead of ``[np.int64(20)]``.
        labels = ids.index.tolist()
        invalid = [label for label, value in zip(labels, values) if not isinstance(value, str) or not value.strip()]
        if invalid:
            raise ValueError(f"{error_prefix} {invalid}. All IDs must be non-empty strings.")
        return [value.strip() for value in values]

    def create(self, table: str, records: pd.DataFrame) -> None:
        """Enqueue record creates from a pandas DataFrame.

        Each row becomes a record. All rows are bundled in a single
        ``CreateMultiple`` batch item (one HTTP request in the batch).

        :param table: Table schema name (e.g. ``"account"``).
        :type table: :class:`str`
        :param records: DataFrame where each row is a record to create.
        :type records: ~pandas.DataFrame

        :raises TypeError: If ``records`` is not a pandas DataFrame.
        :raises ValueError: If ``records`` is empty or any row has no non-null values.

        Example::

            df = pd.DataFrame([{"name": "Contoso"}, {"name": "Fabrikam"}])
            batch.dataframe.create("account", df)
        """
        if not isinstance(records, pd.DataFrame):
            raise TypeError("records must be a pandas DataFrame")
        if records.empty:
            raise ValueError("records must be a non-empty DataFrame")

        from ..utils._pandas import dataframe_to_records

        record_list = dataframe_to_records(records)
        # A row that converts to an empty dict carries no fields to create;
        # report it by its DataFrame index label rather than its position.
        empty_rows = [label for label, record in zip(records.index.tolist(), record_list) if not record]
        if empty_rows:
            raise ValueError(
                f"Records at index(es) {empty_rows} have no non-null values. "
                "All rows must contain at least one field to create."
            )
        self._batch.records.create(table, record_list)

    def update(
        self,
        table: str,
        changes: pd.DataFrame,
        id_column: str,
        clear_nulls: bool = False,
    ) -> None:
        """Enqueue record updates from a pandas DataFrame.

        Each row represents an update. The ``id_column`` specifies which
        column contains the record GUIDs. Rows whose patch turns out empty
        are dropped; if every row is dropped, nothing is enqueued and the
        call returns silently.

        :param table: Table schema name (e.g. ``"account"``).
        :type table: :class:`str`
        :param changes: DataFrame where each row contains a record GUID and
            the fields to update.
        :type changes: ~pandas.DataFrame
        :param id_column: Name of the DataFrame column containing record GUIDs.
        :type id_column: :class:`str`
        :param clear_nulls: When ``False`` (default), NaN/None values are
            skipped. When ``True``, NaN/None sends ``null`` to clear the field.
        :type clear_nulls: :class:`bool`

        :raises TypeError: If ``changes`` is not a pandas DataFrame.
        :raises ValueError: If ``changes`` is empty, ``id_column`` is missing,
            IDs are invalid, or there is no column besides ``id_column``.

        Example::

            df = pd.DataFrame([
                {"accountid": "guid-1", "telephone1": "555-0100"},
                {"accountid": "guid-2", "telephone1": "555-0200"},
            ])
            batch.dataframe.update("account", df, id_column="accountid")
        """
        if not isinstance(changes, pd.DataFrame):
            raise TypeError("changes must be a pandas DataFrame")
        if changes.empty:
            raise ValueError("changes must be a non-empty DataFrame")
        if id_column not in changes.columns:
            raise ValueError(f"id_column '{id_column}' not found in DataFrame columns")

        ids = self._validated_ids(
            changes[id_column],
            f"id_column '{id_column}' contains invalid values at row index(es)",
        )

        change_columns = [c for c in changes.columns if c != id_column]
        if not change_columns:
            raise ValueError(
                "No columns to update. The DataFrame must contain at least one column besides the id_column."
            )

        from ..utils._pandas import dataframe_to_records

        change_list = dataframe_to_records(changes[change_columns], na_as_null=clear_nulls)
        # Keep each ID paired with its patch while discarding empty patches,
        # so the two lists handed to the batch stay aligned.
        paired = [(record_id, patch) for record_id, patch in zip(ids, change_list) if patch]
        if not paired:
            return
        ids_filtered = [record_id for record_id, _ in paired]
        change_filtered = [patch for _, patch in paired]

        self._batch.records.update(table, ids_filtered, change_filtered)

    def delete(
        self,
        table: str,
        ids: pd.Series,
        use_bulk_delete: bool = True,
    ) -> None:
        """Enqueue record deletes from a pandas Series of GUIDs.

        An empty Series is a no-op: nothing is enqueued and no error is raised.

        :param table: Table schema name (e.g. ``"account"``).
        :type table: :class:`str`
        :param ids: Series of record GUIDs to delete.
        :type ids: ~pandas.Series
        :param use_bulk_delete: When ``True`` (default) and ``ids`` has multiple
            values, use the ``BulkDelete`` action.
        :type use_bulk_delete: :class:`bool`

        :raises TypeError: If ``ids`` is not a pandas Series.
        :raises ValueError: If ``ids`` contains invalid values.

        Example::

            ids_series = pd.Series(["guid-1", "guid-2", "guid-3"])
            batch.dataframe.delete("account", ids_series)
        """
        if not isinstance(ids, pd.Series):
            raise TypeError("ids must be a pandas Series")
        if ids.empty:
            return
        id_list = self._validated_ids(ids, "ids contains invalid values at index(es)")
        self._batch.records.delete(table, id_list, use_bulk_delete=use_bulk_delete)
715+
716+
543717
# ---------------------------------------------------------------------------
544718
# BatchRequest and BatchOperations
545719
# ---------------------------------------------------------------------------
@@ -550,7 +724,8 @@ class BatchRequest:
550724
Builder for constructing and executing a Dataverse OData ``$batch`` request.
551725
552726
Obtain via :meth:`BatchOperations.new` (``client.batch.new()``). Add operations
553-
through :attr:`records`, :attr:`tables`, and :attr:`query`, optionally group writes
727+
through :attr:`records`, :attr:`tables`, :attr:`query`, and :attr:`dataframe`,
728+
optionally group writes
554729
into a :meth:`changeset`, then call :meth:`execute`.
555730
556731
Operations are executed sequentially in the order added. The resulting
@@ -581,6 +756,7 @@ def __init__(self, client: "DataverseClient") -> None:
581756
self.records = BatchRecordOperations(self)
582757
self.tables = BatchTableOperations(self)
583758
self.query = BatchQueryOperations(self)
759+
self.dataframe = BatchDataFrameOperations(self)
584760

585761
def changeset(self) -> ChangeSet:
586762
"""

0 commit comments

Comments
 (0)