consolidate to one upload_file method

Max Wang · Max Wang · commit 2e8701ddcec1 · 2025-10-10T15:10:52.000-07:00
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ A minimal Python SDK to use Microsoft Dataverse as a database for Azure AI Found
 - Bulk create — Pass a list of records to `create(...)` to invoke the bound `CreateMultiple` action; returns `list[str]` of GUIDs. If `@odata.type` is absent the SDK resolves the logical name from metadata (cached).
 - Bulk update — Call `update_multiple(entity_set, records)` to invoke the bound `UpdateMultiple` action; returns nothing. Each record must include the real primary key attribute (e.g. `accountid`).
 - Retrieve multiple (paging) — Generator-based `get_multiple(...)` that yields pages, supports `$top` and Prefer: `odata.maxpagesize` (`page_size`).
-- Upload files — 3 methods to upload files to file column. See https://learn.microsoft.com/en-us/power-apps/developer/data-platform/file-column-data?tabs=sdk#upload-files
+- Upload files — Call `upload_file(entity_set, ...)` and an upload method is picked automatically (the user can also override the upload mode). See https://learn.microsoft.com/en-us/power-apps/developer/data-platform/file-column-data?tabs=sdk#upload-files
 - Metadata helpers — Create/inspect/delete simple custom tables (EntityDefinitions + Attributes).
 - Pandas helpers — Convenience DataFrame oriented wrappers for quick prototyping/notebooks.
 - Auth — Azure Identity (`TokenCredential`) injection.
@@ -20,7 +20,7 @@ A minimal Python SDK to use Microsoft Dataverse as a database for Azure AI Found
 - Bulk create via `CreateMultiple` (collection-bound) by passing `list[dict]` to `create(entity_set, payloads)`; returns list of created IDs.
 - Bulk update via `UpdateMultiple` by calling `update_multiple(entity_set, records)` with primary key attribute present in each record; returns nothing.
 - Retrieve multiple with server-driven paging: `get_multiple(...)` yields lists (pages) following `@odata.nextLink`. Control total via `$top` and per-page via `page_size` (Prefer: `odata.maxpagesize`).
-- Upload files either with dv message blocks, a single request (supports file size up to 128 MB), or in chunks
+- Upload files; under the hood this uses either Dataverse message blocks, a single request (supports file sizes up to 128 MB), or a chunked upload
 - Optional pandas integration (`PandasODataClient`) for DataFrame based create / get / query.
 
 Auth:
@@ -171,17 +171,17 @@ Notes:
 3 methods are supported: `upload_file(entity_set, ...)`, `upload_file_small(entity_set, ...)`, `upload_file_chunk(entity_set, ...)`. All returns `None`.
 
 ```python
-client.upload_file('account', record_id, 'accountid', 'sample_filecolumn', 'test.pdf')
+client.upload_file('account', record_id, 'sample_filecolumn', 'test.pdf')
 
-client.upload_file_small('account', record_id, 'sample_filecolumn', 'test.pdf')
+client.upload_file('account', record_id, 'sample_filecolumn', 'test.pdf', mode='chunk', if_none_match=True)
 
-client.upload_file_chunk('account', record_id, 'sample_filecolumn', 'test.pdf')
 ```
 
 Notes:
-- upload_file uses Dataverse messages and upload the file in Base64 encoded blocks (size limit is 4 MB for the Base64 encoded string), it consists of 3 stages: InitializeFileBlocksUpload, UploadBlock, and CommitFileBlocksUpload. Total number of Web API calls is number of blocks + 2.
+- upload_file picks one of the three methods based on file size: if the file is smaller than 128 MB it uses upload_file_small, otherwise it uses upload_file_chunk. upload_file_block is used only when explicitly requested (`mode="block"`).
 - upload_file_small makes a single Web API call and only supports file size < 128 MB
 - upload_file_chunk uses PATCH with Content-Range to upload the file (more aligned with HTTP standard compared to Dataverse messages). It consists of 2 stages 1. PATCH request to get the headers used for actual upload. 2. Actual upload in chunks. It uses x-ms-chunk-size returned in the first stage to determine chunk size (normally 4 MB), and use Content-Range and Content-Length as metadata for the upload.
+- upload_file_block uses Dataverse messages and uploads the file in Base64-encoded blocks (the size limit is 4 MB for the Base64-encoded string). It consists of 3 stages: InitializeFileBlocksUpload, UploadBlock, and CommitFileBlocksUpload. The total number of Web API calls is the number of blocks + 2.
 
 ## Retrieve multiple with paging
 
diff --git a/examples/quickstart_file_upload.py b/examples/quickstart_file_upload.py
@@ -290,9 +290,10 @@ def get_dataset_info(file_path: Path):
         backoff(lambda: client.upload_file(
             entity_set,
             record_id,
-            pk_attr,
             file_attr_logical,
             str(DATASET_FILE),
+            mode="block",
+            id_attribute=pk_attr,
         ))
         print({"block_upload_completed": True})
         # Immediate download + verify
@@ -319,9 +320,10 @@ def get_dataset_info(file_path: Path):
         backoff(lambda: client.upload_file(
             entity_set,
             record_id,
-            pk_attr,
             file_attr_logical,
             str(replacement_file),
+            mode="block",
+            id_attribute=pk_attr,
         ))
         print({"block_replace_upload_completed": True})
         # Download and verify replacement
@@ -347,11 +349,12 @@ def get_dataset_info(file_path: Path):
     print("Small single-request upload demo:")
     try:
         DATASET_FILE, small_file_size, src_hash = get_dataset_info(_GENERATED_TEST_FILE)
-        backoff(lambda: client.upload_file_small(
+        backoff(lambda: client.upload_file(
             entity_set,
             record_id,
             small_file_attr_logical,
             str(DATASET_FILE),
+            mode="small",
         ))
         print({"small_upload_completed": True, "small_source_size": small_file_size})
         odata = client._get_odata()
@@ -374,11 +377,12 @@ def get_dataset_info(file_path: Path):
         # Now test replacing with an 8MB file
         print("Small single-request upload demo - REPLACE with 8MB file:")
         replacement_file, replace_size_small, replace_hash_small = get_dataset_info(_GENERATED_TEST_FILE_8MB)
-        backoff(lambda: client.upload_file_small(
+        backoff(lambda: client.upload_file(
             entity_set,
             record_id,
             small_file_attr_logical,
             str(replacement_file),
+            mode="small",
         ))
         print({"small_replace_upload_completed": True, "small_replace_source_size": replace_size_small})
         resp_single_replace = odata._request("get", dl_url_single, headers=odata._headers())
@@ -402,11 +406,12 @@ def get_dataset_info(file_path: Path):
     print("Streaming chunk upload demo (upload_file_chunk):")
     try:
         DATASET_FILE, src_size_chunk, src_hash_chunk = get_dataset_info(_GENERATED_TEST_FILE)
-        backoff(lambda: client.upload_file_chunk(
+        backoff(lambda: client.upload_file(
             entity_set,
             record_id,
             chunk_file_attr_logical,
             str(DATASET_FILE),
+            mode="chunk",
         ))
         print({"chunk_upload_completed": True})
         odata = client._get_odata()
@@ -429,11 +434,12 @@ def get_dataset_info(file_path: Path):
         # Now test replacing with an 8MB file
         print("Streaming chunk upload demo - REPLACE with 8MB file:")
         replacement_file, replace_size_chunk, replace_hash_chunk = get_dataset_info(_GENERATED_TEST_FILE_8MB)
-        backoff(lambda: client.upload_file_chunk(
+        backoff(lambda: client.upload_file(
             entity_set,
             record_id,
             chunk_file_attr_logical,
             str(replacement_file),
+            mode="chunk",
         ))
         print({"chunk_replace_upload_completed": True})
         resp_chunk_replace = odata._request("get", dl_url_chunk, headers=odata._headers())
diff --git a/src/dataverse_sdk/client.py b/src/dataverse_sdk/client.py
@@ -264,30 +264,44 @@ def upload_file(
         self,
         entity_set: str,
         record_id: str,
-        id_attribute: str,
         file_name_attribute: str,
         path: str,
         *,
+        mode: Optional[str] = None,
         mime_type: Optional[str] = None,
-    ) -> Dict[str, Any]:
-        """Upload a local file into a Dataverse file column.
+        id_attribute: Optional[str] = None,
+        if_none_match: bool = True,
+    ) -> None:
+        """Upload a file to a Dataverse file column with automatic method selection.
 
         Parameters
         ----------
         entity_set : str
             Target entity set (plural logical name), e.g. "accounts".
         record_id : str
             GUID of the target record.
-        id_attribute : str
-            Logical name of the record primary key attribute (e.g. ``accountid``).
         file_name_attribute : str
             Logical name of the file column attribute.
         path : str
             Local filesystem path to the file. Stored filename will be the basename of this path.
+        mode : str | None, keyword-only, optional
+            Upload strategy: "auto", "block", "small", or "chunk". When omitted (``None``, the default) it behaves like "auto".
+            - "auto": Automatically selects best method based on file size
+            - "small": Single PATCH request (files <128MB only)
+            - "chunk": Streaming chunked upload (any size, most efficient for large files)
+            - "block": Message-based block upload (any size, compatibility fallback)
         mime_type : str | None, keyword-only, optional
             Explicit MIME type to persist with the file (e.g. "application/pdf"). If omitted the
             lower-level client attempts to infer from the filename extension and falls back to
             ``application/octet-stream``.
+        id_attribute : str | None, keyword-only, optional
+            Logical name of the primary key attribute for the record (e.g. ``accountid``).
+            **Required** when using "block" mode; raises ValueError if omitted.
+            Not used for "small" or "chunk" modes.
+        if_none_match : bool, keyword-only, optional
+            When True (default), sends ``If-None-Match: null`` to only succeed if the column is 
+            currently empty. Set False to always overwrite (uses ``If-Match: *``).
+            Used for "small" and "chunk" modes only.
 
         Returns
         -------
@@ -297,95 +311,13 @@ def upload_file(
         self._get_odata().upload_file(
             entity_set,
             record_id,
-            id_attribute,
             file_name_attribute,
             path,
+            mode=mode,
             mime_type=mime_type,
-        )
-        return None
-
-    def upload_file_small(
-        self,
-        entity_set: str,
-        record_id: str,
-        file_name_attribute: str,
-        path: str,
-        *,
-        content_type: Optional[str] = None,
-        if_none_match: bool = True,
-    ) -> None:
-        """Upload a file (<128MB) in one PATCH request to a file column.
-
-        Parameters
-        ----------
-        entity_set : str
-            Target entity set (plural logical name), e.g. "accounts".
-        record_id : str
-            GUID of the target record (with or without braces / parentheses).
-        file_name_attribute : str
-            Logical name of the file column attribute.
-        path : str
-            Local filesystem path to the file.
-        content_type : str | None
-            Optional explicit MIME type. If omitted a basic guess isn't performed here; defaults to application/octet-stream.
-        if_none_match : bool
-            When True sends ``If-None-Match: null`` to only succeed if the column is currently empty.
-            Set False to always overwrite (uses ``If-Match: *``).
-
-        Returns
-        -------
-        None
-            Returns nothing on success. Raises on failure.
-        """
-        self._get_odata().upload_file_small(
-            entity_set,
-            record_id,
-            file_name_attribute,
-            path,
-            content_type=content_type,
+            id_attribute=id_attribute,
             if_none_match=if_none_match,
         )
         return None
 
-    def upload_file_chunk(
-        self,
-        entity_set: str,
-        record_id: str,
-        file_name_attribute: str,
-        path: str,
-        *,
-        if_none_match: bool = True,
-    ) -> None:
-        """Stream a local file using native chunked PATCH protocol (x-ms-transfer-mode: chunked).
-
-        Parameters
-        ----------
-        entity_set : str
-            Target entity set (plural logical name), e.g. "accounts".
-        record_id : str
-            GUID of the target record.
-        file_name_attribute : str
-            Logical name of the file column attribute.
-        path : str
-            Local filesystem path to the file.
-        if_none_match : bool
-            When True sends ``If-None-Match: null`` to only succeed if the column is currently empty.
-            Set False to always overwrite (uses ``If-Match: *``).
-            
-        Returns
-        -------
-        None
-            Returns nothing on success. Raises on failure.
-        """
-        self._get_odata().upload_file_chunk(
-            entity_set,
-            record_id,
-            file_name_attribute,
-            path,
-            if_none_match=if_none_match,
-        )
-        return None
-
-
-__all__ = ["DataverseClient"]
-        
+__all__ = ["DataverseClient"]
diff --git a/src/dataverse_sdk/odata.py b/src/dataverse_sdk/odata.py