15 | 15 | import tempfile |
16 | 16 | import concurrent.futures |
17 | 17 | import os |
| 18 | +from typing import List |
18 | 19 |
19 | 20 | from .common import UPLOAD_CHUNK_SIZE, ClientError |
20 | 21 | from .merginproject import MerginProject |
@@ -83,40 +84,63 @@ def upload_blocking(self, mc, mp): |
83 | 84 | raise ClientError("Mismatch between uploaded file chunk {} and local one".format(self.chunk_id)) |
84 | 85 |
85 | 86 |
86 | | -class UploadChanges: |
87 | | - def __init__(self, added=None, updated=None, removed=None): |
88 | | - self.added = added or [] |
89 | | - self.updated = updated or [] |
90 | | - self.removed = removed or [] |
91 | | - self.renamed = [] |
92 | | - |
93 | | - def is_empty(self): |
94 | | - return not (self.added or self.updated or self.removed or self.renamed) |
95 | | - |
96 | | - def split(self): |
97 | | - blocking = UploadChanges() |
98 | | - non_blocking = UploadChanges() |
99 | | - |
100 | | - for file in self.added: |
101 | | - target = blocking if is_qgis_file(file["path"]) or is_versioned_file(file["path"]) else non_blocking |
102 | | - target.added.append(file) |
| 87 | +class UploadChangesHandler: |
| 88 | + """ |
| 89 | + Handles preparation of file changes to be uploaded to the server. |
103 | 90 |
104 | | - for file in self.updated: |
105 | | - blocking.updated.append(file) |
| 91 | + This class is responsible for: |
| 92 | + - Filtering project file changes. |
| 93 | + - Splitting changes into blocking and non-blocking groups. |
| 94 | + - TODO: Applying limits such as max file count or size to break large uploads into smaller batches. |
| 95 | + - Generating upload-ready change groups for asynchronous job creation. |
| 96 | + """ |
106 | 97 |
107 | | - for file in self.removed: |
108 | | - blocking.removed.append(file) |
| 98 | + def __init__(self, mp, client, project_info): |
| 99 | + self.mp = mp |
| 100 | + self.client = client |
| 101 | + self.project_info = project_info |
| 102 | + self._raw_changes = mp.get_push_changes() |
| 103 | + self._filtered_changes = filter_changes(client, project_info, self._raw_changes) |
| 104 | + |
| 105 | + @staticmethod |
| 106 | + def is_blocking_file(file): |
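| | + # i.e. QGIS project files and versioned files (aka .gpkg) are treated as blocking |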
| 107 | + return is_qgis_file(file["path"]) or is_versioned_file(file["path"]) |
| 108 | + |
| 109 | + def split_by_type(self) -> List[dict]: |
| 110 | + """ |
| 111 | + Split the filtered changes into two groups: |
| 112 | + 1. Blocking: all updated and removed files, plus added files that are blocking |
| 113 | + 2. Non-blocking: added files that are not blocking |
| 114 | + |
| 115 | + Returns a list of dicts, each with keys: |
| 116 | + - added, updated, removed, blocking |
| 117 | + """ |
| 118 | + blocking_group = {"added": [], "updated": [], "removed": [], "blocking": True} |
| 119 | + non_blocking_group = {"added": [], "updated": [], "removed": [], "blocking": False} |
| 120 | + |
| 121 | + for f in self._filtered_changes.get("added", []): |
| 122 | + if self.is_blocking_file(f): |
| 123 | + blocking_group["added"].append(f) |
| 124 | + else: |
| 125 | + non_blocking_group["added"].append(f) |
| 126 | + |
| 127 | + for f in self._filtered_changes.get("updated", []): |
| 128 | + blocking_group["updated"].append(f) |
| 129 | + |
| 130 | + for f in self._filtered_changes.get("removed", []): |
| 131 | + blocking_group["removed"].append(f) |
| 132 | + |
| 133 | + result = [] |
| 134 | + if any(blocking_group[k] for k in ("added", "updated", "removed")): |
| 135 | + result.append(blocking_group) |
| 136 | + if non_blocking_group["added"]: |
| 137 | + result.append(non_blocking_group) |
109 | 138 |
110 | | - result = {} |
111 | | - if not blocking.is_empty(): |
112 | | - result["blocking"] = blocking |
113 | | - if not non_blocking.is_empty(): |
114 | | - result["non_blocking"] = non_blocking |
115 | 139 | return result |
116 | 140 |
117 | 141 |
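For illustration, a minimal sketch of how the new handler is meant to be driven (the names match this diff; the file entries shown are hypothetical):

    # sketch only, not part of this diff
    handler = UploadChangesHandler(mp, mc, project_info)
    groups = handler.split_by_type()
    # e.g.:
    # [
    #     {"added": [], "updated": [{"path": "survey.gpkg", ...}], "removed": [], "blocking": True},
    #     {"added": [{"path": "photo.jpg", ...}], "updated": [], "removed": [], "blocking": False},
    # ]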
118 | | -def push_project_async(mc, directory): |
119 | | - """Starts push of a project and returns pending upload job""" |
| 142 | +def push_project_async(mc, directory) -> List[UploadJob]: |
| 143 | + """Starts push of a project and returns pending upload jobs""" |
120 | 144 |
121 | 145 | mp = MerginProject(directory) |
122 | 146 | if mp.has_unfinished_pull(): |
@@ -153,111 +177,97 @@ def push_project_async(mc, directory): |
153 | 177 | + f"\n\nLocal version: {local_version}\nServer version: {server_version}" |
154 | 178 | ) |
155 | 179 |
156 | | - changes = mp.get_push_changes() |
157 | | - changes = filter_changes(mc, project_info, changes) |
| 180 | + changes_handler = UploadChangesHandler(mp, mc, project_info) |
| 181 | + changes_groups = changes_handler.split_by_type() |
158 | 182 |
159 | | - blocking_changes, non_blocking_changes = changes.split() |
| 183 | + jobs = [] |
| 184 | + for changes in changes_groups: |
| 185 | + mp.log.debug("push changes:\n" + pprint.pformat(changes)) |
160 | 186 |
161 | | - blocking_job = ( |
162 | | - _prepare_upload_job(mp, mc, project_path, local_version, blocking_changes) |
163 | | - if any(len(v) for v in blocking_changes.values()) |
164 | | - else None |
165 | | - ) |
166 | | - non_blocking_job = ( |
167 | | - _prepare_upload_job(mp, mc, project_path, local_version, non_blocking_changes) |
168 | | - if any(len(v) for v in non_blocking_changes.values()) |
169 | | - else None |
170 | | - ) |
| 187 | + tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") |
171 | 188 |
172 | | - return blocking_job, non_blocking_job |
| 189 | + # If there are any versioned files (aka .gpkg) that are not updated through a diff, |
| 190 | + # we need to make a temporary copy somewhere to be sure that we are uploading full content. |
| 191 | + # That's because if there are pending transactions, checkpointing or switching from WAL mode |
| 192 | + # won't work, and we would end up with some changes left in the -wal file that would not get |
| 193 | + # uploaded. The temporary copy made via geodiff uses the sqlite backup API and should copy everything. |
| 194 | + for f in changes["updated"]: |
| 195 | + if mp.is_versioned_file(f["path"]) and "diff" not in f: |
| 196 | + mp.copy_versioned_file_for_upload(f, tmp_dir.name) |
173 | 197 |
| 198 | + for f in changes["added"]: |
| 199 | + if mp.is_versioned_file(f["path"]): |
| 200 | + mp.copy_versioned_file_for_upload(f, tmp_dir.name) |
174 | 201 |
175 | | -def _prepare_upload_job(mp, mc, project_path, local_version, changes): |
176 | | - mp.log.debug("push changes:\n" + pprint.pformat(changes)) |
177 | | - |
178 | | - tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") |
179 | | - |
180 | | - # If there are any versioned files (aka .gpkg) that are not updated through a diff, |
181 | | - # we need to make a temporary copy somewhere to be sure that we are uploading full content. |
182 | | - # That's because if there are pending transactions, checkpointing or switching from WAL mode |
183 | | - # won't work, and we would end up with some changes left in -wal file which do not get |
184 | | - # uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything. |
185 | | - for f in changes["updated"]: |
186 | | - if mp.is_versioned_file(f["path"]) and "diff" not in f: |
187 | | - mp.copy_versioned_file_for_upload(f, tmp_dir.name) |
188 | | - |
189 | | - for f in changes["added"]: |
190 | | - if mp.is_versioned_file(f["path"]): |
191 | | - mp.copy_versioned_file_for_upload(f, tmp_dir.name) |
192 | | - |
193 | | - if not sum(len(v) for v in changes.values()): |
194 | | - mp.log.info(f"--- push {project_path} - nothing to do") |
195 | | - return |
| 202 | + if not sum(len(changes[k]) for k in ("added", "updated", "removed")): |
| 203 | + mp.log.info(f"--- push {project_path} - nothing to do") |
| 204 | + continue |
196 | 205 |
197 | | - # drop internal info from being sent to server |
198 | | - for item in changes["updated"]: |
199 | | - item.pop("origin_checksum", None) |
200 | | - data = {"version": local_version, "changes": changes} |
| 206 | + # drop internal info from being sent to server |
| 207 | + for item in changes["updated"]: |
| 208 | + item.pop("origin_checksum", None) |
| 209 | + data = {"version": local_version, "changes": {k: changes[k] for k in ("added", "updated", "removed")}} |
201 | 210 |
202 | | - try: |
203 | | - resp = mc.post( |
204 | | - f"/v1/project/push/{project_path}", |
205 | | - data, |
206 | | - {"Content-Type": "application/json"}, |
207 | | - ) |
208 | | - except ClientError as err: |
209 | | - mp.log.error("Error starting transaction: " + str(err)) |
210 | | - mp.log.info("--- push aborted") |
211 | | - raise |
212 | | - server_resp = json.load(resp) |
213 | | - |
214 | | - upload_files = data["changes"]["added"] + data["changes"]["updated"] |
215 | | - |
216 | | - transaction_id = server_resp["transaction"] if upload_files else None |
217 | | - job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir) |
218 | | - |
219 | | - if not upload_files: |
220 | | - mp.log.info("not uploading any files") |
221 | | - job.server_resp = server_resp |
222 | | - push_project_finalize(job) |
223 | | - return None # all done - no pending job |
224 | | - |
225 | | - mp.log.info(f"got transaction ID {transaction_id}") |
226 | | - |
227 | | - upload_queue_items = [] |
228 | | - total_size = 0 |
229 | | - # prepare file chunks for upload |
230 | | - for file in upload_files: |
231 | | - if "diff" in file: |
232 | | - # versioned file - uploading diff |
233 | | - file_location = mp.fpath_meta(file["diff"]["path"]) |
234 | | - file_size = file["diff"]["size"] |
235 | | - elif "upload_file" in file: |
236 | | - # versioned file - uploading full (a temporary copy) |
237 | | - file_location = file["upload_file"] |
238 | | - file_size = file["size"] |
239 | | - else: |
240 | | - # non-versioned file |
241 | | - file_location = mp.fpath(file["path"]) |
242 | | - file_size = file["size"] |
243 | | - |
244 | | - for chunk_index, chunk_id in enumerate(file["chunks"]): |
245 | | - size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) |
246 | | - upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index)) |
247 | | - |
248 | | - total_size += file_size |
249 | | - |
250 | | - job.total_size = total_size |
251 | | - job.upload_queue_items = upload_queue_items |
252 | | - |
253 | | - mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}") |
254 | | - |
255 | | - # start uploads in background |
256 | | - job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) |
257 | | - for item in upload_queue_items: |
258 | | - future = job.executor.submit(_do_upload, item, job) |
259 | | - job.futures.append(future) |
260 | | - return job |
| 211 | + try: |
| 212 | + resp = mc.post( |
| 213 | + f"/v1/project/push/{project_path}", |
| 214 | + data, |
| 215 | + {"Content-Type": "application/json"}, |
| 216 | + ) |
| 217 | + except ClientError as err: |
| 218 | + mp.log.error("Error starting transaction: " + str(err)) |
| 219 | + mp.log.info("--- push aborted") |
| 220 | + raise |
| 221 | + server_resp = json.load(resp) |
| 222 | + |
| 223 | + upload_files = data["changes"]["added"] + data["changes"]["updated"] |
| 224 | + |
| 225 | + transaction_id = server_resp["transaction"] if upload_files else None |
| 226 | + job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir) |
| 227 | + |
| 228 | + if not upload_files: |
| 229 | + mp.log.info("not uploading any files") |
| 230 | + job.server_resp = server_resp |
| 231 | + push_project_finalize(job) |
| 232 | + continue  # all done for this group - no pending job |
| 233 | + |
| 234 | + mp.log.info(f"got transaction ID {transaction_id}") |
| 235 | + |
| 236 | + upload_queue_items = [] |
| 237 | + total_size = 0 |
| 238 | + # prepare file chunks for upload |
| 239 | + for file in upload_files: |
| 240 | + if "diff" in file: |
| 241 | + # versioned file - uploading diff |
| 242 | + file_location = mp.fpath_meta(file["diff"]["path"]) |
| 243 | + file_size = file["diff"]["size"] |
| 244 | + elif "upload_file" in file: |
| 245 | + # versioned file - uploading full (a temporary copy) |
| 246 | + file_location = file["upload_file"] |
| 247 | + file_size = file["size"] |
| 248 | + else: |
| 249 | + # non-versioned file |
| 250 | + file_location = mp.fpath(file["path"]) |
| 251 | + file_size = file["size"] |
| 252 | + |
| 253 | + for chunk_index, chunk_id in enumerate(file["chunks"]): |
| 254 | + size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) |
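| | + # the last chunk is just the remainder, e.g. a 2.5-chunk file yields two full chunks and one half-size chunk |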
| 255 | + upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index)) |
| 256 | + |
| 257 | + total_size += file_size |
| 258 | + |
| 259 | + job.total_size = total_size |
| 260 | + job.upload_queue_items = upload_queue_items |
| 261 | + |
| 262 | + mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}") |
| 263 | + |
| 264 | + # start uploads in background |
| 265 | + job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) |
| 266 | + for item in upload_queue_items: |
| 267 | + future = job.executor.submit(_do_upload, item, job) |
| 268 | + job.futures.append(future) |
| 269 | + jobs.append(job) |
| 270 | + return jobs |
261 | 271 |
262 | 272 |
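Since push_project_async now returns a list of pending jobs instead of a single one, callers are expected to drive every job to completion. A minimal sketch using the module's existing push_project_wait and push_project_finalize helpers (error handling omitted):

    # sketch only, not part of this diff
    jobs = push_project_async(mc, directory)
    for job in jobs:
        push_project_wait(job)      # block until all chunks of this job are uploaded
        push_project_finalize(job)  # confirm the transaction on the server and clean up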
263 | 273 | def push_project_wait(job): |