apache · gaogaotiantian · Feb 11, 2026 · HyukjinKwon · Feb 12, 2026 · gaogaotiantian
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerUtils.scala
@@ -133,56 +133,50 @@ private[spark] object PythonWorkerUtils extends Logging {
     // number of different broadcasts
     val toRemove = oldBids.diff(newBids)
     val addedBids = newBids.diff(oldBids)
-    val cnt = toRemove.size + addedBids.size
     val needsDecryptionServer = env.serializerManager.encryptionEnabled && addedBids.nonEmpty
-    dataOut.writeBoolean(needsDecryptionServer)
-    dataOut.writeInt(cnt)
-    def sendBidsToRemove(): Unit = {
-      for (bid <- toRemove) {
-        // remove the broadcast from worker
-        dataOut.writeLong(-bid - 1) // bid >= 0
-        oldBids.remove(bid)
-      }
-    }
+    var connInfo: Option[Any] = None
+    var secret: Option[String] = None
+
     if (needsDecryptionServer) {
       // if there is encryption, we setup a server which reads the encrypted files, and sends
       // the decrypted data to python
-      val idsAndFiles = broadcastVars.flatMap { broadcast =>
-        if (!oldBids.contains(broadcast.id)) {
-          oldBids.add(broadcast.id)
-          Some((broadcast.id, broadcast.value.path))
-        } else {
-          None
-        }
+      val idsAndFiles = broadcastVars.filter(b => !oldBids.contains(b.id)).map { broadcast =>
+        (broadcast.id, broadcast.value.path)
       }
       val server = new EncryptedPythonBroadcastServer(env, idsAndFiles)
       server.connInfo match {
         case portNum: Int =>
-          dataOut.writeInt(portNum)
-          writeUTF(server.secret, dataOut)
+          connInfo = Some(portNum)
+          secret = Some(server.secret)
         case sockPath: String =>
-          dataOut.writeInt(-1)
-          writeUTF(sockPath, dataOut)
+          connInfo = Some(sockPath)
       }
       logTrace(s"broadcast decryption server setup on ${server.connInfo}")
-      sendBidsToRemove()
-      idsAndFiles.foreach { case (id, _) =>
-        // send new broadcast
-        dataOut.writeLong(id)
-      }
-      dataOut.flush()
-    } else {
-      sendBidsToRemove()
-      for (broadcast <- broadcastVars) {
-        if (!oldBids.contains(broadcast.id)) {
-          // send new broadcast
-          dataOut.writeLong(broadcast.id)
-          writeUTF(broadcast.value.path, dataOut)
-          oldBids.add(broadcast.id)
-        }
-      }
     }
-    dataOut.flush()
+
+    val json = Serialization.write(Map(
+        "broadcast_decryption_server" -> needsDecryptionServer,
+        "conn_info" -> connInfo.orNull,
+        "auth_secret" -> secret.orNull,
+        "broadcast_variables" -> (
+          broadcastVars.filter(b => !oldBids.contains(b.id)).map { broadcast =>
+            Map(
+              "bid" -> broadcast.id,
+              "path" -> broadcast.value.path
+            )
+          } ++ toRemove.map { bid =>
+            Map(
+              "bid" -> (-bid - 1),
+              "path" -> null
+            )
+          }
+        )
+      ))
+
+    oldBids.clear()
+    oldBids ++= newBids
+
+    writeUTF(json, dataOut)
   }
 
   /**

diff --git a/python/pyspark/worker_util.py b/python/pyspark/worker_util.py
@@ -20,6 +20,7 @@
 """
 import importlib
 from inspect import currentframe, getframeinfo
+import json
 import os
 import sys
 from typing import Any, IO, Optional
@@ -42,7 +43,6 @@
 from pyspark.errors import PySparkRuntimeError
 from pyspark.util import local_connect_and_auth
 from pyspark.serializers import (
-    read_bool,
     read_int,
     read_long,
     write_int,
@@ -155,39 +155,42 @@ def setup_spark_files(infile: IO) -> None:
 def setup_broadcasts(infile: IO) -> None:
     """
     Set up broadcasted variables.
+
+    Reads a single JSON document from ``infile`` with the following shape:
+    {
+        "broadcast_decryption_server": bool,
+        "conn_info": int | str | None,
+        "auth_secret": str | None,
+        "broadcast_variables": [{"bid": int, "path": str | None}],
+    }
+    A negative "bid" requests removal of broadcast ``-bid - 1``; when the
+    decryption server is used, values come from its socket, not "path".
     """
     if not is_remote_only():
         from pyspark.core.broadcast import Broadcast, _broadcastRegistry
 
-    # fetch names and values of broadcast variables
-    needs_broadcast_decryption_server = read_bool(infile)
-    num_broadcast_variables = read_int(infile)
-    if needs_broadcast_decryption_server:
+    data = json.loads(utf8_deserializer.loads(infile))
+
+    broadcast_sock_file = None
+    if data["broadcast_decryption_server"]:
         # read the decrypted data from a server in the jvm
-        conn_info = read_int(infile)
-        auth_secret = None
-        if conn_info == -1:
-            conn_info = utf8_deserializer.loads(infile)
-        else:
-            auth_secret = utf8_deserializer.loads(infile)
-        (broadcast_sock_file, _) = local_connect_and_auth(conn_info, auth_secret)
+        (broadcast_sock_file, _) = local_connect_and_auth(data["conn_info"], data["auth_secret"])
 
-    for _ in range(num_broadcast_variables):
-        bid = read_long(infile)
+    for broadcast_variable in data["broadcast_variables"]:
+        bid = broadcast_variable["bid"]
         if bid >= 0:
-            if needs_broadcast_decryption_server:
+            if broadcast_sock_file is not None:
                 read_bid = read_long(broadcast_sock_file)
                 assert read_bid == bid
                 _broadcastRegistry[bid] = Broadcast(sock_file=broadcast_sock_file)
             else:
-                path = utf8_deserializer.loads(infile)
-                _broadcastRegistry[bid] = Broadcast(path=path)
+                _broadcastRegistry[bid] = Broadcast(path=broadcast_variable["path"])
 
         else:
             bid = -bid - 1
             _broadcastRegistry.pop(bid)
 
-    if needs_broadcast_decryption_server:
+    if broadcast_sock_file is not None:
         broadcast_sock_file.write(b"1")
         broadcast_sock_file.close()