44import multiprocessing
55import os
66import socket
7+ import threading
78import traceback
89
910from google .protobuf import any_pb2
1819from pynumaflow ._constants import (
1920 _LOGGER ,
2021 MULTIPROC_MAP_SOCK_ADDR ,
22+ NUMAFLOW_GRPC_SHUTDOWN_GRACE_PERIOD_SECONDS ,
2123 UDFType ,
2224)
2325from pynumaflow .exceptions import SocketError
@@ -57,6 +59,7 @@ def sync_server_start(
5759 server_options = None ,
5860 server_info : ServerInfo | None = None ,
5961 udf_type : str = UDFType .Map ,
62+ shutdown_event : threading .Event | None = None ,
6063):
6164 """
6265 Utility function to start a sync grpc server instance.
@@ -75,6 +78,7 @@ def sync_server_start(
7578 udf_type = udf_type ,
7679 server_info_file = server_info_file ,
7780 server_info = server_info ,
81+ shutdown_event = shutdown_event ,
7882 )
7983
8084
@@ -86,10 +90,15 @@ def _run_server(
8690 udf_type : str ,
8791 server_info_file : str | None = None ,
8892 server_info : ServerInfo | None = None ,
93+ shutdown_event : threading .Event | None = None ,
8994) -> None :
9095 """
9196 Starts the Synchronous server instance on the given UNIX socket
9297 with given max threads. Wait for the server to terminate.
98+
99+ If *shutdown_event* is provided, a background daemon thread will wait
100+ on it and then call ``server.stop(NUMAFLOW_GRPC_SHUTDOWN_GRACE_PERIOD_SECONDS)``
101+ for a cooperative graceful shutdown (no process kill).
93102 """
94103 server = grpc .server (
95104 ThreadPoolExecutor (
@@ -115,10 +124,21 @@ def _run_server(
115124 server .add_insecure_port (bind_address )
116125 # start the gRPC server
117126 server .start ()
127+
118128 # Add the server information to the server info file if provided
119129 if server_info and server_info_file :
120130 info_server_write (server_info = server_info , info_file = server_info_file )
121131
132+ if shutdown_event is not None :
133+
134+ def _watch_for_shutdown ():
135+ shutdown_event .wait ()
136+ _LOGGER .info ("Shutdown signal received, stopping server gracefully..." )
137+ server .stop (NUMAFLOW_GRPC_SHUTDOWN_GRACE_PERIOD_SECONDS )
138+
139+ watcher = threading .Thread (target = _watch_for_shutdown , daemon = True )
140+ watcher .start ()
141+
122142 _LOGGER .info ("GRPC Server listening on: %s %d" , bind_address , os .getpid ())
123143 server .wait_for_termination ()
124144
@@ -243,14 +263,14 @@ def check_instance(instance, callable_type) -> bool:
243263 return False
244264
245265
246- def get_grpc_status (err : str ):
266+ def get_grpc_status (err : str , detail : str | None = None ):
247267 """
248268 Create a grpc status object with the error details.
249269 """
250270 details = any_pb2 .Any ()
251271 details .Pack (
252272 error_details_pb2 .DebugInfo (
253- detail = "\n " .join (traceback .format_stack ()),
273+ detail = detail if detail is not None else "\n " .join (traceback .format_stack ()),
254274 )
255275 )
256276
@@ -295,9 +315,9 @@ def update_context_err(context: NumaflowServicerContext, e: BaseException, err_m
295315 """
296316 trace = get_exception_traceback_str (e )
297317 _LOGGER .critical (trace )
298- _LOGGER .critical (e . __str__ () )
318+ _LOGGER .critical (err_msg )
299319
300- grpc_status = get_grpc_status (err_msg )
320+ grpc_status = get_grpc_status (err_msg , detail = trace )
301321
302322 context .set_code (grpc .StatusCode .INTERNAL )
303323 context .set_details (err_msg )
0 commit comments