Skip to content

Commit 63b10b2

Browse files
committed
Implement max number of attempts to connect to other backend processes when starting up the backend
1 parent 9718855 commit 63b10b2

4 files changed

Lines changed: 44 additions & 34 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ For more details refer to the [documentation](https://softwarequtech.github.io/S
1010
nodes defined in the network configuration.
1111
- macOS Support: use "pollreactor" in macOS platforms to correctly allow starting backend processes.
1212
- Avoid using port 8021, which is used by a system service in macOS platforms.
13+
- Starting the SimulaQron backend now respects the "conn_max_retries" configuration when backend processes
14+
try to connect to each other.
1315

1416

1517
2026-04-23 (v4.1.1)

simulaqron/start/start_qnodeos.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def _init_register(virt_root, my_name: str, node: NetQASMFactory):
3030
_setup_netqasm_server(my_name, node)
3131

3232

33-
def _connect_to_virt_node(my_name: str, netqasm_factory: NetQASMFactory, virtual_network: SocketsConfig):
33+
def _connect_to_virt_node(my_name: str, netqasm_factory: NetQASMFactory, virtual_network: SocketsConfig, attempt: int = 0):
3434
"""Tries to connect to local virtual node.
3535
3636
If connection is refused, we try again after a set amount of time
@@ -49,36 +49,38 @@ def _connect_to_virt_node(my_name: str, netqasm_factory: NetQASMFactory, virtual
4949
defer_virtual_node.addCallback(_init_register, my_name, netqasm_factory)
5050
# If connection fails do:
5151
defer_virtual_node.addErrback(_handle_connection_error, my_name, netqasm_factory, virtual_network,
52-
virtual_node.hostname, virtual_node.port)
52+
virtual_node.hostname, virtual_node.port, attempt)
5353

5454

5555
def _handle_connection_error(reason, my_name: str, netqasm_factory: NetQASMFactory, virtual_network: SocketsConfig,
56-
virtual_node_hostname: str, virtual_node_port: int):
56+
virtual_node_hostname: str, virtual_node_port: int, attempt: int):
5757
""" Handles errors from trying to connect to local virtual node.
5858
5959
If the connection attempt fails, another try will be scheduled after
6060
``simulaqron_settings.conn_retry_time`` seconds. Once ``attempt`` exceeds ``simulaqron_settings.conn_max_retries``, the error is logged and the reactor is stopped.
6161
"""
6262
try:
6363
reason.raiseException()
64-
except ConnectionRefusedError as err:
65-
# TODO - Implement checking of max number of connections
66-
logger.debug("START_QNODEOS %s: Could not connect to Virtual node (%s, %d), trying again...", my_name,
67-
virtual_node_hostname, virtual_node_port, exc_info=err)
68-
reactor.callLater(
69-
simulaqron_settings.conn_retry_time,
70-
_connect_to_virt_node,
71-
my_name,
72-
netqasm_factory,
73-
virtual_network,
74-
)
75-
except Exception as e:
76-
logger.error(
77-
"START_QNODEOS %s: Critical error when connection to local virtual node: %s",
78-
my_name,
79-
e,
80-
)
81-
reactor.stop()
64+
except Exception as err:
65+
if attempt > simulaqron_settings.conn_max_retries:
66+
logger.exception(
67+
"START_QNODEOS %s: Exhausted the maximum number of attempts to connect to local virtual node",
68+
my_name,
69+
exc_info=err,
70+
)
71+
reactor.stop()
72+
return
73+
else:
74+
logger.debug("START_QNODEOS %s: Could not connect to Virtual node (%s, %d), trying again...", my_name,
75+
virtual_node_hostname, virtual_node_port, exc_info=err)
76+
reactor.callLater(
77+
simulaqron_settings.conn_retry_time,
78+
_connect_to_virt_node,
79+
my_name,
80+
netqasm_factory,
81+
virtual_network,
82+
attempt + 1
83+
)
8284

8385

8486
def _setup_netqasm_server(my_name: str, netqasm_factory: NetQASMFactory):
@@ -120,7 +122,7 @@ def _sigterm_handler(_signo, _stack_frame):
120122
reactor.stop()
121123

122124

123-
def start_qnodeos(node_name: str, network_config_file: Path, network_name: str = "default", log_level: str = "WARNING"):
125+
def start_qnodeos(node_name: str, network_config_file: Path, network_name: str):
124126
"""
125127
Start the QNPU that accepts NetQASM subroutines, and sends them as instructions to the SimulaQron virtual node
126128
backend over twisted PB (Native Mode SimulaQron).
@@ -131,8 +133,6 @@ def start_qnodeos(node_name: str, network_config_file: Path, network_name: str =
131133
:type network_config_file: Path
132134
:param network_name: Name of the network (e.g., 'default').
133135
:type network_name: str
134-
:param log_level: Logging level (e.g., 'DEBUG', 'INFO', 'WARNING').
135-
:type log_level: str
136136
"""
137137

138138
# Let's ensure we read the config file

simulaqron/start/start_vnode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def start_vnode(name: str, network_config_file: Path, network_name: str, nodes_r
9494
logger.debug("START_VNODE: Starting VIRTUAL NODE %s", name)
9595

9696
# Start the backend with the parameters configured in the simulaqron log file
97-
be = Backend(name, network_name=network_name)
97+
be = Backend(name, nodes_running, network_name=network_name)
9898
be.start(max_qubits=simulaqron_settings.max_qubits, max_registers=simulaqron_settings.max_registers)
9999

100100
# Print a message we have terminated the node.

simulaqron/virtual_node/virtual.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ def __init__(self, ID: Host, config: SocketsConfig,
176176
:type maxRegisters: int
177177
"""
178178
self._logger = logging.getLogger(f"{self.__class__.__name__}({ID.name})")
179+
self._attempt = 0
179180

180181
# Store our own host identifiers and configuration
181182
self.myID = ID
@@ -203,7 +204,7 @@ def __init__(self, ID: Host, config: SocketsConfig,
203204
self.virtQubits = []
204205
self.simQubits = []
205206

206-
# Set up connections to the neighouring nodes in the network
207+
# Set up connections to the neighbouring nodes (virtual node processes) in the network
207208
self.connectNet()
208209

209210
# Global lock: needs to be acquired whenever we want to manipulate more than one
@@ -293,7 +294,7 @@ def handle_connection(self, obj, node: Host):
293294
# Add this node to the local connections
294295
self.conn[node.name] = node
295296

296-
def handle_connection_error(self, reason, node):
297+
def handle_connection_error(self, reason, node: Host):
297298
"""
298299
Handles errors from trying to connect to other node.
299300
If a ConnectionRefusedError is raised another try will be made after ``conn_retry_time`` seconds
@@ -303,13 +304,20 @@ def handle_connection_error(self, reason, node):
303304

304305
try:
305306
reason.raiseException()
306-
except ConnectionRefusedError as err:
307-
self._logger.debug("Could not connect to %s (%s, %d), trying again...",
308-
node.name, node.hostname, node.port, exc_info=err)
309-
reactor.callLater(simulaqron_settings.conn_retry_time, self.connect_to_node, node)
310-
except Exception as e:
311-
self._logger.exception(e)
312-
reactor.stop()
307+
except Exception as err:
308+
if self._attempt > simulaqron_settings.conn_max_retries:
309+
self._logger.exception(
310+
"Exhausted the maximum number of attempts to connect to neighbour virtual node '%s'",
311+
node.hostname,
312+
exc_info=err
313+
)
314+
reactor.stop()
315+
else:
316+
self._logger.debug("Could not connect to %s (%s, %d), trying again...",
317+
node.name, node.hostname, node.port, exc_info=err)
318+
self._attempt = self._attempt + 1
319+
reactor.callLater(simulaqron_settings.conn_retry_time, self.connect_to_node, node)
320+
313321

314322
def _get_virtual_id(self):
315323
"""

0 commit comments

Comments
 (0)