Skip to content

Commit 19703f4

Browse files
authored
fix: harden the device connection logic used in startup (#666)
* fix: harden the initial startup logic We currently see "Initial connection attempt took longer than expected" and this is an attempt to see if the reason is due to uncaught exceptions. This adds some exception handling for uncaught exceptions to make sure the connection attempt gets fired. * chore: fix logging * chore: revert whitespace change * chore: reduce whitespace changes * chore: clarify comments and docstrings
1 parent 54e7f7a commit 19703f4

File tree

2 files changed

+46
-10
lines changed

2 files changed

+46
-10
lines changed

roborock/devices/device.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -147,34 +147,45 @@ async def start_connect(self) -> None:
147147
called. The device will automatically attempt to reconnect if the connection
148148
is lost.
149149
"""
150-
start_attempt: asyncio.Event = asyncio.Event()
150+
# The future will be set to True if the first attempt succeeds, False if
151+
# it fails, or an exception if an unexpected error occurs.
152+
# We use this to wait a short time for the first attempt to complete. We
153+
# don't actually care about the result, just that we waited long enough.
154+
start_attempt: asyncio.Future[bool] = asyncio.Future()
151155

152156
async def connect_loop() -> None:
153-
backoff = MIN_BACKOFF_INTERVAL
154157
try:
158+
backoff = MIN_BACKOFF_INTERVAL
155159
while True:
156160
try:
157161
await self.connect()
158-
start_attempt.set()
162+
if not start_attempt.done():
163+
start_attempt.set_result(True)
159164
self._has_connected = True
160165
self._ready_callbacks(self)
161166
return
162167
except RoborockException as e:
163-
start_attempt.set()
168+
if not start_attempt.done():
169+
start_attempt.set_result(False)
164170
self._logger.info("Failed to connect (retry %s): %s", backoff.total_seconds(), e)
165171
await asyncio.sleep(backoff.total_seconds())
166172
backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_INTERVAL)
173+
except Exception as e: # pylint: disable=broad-except
174+
if not start_attempt.done():
175+
start_attempt.set_exception(e)
176+
self._logger.exception("Uncaught error during connect: %s", e)
177+
return
167178
except asyncio.CancelledError:
168179
self._logger.debug("connect_loop was cancelled for device %s", self.duid)
169-
# Clean exit on cancellation
170-
return
171180
finally:
172-
start_attempt.set()
181+
if not start_attempt.done():
182+
start_attempt.set_result(False)
173183

174184
self._connect_task = asyncio.create_task(connect_loop())
175185

176186
try:
177-
await asyncio.wait_for(start_attempt.wait(), timeout=START_ATTEMPT_TIMEOUT.total_seconds())
187+
async with asyncio.timeout(START_ATTEMPT_TIMEOUT.total_seconds()):
188+
await start_attempt
178189
except TimeoutError:
179190
self._logger.debug("Initial connection attempt took longer than expected, will keep trying in background")
180191

@@ -189,6 +200,7 @@ async def connect(self) -> None:
189200
except RoborockException:
190201
unsub()
191202
raise
203+
self._logger.info("Connected to device")
192204
self._unsub = unsub
193205

194206
async def close(self) -> None:

tests/devices/test_device_manager.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,17 @@ def mock_sleep() -> Generator[None, None, None]:
6666
yield
6767

6868

69+
@pytest.fixture(name="channel_exception")
70+
def channel_failure_exception_fixture(mock_rpc_channel: AsyncMock) -> Exception:
71+
"""Fixture that provides the exception to be raised by the failing channel."""
72+
return RoborockException("Connection failed")
73+
74+
6975
@pytest.fixture(name="channel_failure")
70-
def channel_failure_fixture(mock_rpc_channel: AsyncMock) -> Generator[Mock, None, None]:
76+
def channel_failure_fixture(mock_rpc_channel: AsyncMock, channel_exception: Exception) -> Generator[Mock, None, None]:
7177
"""Fixture that makes channel subscribe fail."""
7278
with patch("roborock.devices.device_manager.create_v1_channel") as mock_channel:
73-
mock_channel.return_value.subscribe = AsyncMock(side_effect=RoborockException("Connection failed"))
79+
mock_channel.return_value.subscribe = AsyncMock(side_effect=channel_exception)
7480
mock_channel.return_value.is_connected = False
7581
mock_channel.return_value.rpc_channel = mock_rpc_channel
7682
yield mock_channel
@@ -192,6 +198,12 @@ async def test_ready_callback(home_data: HomeData) -> None:
192198
await device_manager.close()
193199

194200

201+
@pytest.mark.parametrize(
202+
("channel_exception"),
203+
[
204+
RoborockException("Connection failed"),
205+
],
206+
)
195207
async def test_start_connect_failure(home_data: HomeData, channel_failure: Mock, mock_sleep: Mock) -> None:
196208
"""Test that start_connect retries when connection fails."""
197209
ready_devices: list[RoborockDevice] = []
@@ -231,3 +243,15 @@ async def test_start_connect_failure(home_data: HomeData, channel_failure: Mock,
231243

232244
await device_manager.close()
233245
assert mock_unsub.call_count == 1
246+
247+
248+
@pytest.mark.parametrize(
249+
("channel_exception"),
250+
[
251+
Exception("Unexpected error"),
252+
],
253+
)
254+
async def test_start_connect_unexpected_error(home_data: HomeData, channel_failure: Mock, mock_sleep: Mock) -> None:
255+
"""Test that some unexpected errors from start_connect are propagated."""
256+
with pytest.raises(Exception, match="Unexpected error"):
257+
await create_device_manager(USER_PARAMS)

0 commit comments

Comments
 (0)