From 8ccb777192535be10e072eba45db4c05c134fc5a Mon Sep 17 00:00:00 2001 From: DevTekVE Date: Thu, 4 Sep 2025 14:45:03 +0200 Subject: [PATCH] bugfix: improve exception handling for sunnylinkd (SUN-89) (#1207) * bugfix: improve exception handling for WebSocket connections in sunnylinkd * bugfix: enhance exception handling for WebSocket connections in sunnylinkd * bugfix: improve OSError handling in sunnylinkd for better error reporting --- sunnypilot/sunnylink/athena/sunnylinkd.py | 37 +++++++++++++++-------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/sunnypilot/sunnylink/athena/sunnylinkd.py b/sunnypilot/sunnylink/athena/sunnylinkd.py index 90eae1dfe..a9204d54e 100755 --- a/sunnypilot/sunnylink/athena/sunnylinkd.py +++ b/sunnypilot/sunnylink/athena/sunnylinkd.py @@ -3,6 +3,7 @@ from __future__ import annotations import base64 +import errno import gzip import os import ssl @@ -17,7 +18,7 @@ from openpilot.common.swaglog import cloudlog from openpilot.system.athena.athenad import ws_send, jsonrpc_handler, \ recv_queue, UploadQueueCache, upload_queue, cur_upload_items, backoff, ws_manage, log_handler, start_local_proxy_shim, upload_handler from websocket import (ABNF, WebSocket, WebSocketException, WebSocketTimeoutException, - create_connection) + create_connection, WebSocketConnectionClosedException) import cereal.messaging as messaging from sunnypilot.sunnylink.api import SunnylinkApi @@ -107,10 +108,13 @@ def ws_recv(ws: WebSocket, end_event: threading.Event) -> None: except WebSocketTimeoutException: ns_since_last_ping = int(time.monotonic() * 1e9) - last_ping if ns_since_last_ping > SUNNYLINK_RECONNECT_TIMEOUT_S * 1e9: - cloudlog.exception("sunnylinkd.ws_recv.timeout") + cloudlog.warning("sunnylinkd.ws_recv.timeout") end_event.set() - except Exception: - cloudlog.exception("sunnylinkd.ws_recv.exception") + except Exception as e: + if isinstance(e, WebSocketConnectionClosedException): + cloudlog.warning(f"sunnylinkd.ws_recv.{type(e).__name__}") + else: + cloudlog.exception("sunnylinkd.ws_recv.exception") end_event.set() @@ -137,11 +141,15 @@ def ws_queue(end_event: threading.Event) -> None: sunnylink_api.resume_queued(timeout=29) resume_requested = True tries = 0 - except Exception: - cloudlog.exception("sunnylinkd.ws_queue.resume_queued.exception") + except Exception as e: + if isinstance(e, (ConnectionError, TimeoutError)): + cloudlog.warning(f"sunnylinkd.ws_queue.resume_queued.{type(e).__name__}") + else: + cloudlog.exception("sunnylinkd.ws_queue.resume_queued.exception") + resume_requested = False tries += 1 - time.sleep(backoff(tries)) # Wait for the backoff time before the next attempt + time.sleep(backoff(tries)) if end_event.is_set(): cloudlog.debug("end_event is set, exiting ws_queue thread") @@ -252,14 +260,19 @@ def main(exit_event: threading.Event = None): handle_long_poll(ws, exit_event) except (KeyboardInterrupt, SystemExit): break - except (ConnectionError, TimeoutError, WebSocketException): + except Exception as e: conn_retries += 1 params.remove("LastSunnylinkPingTime") - except Exception: - cloudlog.exception("sunnylinkd.main.exception") - conn_retries += 1 - params.remove("LastSunnylinkPingTime") + if isinstance(e, (ConnectionError, TimeoutError, WebSocketException)): + cloudlog.warning(f"sunnylinkd.main.{type(e).__name__}") + elif isinstance(e, OSError): + name = errno.errorcode.get(e.errno or -1, "UNKNOWN") + msg = f"sunnylinkd.main.OSError.{name} ({e.errno})" + is_expected_error = e.errno in (errno.ENETDOWN, errno.ENETRESET, errno.ENETUNREACH) + cloudlog.warning(msg) if is_expected_error else cloudlog.exception(msg) + else: + cloudlog.exception("sunnylinkd.main.exception") time.sleep(backoff(conn_retries))