bugfix: improve exception handling for sunnylinkd (SUN-89) (#1207)

* bugfix: improve exception handling for WebSocket connections in sunnylinkd

* bugfix: enhance exception handling for WebSocket connections in sunnylinkd

* bugfix: improve OSError handling in sunnylinkd for better error reporting
This commit is contained in:
DevTekVE
2025-09-04 14:45:03 +02:00
committed by GitHub
parent 0593667601
commit 8ccb777192

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import base64
import errno
import gzip
import os
import ssl
@@ -17,7 +18,7 @@ from openpilot.common.swaglog import cloudlog
from openpilot.system.athena.athenad import ws_send, jsonrpc_handler, \
recv_queue, UploadQueueCache, upload_queue, cur_upload_items, backoff, ws_manage, log_handler, start_local_proxy_shim, upload_handler
from websocket import (ABNF, WebSocket, WebSocketException, WebSocketTimeoutException,
create_connection)
create_connection, WebSocketConnectionClosedException)
import cereal.messaging as messaging
from sunnypilot.sunnylink.api import SunnylinkApi
@@ -107,10 +108,13 @@ def ws_recv(ws: WebSocket, end_event: threading.Event) -> None:
except WebSocketTimeoutException:
ns_since_last_ping = int(time.monotonic() * 1e9) - last_ping
if ns_since_last_ping > SUNNYLINK_RECONNECT_TIMEOUT_S * 1e9:
cloudlog.exception("sunnylinkd.ws_recv.timeout")
cloudlog.warning("sunnylinkd.ws_recv.timeout")
end_event.set()
except Exception:
cloudlog.exception("sunnylinkd.ws_recv.exception")
except Exception as e:
if isinstance(e, WebSocketConnectionClosedException):
cloudlog.warning(f"sunnylinkd.ws_recv.{type(e).__name__}")
else:
cloudlog.exception("sunnylinkd.ws_recv.exception")
end_event.set()
@@ -137,11 +141,15 @@ def ws_queue(end_event: threading.Event) -> None:
sunnylink_api.resume_queued(timeout=29)
resume_requested = True
tries = 0
except Exception:
cloudlog.exception("sunnylinkd.ws_queue.resume_queued.exception")
except Exception as e:
if isinstance(e, (ConnectionError, TimeoutError)):
cloudlog.warning(f"sunnylinkd.ws_queue.resume_queued.{type(e).__name__}")
else:
cloudlog.exception("sunnylinkd.ws_queue.resume_queued.exception")
resume_requested = False
tries += 1
time.sleep(backoff(tries)) # Wait for the backoff time before the next attempt
time.sleep(backoff(tries))
if end_event.is_set():
cloudlog.debug("end_event is set, exiting ws_queue thread")
@@ -252,14 +260,19 @@ def main(exit_event: threading.Event = None):
handle_long_poll(ws, exit_event)
except (KeyboardInterrupt, SystemExit):
break
except (ConnectionError, TimeoutError, WebSocketException):
except Exception as e:
conn_retries += 1
params.remove("LastSunnylinkPingTime")
except Exception:
cloudlog.exception("sunnylinkd.main.exception")
conn_retries += 1
params.remove("LastSunnylinkPingTime")
if isinstance(e, (ConnectionError, TimeoutError, WebSocketException)):
cloudlog.warning(f"sunnylinkd.main.{type(e).__name__}")
elif isinstance(e, OSError):
name = errno.errorcode.get(e.errno or -1, "UNKNOWN")
msg = f"sunnylinkd.main.OSError.{name} ({e.errno})"
is_expected_error = e.errno in (errno.ENETDOWN, errno.ENETRESET, errno.ENETUNREACH)
cloudlog.warning(msg) if is_expected_error else cloudlog.exception(msg)
else:
cloudlog.exception("sunnylinkd.main.exception")
time.sleep(backoff(conn_retries))