bugfix: improve exception handling for sunnylinkd (SUN-89) (#1207)
* bugfix: improve exception handling for WebSocket connections in sunnylinkd
* bugfix: enhance exception handling for WebSocket connections in sunnylinkd
* bugfix: improve OSError handling in sunnylinkd for better error reporting
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import errno
|
||||
import gzip
|
||||
import os
|
||||
import ssl
|
||||
@@ -17,7 +18,7 @@ from openpilot.common.swaglog import cloudlog
|
||||
from openpilot.system.athena.athenad import ws_send, jsonrpc_handler, \
|
||||
recv_queue, UploadQueueCache, upload_queue, cur_upload_items, backoff, ws_manage, log_handler, start_local_proxy_shim, upload_handler
|
||||
from websocket import (ABNF, WebSocket, WebSocketException, WebSocketTimeoutException,
|
||||
create_connection)
|
||||
create_connection, WebSocketConnectionClosedException)
|
||||
|
||||
import cereal.messaging as messaging
|
||||
from sunnypilot.sunnylink.api import SunnylinkApi
|
||||
@@ -107,10 +108,13 @@ def ws_recv(ws: WebSocket, end_event: threading.Event) -> None:
|
||||
except WebSocketTimeoutException:
|
||||
ns_since_last_ping = int(time.monotonic() * 1e9) - last_ping
|
||||
if ns_since_last_ping > SUNNYLINK_RECONNECT_TIMEOUT_S * 1e9:
|
||||
cloudlog.exception("sunnylinkd.ws_recv.timeout")
|
||||
cloudlog.warning("sunnylinkd.ws_recv.timeout")
|
||||
end_event.set()
|
||||
except Exception:
|
||||
cloudlog.exception("sunnylinkd.ws_recv.exception")
|
||||
except Exception as e:
|
||||
if isinstance(e, WebSocketConnectionClosedException):
|
||||
cloudlog.warning(f"sunnylinkd.ws_recv.{type(e).__name__}")
|
||||
else:
|
||||
cloudlog.exception("sunnylinkd.ws_recv.exception")
|
||||
end_event.set()
|
||||
|
||||
|
||||
@@ -137,11 +141,15 @@ def ws_queue(end_event: threading.Event) -> None:
|
||||
sunnylink_api.resume_queued(timeout=29)
|
||||
resume_requested = True
|
||||
tries = 0
|
||||
except Exception:
|
||||
cloudlog.exception("sunnylinkd.ws_queue.resume_queued.exception")
|
||||
except Exception as e:
|
||||
if isinstance(e, (ConnectionError, TimeoutError)):
|
||||
cloudlog.warning(f"sunnylinkd.ws_queue.resume_queued.{type(e).__name__}")
|
||||
else:
|
||||
cloudlog.exception("sunnylinkd.ws_queue.resume_queued.exception")
|
||||
|
||||
resume_requested = False
|
||||
tries += 1
|
||||
time.sleep(backoff(tries)) # Wait for the backoff time before the next attempt
|
||||
time.sleep(backoff(tries))
|
||||
|
||||
if end_event.is_set():
|
||||
cloudlog.debug("end_event is set, exiting ws_queue thread")
|
||||
@@ -252,14 +260,19 @@ def main(exit_event: threading.Event = None):
|
||||
handle_long_poll(ws, exit_event)
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
break
|
||||
except (ConnectionError, TimeoutError, WebSocketException):
|
||||
except Exception as e:
|
||||
conn_retries += 1
|
||||
params.remove("LastSunnylinkPingTime")
|
||||
except Exception:
|
||||
cloudlog.exception("sunnylinkd.main.exception")
|
||||
|
||||
conn_retries += 1
|
||||
params.remove("LastSunnylinkPingTime")
|
||||
if isinstance(e, (ConnectionError, TimeoutError, WebSocketException)):
|
||||
cloudlog.warning(f"sunnylinkd.main.{type(e).__name__}")
|
||||
elif isinstance(e, OSError):
|
||||
name = errno.errorcode.get(e.errno or -1, "UNKNOWN")
|
||||
msg = f"sunnylinkd.main.OSError.{name} ({e.errno})"
|
||||
is_expected_error = e.errno in (errno.ENETDOWN, errno.ENETRESET, errno.ENETUNREACH)
|
||||
cloudlog.warning(msg) if is_expected_error else cloudlog.exception(msg)
|
||||
else:
|
||||
cloudlog.exception("sunnylinkd.main.exception")
|
||||
|
||||
time.sleep(backoff(conn_retries))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user