From 9c34635fda8c0ae5be42ef40b7a55ed31bad62e7 Mon Sep 17 00:00:00 2001 From: DevTekVE Date: Sat, 22 Jun 2024 19:06:17 +0000 Subject: [PATCH] [sunnylink] Refactor sunnylink registration process and more resiliency --- common/api/sunnylink.py | 38 +++++++++++--- launch_chffrplus.sh | 2 +- .../offroad/sunnypilot/sunnylink_settings.cc | 4 -- selfdrive/ui/qt/sidebar.cc | 28 ++++++----- selfdrive/ui/qt/sidebar.h | 1 + system/athena/manage_athenad.py | 2 +- system/athena/sunnylinkd.py | 27 ++++++++-- system/manager/manager.py | 4 +- system/manager/process_config.py | 26 ++++++---- system/manager/sunnylink.py | 50 ++++++++++++++++--- 10 files changed, 134 insertions(+), 48 deletions(-) diff --git a/common/api/sunnylink.py b/common/api/sunnylink.py index e2761cc29..4318a27c9 100644 --- a/common/api/sunnylink.py +++ b/common/api/sunnylink.py @@ -82,15 +82,17 @@ class SunnylinkApi(BaseApi): privkey_path = Path(Paths.persist_root()+"/comma/id_rsa") pubkey_path = Path(Paths.persist_root()+"/comma/id_rsa.pub") + start_time = time.monotonic() + successful_registration = False if not pubkey_path.is_file(): sunnylink_dongle_id = UNREGISTERED_SUNNYLINK_DONGLE_ID self._status_update("Public key not found, setting dongle ID to unregistered.") else: + Params().put("LastSunnylinkPingTime", "0") # Reset the last ping time to 0 if we are trying to register with pubkey_path.open() as f1, privkey_path.open() as f2: public_key = f1.read() private_key = f2.read() - start_time = time.monotonic() backoff = 1 while True: register_token = jwt.encode({'register': True, 'exp': datetime.utcnow() + timedelta(hours=1)}, private_key, algorithm='RS256') @@ -102,17 +104,29 @@ class SunnylinkApi(BaseApi): resp = self.api_get("v2/pilotauth/", method='POST', timeout=15, imei=imei1, imei2=imei2, serial=serial, comma_dongle_id=comma_dongle_id, public_key=public_key, register_token=register_token) + if resp.status_code in (409, 412): + timeout = time.monotonic() - start_time # Don't retry if the public key is already in use + key_in_use = "Public key is already in use, is your key unique? Contact your vendor for a new key." + unsafe_key = "Public key is known to not be unique and it's unsafe. Contact your vendor for a new key." + error_message = key_in_use if resp.status_code == 409 else unsafe_key + raise Exception(error_message) + if resp.status_code != 200: - raise Exception(f"Failed to register with sunnylink. Status code: {resp.status_code}") - else: - dongleauth = json.loads(resp.text) - sunnylink_dongle_id = dongleauth["device_id"] - if sunnylink_dongle_id: - self._status_update("Device registered successfully.") - break + raise Exception(f"Failed to register with sunnylink. Status code: {resp.status_code}\nData\n:{resp.text}") + + dongleauth = json.loads(resp.text) + sunnylink_dongle_id = dongleauth["device_id"] + if sunnylink_dongle_id: + self._status_update("Device registered successfully.") + successful_registration = True + break except Exception as e: if verbose: self._status_update(f"Waiting {backoff}s before retry, Exception occurred during registration: [{str(e)}]") + + with open('/data/community/crashes/error.txt', 'a') as f: + f.write(f"[{datetime.now()}] sunnylink: {str(e)}\n") + backoff = min(backoff * 2, 60) time.sleep(backoff) @@ -123,5 +137,13 @@ class SunnylinkApi(BaseApi): self.params.put("SunnylinkDongleId", sunnylink_dongle_id or UNREGISTERED_SUNNYLINK_DONGLE_ID) + # Set the last ping time to the current time since we were just talking to the API + last_ping = int(time.monotonic() * 1e9) if successful_registration else start_time + Params().put("LastSunnylinkPingTime", str(last_ping)) + + # Disable sunnylink if registration was not successful + if not successful_registration: + Params().put_bool("SunnylinkEnabled", False) + self.spinner = None return sunnylink_dongle_id diff --git a/launch_chffrplus.sh b/launch_chffrplus.sh index c5fedf68a..8b213c097 100755 --- a/launch_chffrplus.sh +++ b/launch_chffrplus.sh @@ -87,7 +87,7 @@ function launch { ./build.py fi - ./sunnylink.py; ./mapd_installer.py; ./manager.py + ./mapd_installer.py; ./manager.py # if broken, keep on screen error while true; do sleep 1; done diff --git a/selfdrive/ui/qt/offroad/sunnypilot/sunnylink_settings.cc b/selfdrive/ui/qt/offroad/sunnypilot/sunnylink_settings.cc index 118aa4215..1ebaf4f14 100644 --- a/selfdrive/ui/qt/offroad/sunnypilot/sunnylink_settings.cc +++ b/selfdrive/ui/qt/offroad/sunnypilot/sunnylink_settings.cc @@ -45,10 +45,6 @@ SunnylinkPanel::SunnylinkPanel(QWidget* parent) : QFrame(parent) { sunnylinkEnabledBtn->setDescription(shame_description); } - auto dialog_text = tr("A reboot is required to") + " " + (enabled ? tr("start") : tr("stop")) +" "+ tr("all connections and processes from sunnylink.") + "
"+ tr("If that's not a problem for you, you can ignore this.")+ ""; - if (ConfirmationDialog::confirm(dialog_text, tr("Reboot Now!"), this)) { - Hardware::reboot(); - } updateLabels(); }); diff --git a/selfdrive/ui/qt/sidebar.cc b/selfdrive/ui/qt/sidebar.cc index 5d8245acc..8d45e62ce 100644 --- a/selfdrive/ui/qt/sidebar.cc +++ b/selfdrive/ui/qt/sidebar.cc @@ -155,23 +155,25 @@ void Sidebar::updateState(const UIState &s) { setProperty("pandaStatus", QVariant::fromValue(pandaStatus)); ItemStatus sunnylinkStatus; + auto sl_dongle_id = getSunnylinkDongleId(); auto last_sunnylink_ping_str = params.get("LastSunnylinkPingTime"); auto last_sunnylink_ping = std::stoull(last_sunnylink_ping_str.empty() ? "0" : last_sunnylink_ping_str); - auto current_nanos = nanos_since_boot(); - auto elapsed_sunnylink_ping = current_nanos - last_sunnylink_ping; + auto elapsed_sunnylink_ping = nanos_since_boot() - last_sunnylink_ping; auto sunnylink_enabled = params.getBool("SunnylinkEnabled"); - if (!sunnylink_enabled) { - sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), tr("DISABLED")}, disabled_color}; - } else if (last_sunnylink_ping == 0) { - sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), tr("OFFLINE")}, warning_color}; - } else { - if (elapsed_sunnylink_ping < 80000000000ULL) { - sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), tr("ONLINE")}, good_color}; - } - else { - sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), tr("ERROR")}, danger_color}; - } + + QString status = tr("DISABLED"); + QColor color = disabled_color; + + if (sunnylink_enabled && last_sunnylink_ping == 0) { + // If sunnylink is enabled, but we don't have a dongle id, and we haven't received a ping yet, we are registering + status = sl_dongle_id.has_value() ? tr("OFFLINE") : tr("REGIST..."); + color = sl_dongle_id.has_value() ? warning_color : progress_color; + } else if (sunnylink_enabled) { + // If sunnylink is enabled, we are considered online if we have received a ping in the last 80 seconds, else error. + status = elapsed_sunnylink_ping < 80000000000ULL ? tr("ONLINE") : tr("ERROR"); + color = elapsed_sunnylink_ping < 80000000000ULL ? good_color : danger_color; } + sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), status}, color }; setProperty("sunnylinkStatus", QVariant::fromValue(sunnylinkStatus)); } diff --git a/selfdrive/ui/qt/sidebar.h b/selfdrive/ui/qt/sidebar.h index be2c34cae..9012f2513 100644 --- a/selfdrive/ui/qt/sidebar.h +++ b/selfdrive/ui/qt/sidebar.h @@ -52,6 +52,7 @@ protected: const QRect home_btn = QRect(60, 860, 180, 180); const QRect settings_btn = QRect(50, 35, 200, 117); const QColor good_color = QColor(255, 255, 255); + const QColor progress_color = QColor(3, 132, 252); const QColor warning_color = QColor(218, 202, 37); const QColor danger_color = QColor(201, 34, 49); const QColor disabled_color = QColor(128, 128, 128); diff --git a/system/athena/manage_athenad.py b/system/athena/manage_athenad.py index fb514bc1c..7158cd922 100755 --- a/system/athena/manage_athenad.py +++ b/system/athena/manage_athenad.py @@ -18,7 +18,7 @@ def main(): def manage_athenad(dongle_id_param, pid_param, process_name, target): params = Params() - dongle_id = params.get(dongle_id_param).decode('utf-8') + dongle_id = params.get(dongle_id_param, encoding='utf-8') build_metadata = get_build_metadata() cloudlog.bind_global(dongle_id=dongle_id, diff --git a/system/athena/sunnylinkd.py b/system/athena/sunnylinkd.py index 67ae8a95a..ca24f2409 100755 --- a/system/athena/sunnylinkd.py +++ b/system/athena/sunnylinkd.py @@ -18,6 +18,7 @@ from openpilot.common.api import SunnylinkApi from openpilot.common.params import Params from openpilot.common.realtime import set_core_affinity from openpilot.common.swaglog import cloudlog +from openpilot.system.manager.sunnylink import sunnylink_need_register, sunnylink_ready import cereal.messaging as messaging SUNNYLINK_ATHENA_HOST = os.getenv('SUNNYLINK_ATHENA_HOST', 'wss://ws.stg.api.sunnypilot.ai') @@ -25,6 +26,7 @@ HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4")) LOCAL_PORT_WHITELIST = {8022} SUNNYLINK_LOG_ATTR_NAME = "user.sunny.upload" SUNNYLINK_RECONNECT_TIMEOUT_S = 70 # FYI changing this will also would require a change on sidebar.cc +DISALLOW_LOG_UPLOAD = threading.Event() params = Params() sunnylink_api = SunnylinkApi(params.get("SunnylinkDongleId", encoding='utf-8')) @@ -52,6 +54,10 @@ def handle_long_poll(ws: WebSocket, exit_event: threading.Event | None) -> None: thread.start() try: while not end_event.wait(0.1): + if not sunnylink_ready(params): + cloudlog.warning("Exiting sunnylinkd.handle_long_poll as SunnylinkEnabled is False") + break + sm.update(0) if exit_event is not None and exit_event.is_set(): end_event.set() @@ -60,10 +66,13 @@ def handle_long_poll(ws: WebSocket, exit_event: threading.Event | None) -> None: prime_type = params.get("PrimeType", encoding='utf-8') or 0 metered = sm['deviceState'].networkMetered - if metered and int(prime_type) > 2: + if DISALLOW_LOG_UPLOAD.is_set() and not comma_prime_cellular_end_event.is_set(): + cloudlog.debug(f"sunnylinkd.handle_long_poll: DISALLOW_LOG_UPLOAD, setting comma_prime_cellular_end_event") + comma_prime_cellular_end_event.set() + elif metered and int(prime_type) > 2: cloudlog.debug(f"sunnylinkd.handle_long_poll: PrimeType({prime_type}) > 2 and networkMetered({metered})") comma_prime_cellular_end_event.set() - elif comma_prime_cellular_end_event.is_set(): + elif comma_prime_cellular_end_event.is_set() and not DISALLOW_LOG_UPLOAD.is_set(): cloudlog.debug(f"sunnylinkd.handle_long_poll: comma_prime_cellular_end_event is set and not PrimeType({prime_type}) > 2 or not networkMetered({metered})") comma_prime_cellular_end_event.clear() finally: @@ -142,6 +151,10 @@ def sunny_log_handler(end_event: threading.Event, comma_prime_cellular_end_event comma_prime_cellular_end_event.set() +@dispatcher.add_method +def toggleLogUpload(enabled: bool): + DISALLOW_LOG_UPLOAD.clear() if enabled and DISALLOW_LOG_UPLOAD.is_set() else DISALLOW_LOG_UPLOAD.set() + @dispatcher.add_method def getParamsAllKeys() -> list[str]: keys: list[str] = [k.decode('utf-8') for k in Params().all_keys()] @@ -185,12 +198,16 @@ def main(exit_event: threading.Event = None): except Exception: cloudlog.exception("failed to set core affinity") + while sunnylink_need_register(params): + cloudlog.info("Waiting for sunnylink registration to complete") + time.sleep(10) + UploadQueueCache.initialize(upload_queue) ws_uri = SUNNYLINK_ATHENA_HOST conn_start = None conn_retries = 0 - while exit_event is None or not exit_event.is_set(): + while (exit_event is None or not exit_event.is_set()) and sunnylink_ready(params): try: if conn_start is None: conn_start = time.monotonic() @@ -221,6 +238,10 @@ def main(exit_event: threading.Event = None): time.sleep(backoff(conn_retries)) + if not sunnylink_ready(params): + cloudlog.debug("Reached end of sunnylinkd.main while sunnylink is not ready. Waiting 60s before retrying") + time.sleep(60) + if __name__ == "__main__": main() diff --git a/system/manager/manager.py b/system/manager/manager.py index 07ebe0d38..f3269ee4c 100755 --- a/system/manager/manager.py +++ b/system/manager/manager.py @@ -8,6 +8,7 @@ import traceback from cereal import custom import cereal.messaging as messaging import openpilot.system.sentry as sentry +from openpilot.common.api.sunnylink import UNREGISTERED_SUNNYLINK_DONGLE_ID from openpilot.common.params import Params, ParamKeyType from openpilot.common.text_window import TextWindow from openpilot.system.hardware import HARDWARE, PC @@ -105,7 +106,8 @@ def manager_init() -> None: ("OsmDownloadedDate", "0"), ("OSMDownloadProgress", "{}"), ("SidebarTemperatureOptions", "0"), - ("SunnylinkEnabled", "0"), + ("SunnylinkEnabled", "0" if (build_metadata.release_channel or build_metadata.release_sp_channel) else "1"), + ("SunnylinkDongleId", f"{UNREGISTERED_SUNNYLINK_DONGLE_ID}"), ("CustomDrivingModel", "0"), ("DrivingModelGeneration", "4"), ("LastSunnylinkPingTime", "0"), diff --git a/system/manager/process_config.py b/system/manager/process_config.py index 3ac3e1648..8a8cce69e 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -6,6 +6,7 @@ from openpilot.system.hardware import PC, TICI from openpilot.selfdrive.sunnypilot import get_model_generation from openpilot.system.manager.process import PythonProcess, NativeProcess, DaemonProcess from openpilot.system.mapd_manager import MAPD_PATH, COMMON_DIR +from openpilot.system.manager.sunnylink import sunnylink_need_register, sunnylink_ready WEBCAM = os.getenv("USE_WEBCAM") is not None @@ -47,6 +48,14 @@ def model_use_nav(started, params, CP: car.CarParams) -> bool: custom_model, model_gen = get_model_generation(params) return started and custom_model and model_gen not in (0, 4) +def sunnylink_ready_shim(started, params, CP: car.CarParams) -> bool: + """Shim for sunnylink_ready to match the process manager signature.""" + return sunnylink_ready(params) + +def sunnylink_need_register_shim(started, params, CP: car.CarParams) -> bool: + """Shim for sunnylink_need_register to match the process manager signature.""" + return sunnylink_need_register(params) + procs = [ DaemonProcess("manage_athenad", "system.athena.manage_athenad", "AthenadPid"), @@ -102,17 +111,16 @@ procs = [ NativeProcess("bridge", "cereal/messaging", ["./bridge"], notcar), PythonProcess("webrtcd", "system.webrtc.webrtcd", notcar), PythonProcess("webjoystick", "tools.bodyteleop.web", notcar), + + # Sunnylink <3 + DaemonProcess("manage_sunnylinkd", "system.athena.manage_sunnylinkd", "SunnylinkdPid"), + PythonProcess("sunnylink_registration", "system.manager.sunnylink", sunnylink_need_register_shim), ] -if Params().get_bool("SunnylinkEnabled"): - if os.path.exists("../athena/manage_sunnylinkd.py"): - procs += [ - DaemonProcess("manage_sunnylinkd", "system.athena.manage_sunnylinkd", "SunnylinkdPid"), - ] - if os.path.exists("../loggerd/sunnylink_uploader.py"): - procs += [ - PythonProcess("sunnylink_uploader", "system.loggerd.sunnylink_uploader", always_run), - ] +if os.path.exists("../loggerd/sunnylink_uploader.py"): + procs += [ + PythonProcess("sunnylink_uploader", "system.loggerd.sunnylink_uploader", sunnylink_ready_shim), + ] if os.path.exists("./gitlab_runner.sh") and not PC: # Only devs! diff --git a/system/manager/sunnylink.py b/system/manager/sunnylink.py index b329132bc..84161fb81 100755 --- a/system/manager/sunnylink.py +++ b/system/manager/sunnylink.py @@ -1,18 +1,44 @@ #!/usr/bin/env python3 - -from openpilot.common.api.sunnylink import SunnylinkApi +from cereal import log +from openpilot.common.api.sunnylink import SunnylinkApi, UNREGISTERED_SUNNYLINK_DONGLE_ID from openpilot.common.params import Params -from openpilot.common.spinner import Spinner +from openpilot.system.hardware import HARDWARE from openpilot.system.version import is_prebuilt +import time + +NetworkType = log.DeviceState.NetworkType -if __name__ == "__main__": - spinner = Spinner() +def is_network_connected() -> bool: + """Check if the device is connected to a network.""" + return HARDWARE.get_network_type() != NetworkType.none + + +def get_sunnylink_status(params=Params()) -> tuple[bool, bool]: + """Get the status of Sunnylink on the device. Returns a tuple of (is_sunnylink_enabled, is_registered).""" + is_sunnylink_enabled = params.get_bool("SunnylinkEnabled") + is_registered = params.get("SunnylinkDongleId", encoding='utf-8') not in (None, UNREGISTERED_SUNNYLINK_DONGLE_ID) + return is_sunnylink_enabled, is_registered + + +def sunnylink_ready(params=Params()) -> bool: + """Check if the device is ready to communicate with Sunnylink. That means it is enabled and registered.""" + is_sunnylink_enabled, is_registered = get_sunnylink_status(params) + return is_sunnylink_enabled and is_registered + + +def sunnylink_need_register(params=Params()) -> bool: + """Check if the device needs to be registered with Sunnylink.""" + is_sunnylink_enabled, is_registered = get_sunnylink_status(params) + return is_sunnylink_enabled and not is_registered and is_network_connected() + + +def register_sunnylink(): + """Register the device with Sunnylink if it is enabled.""" extra_args = {} if not Params().get_bool("SunnylinkEnabled"): print("Sunnylink is not enabled. Exiting.") - spinner.close() exit(0) if not is_prebuilt(): @@ -21,6 +47,14 @@ if __name__ == "__main__": "timeout": 60 } - sunnylink_id = SunnylinkApi(None).register_device(spinner, **extra_args) + sunnylink_id = SunnylinkApi(None).register_device(None, **extra_args) print(f"SunnyLinkId: {sunnylink_id}") - spinner.close() + + +def main(): + """The main method is expected to be called by the manager when the device boots up.""" + register_sunnylink() + + +if __name__ == "__main__": + main()