[sunnylink] Refactor sunnylink registration process and improve resiliency

This commit is contained in:
DevTekVE 2024-06-22 19:06:17 +00:00
parent 2b6bfe32f4
commit 9c34635fda
10 changed files with 134 additions and 48 deletions

View File

@ -82,15 +82,17 @@ class SunnylinkApi(BaseApi):
privkey_path = Path(Paths.persist_root()+"/comma/id_rsa")
pubkey_path = Path(Paths.persist_root()+"/comma/id_rsa.pub")
start_time = time.monotonic()
successful_registration = False
if not pubkey_path.is_file():
sunnylink_dongle_id = UNREGISTERED_SUNNYLINK_DONGLE_ID
self._status_update("Public key not found, setting dongle ID to unregistered.")
else:
Params().put("LastSunnylinkPingTime", "0") # Reset the last ping time to 0 if we are trying to register
with pubkey_path.open() as f1, privkey_path.open() as f2:
public_key = f1.read()
private_key = f2.read()
start_time = time.monotonic()
backoff = 1
while True:
register_token = jwt.encode({'register': True, 'exp': datetime.utcnow() + timedelta(hours=1)}, private_key, algorithm='RS256')
@ -102,17 +104,29 @@ class SunnylinkApi(BaseApi):
resp = self.api_get("v2/pilotauth/", method='POST', timeout=15, imei=imei1, imei2=imei2, serial=serial, comma_dongle_id=comma_dongle_id, public_key=public_key, register_token=register_token)
if resp.status_code in (409, 412):
timeout = time.monotonic() - start_time # Don't retry if the public key is already in use
key_in_use = "Public key is already in use, is your key unique? Contact your vendor for a new key."
unsafe_key = "Public key is known to not be unique and it's unsafe. Contact your vendor for a new key."
error_message = key_in_use if resp.status_code == 409 else unsafe_key
raise Exception(error_message)
if resp.status_code != 200:
raise Exception(f"Failed to register with sunnylink. Status code: {resp.status_code}")
else:
raise Exception(f"Failed to register with sunnylink. Status code: {resp.status_code}\nData\n:{resp.text}")
dongleauth = json.loads(resp.text)
sunnylink_dongle_id = dongleauth["device_id"]
if sunnylink_dongle_id:
self._status_update("Device registered successfully.")
successful_registration = True
break
except Exception as e:
if verbose:
self._status_update(f"Waiting {backoff}s before retry, Exception occurred during registration: [{str(e)}]")
with open('/data/community/crashes/error.txt', 'a') as f:
f.write(f"[{datetime.now()}] sunnylink: {str(e)}\n")
backoff = min(backoff * 2, 60)
time.sleep(backoff)
@ -123,5 +137,13 @@ class SunnylinkApi(BaseApi):
self.params.put("SunnylinkDongleId", sunnylink_dongle_id or UNREGISTERED_SUNNYLINK_DONGLE_ID)
# Set the last ping time to the current time since we were just talking to the API
last_ping = int(time.monotonic() * 1e9) if successful_registration else start_time
Params().put("LastSunnylinkPingTime", str(last_ping))
# Disable sunnylink if registration was not successful
if not successful_registration:
Params().put_bool("SunnylinkEnabled", False)
self.spinner = None
return sunnylink_dongle_id

View File

@ -87,7 +87,7 @@ function launch {
./build.py
fi
./sunnylink.py; ./mapd_installer.py; ./manager.py
./mapd_installer.py; ./manager.py
# if broken, keep on screen error
while true; do sleep 1; done

View File

@ -45,10 +45,6 @@ SunnylinkPanel::SunnylinkPanel(QWidget* parent) : QFrame(parent) {
sunnylinkEnabledBtn->setDescription(shame_description);
}
auto dialog_text = tr("A reboot is required to") + " " + (enabled ? tr("start") : tr("stop")) +" "+ tr("all connections and processes from sunnylink.") + "<br/><small>"+ tr("If that's not a problem for you, you can ignore this.")+ "</small>";
if (ConfirmationDialog::confirm(dialog_text, tr("Reboot Now!"), this)) {
Hardware::reboot();
}
updateLabels();
});

View File

@ -155,23 +155,25 @@ void Sidebar::updateState(const UIState &s) {
setProperty("pandaStatus", QVariant::fromValue(pandaStatus));
ItemStatus sunnylinkStatus;
auto sl_dongle_id = getSunnylinkDongleId();
auto last_sunnylink_ping_str = params.get("LastSunnylinkPingTime");
auto last_sunnylink_ping = std::stoull(last_sunnylink_ping_str.empty() ? "0" : last_sunnylink_ping_str);
auto current_nanos = nanos_since_boot();
auto elapsed_sunnylink_ping = current_nanos - last_sunnylink_ping;
auto elapsed_sunnylink_ping = nanos_since_boot() - last_sunnylink_ping;
auto sunnylink_enabled = params.getBool("SunnylinkEnabled");
if (!sunnylink_enabled) {
sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), tr("DISABLED")}, disabled_color};
} else if (last_sunnylink_ping == 0) {
sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), tr("OFFLINE")}, warning_color};
} else {
if (elapsed_sunnylink_ping < 80000000000ULL) {
sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), tr("ONLINE")}, good_color};
}
else {
sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), tr("ERROR")}, danger_color};
}
QString status = tr("DISABLED");
QColor color = disabled_color;
if (sunnylink_enabled && last_sunnylink_ping == 0) {
// If sunnylink is enabled, but we don't have a dongle id, and we haven't received a ping yet, we are registering
status = sl_dongle_id.has_value() ? tr("OFFLINE") : tr("REGIST...");
color = sl_dongle_id.has_value() ? warning_color : progress_color;
} else if (sunnylink_enabled) {
// If sunnylink is enabled, we are considered online if we have received a ping in the last 80 seconds, else error.
status = elapsed_sunnylink_ping < 80000000000ULL ? tr("ONLINE") : tr("ERROR");
color = elapsed_sunnylink_ping < 80000000000ULL ? good_color : danger_color;
}
sunnylinkStatus = ItemStatus{{tr("SUNNYLINK"), status}, color };
setProperty("sunnylinkStatus", QVariant::fromValue(sunnylinkStatus));
}

View File

@ -52,6 +52,7 @@ protected:
const QRect home_btn = QRect(60, 860, 180, 180);
const QRect settings_btn = QRect(50, 35, 200, 117);
const QColor good_color = QColor(255, 255, 255);
const QColor progress_color = QColor(3, 132, 252);
const QColor warning_color = QColor(218, 202, 37);
const QColor danger_color = QColor(201, 34, 49);
const QColor disabled_color = QColor(128, 128, 128);

View File

@ -18,7 +18,7 @@ def main():
def manage_athenad(dongle_id_param, pid_param, process_name, target):
params = Params()
dongle_id = params.get(dongle_id_param).decode('utf-8')
dongle_id = params.get(dongle_id_param, encoding='utf-8')
build_metadata = get_build_metadata()
cloudlog.bind_global(dongle_id=dongle_id,

View File

@ -18,6 +18,7 @@ from openpilot.common.api import SunnylinkApi
from openpilot.common.params import Params
from openpilot.common.realtime import set_core_affinity
from openpilot.common.swaglog import cloudlog
from openpilot.system.manager.sunnylink import sunnylink_need_register, sunnylink_ready
import cereal.messaging as messaging
SUNNYLINK_ATHENA_HOST = os.getenv('SUNNYLINK_ATHENA_HOST', 'wss://ws.stg.api.sunnypilot.ai')
@ -25,6 +26,7 @@ HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4"))
LOCAL_PORT_WHITELIST = {8022}
SUNNYLINK_LOG_ATTR_NAME = "user.sunny.upload"
SUNNYLINK_RECONNECT_TIMEOUT_S = 70 # NOTE: changing this value also requires a matching change in sidebar.cc
DISALLOW_LOG_UPLOAD = threading.Event()
params = Params()
sunnylink_api = SunnylinkApi(params.get("SunnylinkDongleId", encoding='utf-8'))
@ -52,6 +54,10 @@ def handle_long_poll(ws: WebSocket, exit_event: threading.Event | None) -> None:
thread.start()
try:
while not end_event.wait(0.1):
if not sunnylink_ready(params):
cloudlog.warning("Exiting sunnylinkd.handle_long_poll as SunnylinkEnabled is False")
break
sm.update(0)
if exit_event is not None and exit_event.is_set():
end_event.set()
@ -60,10 +66,13 @@ def handle_long_poll(ws: WebSocket, exit_event: threading.Event | None) -> None:
prime_type = params.get("PrimeType", encoding='utf-8') or 0
metered = sm['deviceState'].networkMetered
if metered and int(prime_type) > 2:
if DISALLOW_LOG_UPLOAD.is_set() and not comma_prime_cellular_end_event.is_set():
cloudlog.debug(f"sunnylinkd.handle_long_poll: DISALLOW_LOG_UPLOAD, setting comma_prime_cellular_end_event")
comma_prime_cellular_end_event.set()
elif metered and int(prime_type) > 2:
cloudlog.debug(f"sunnylinkd.handle_long_poll: PrimeType({prime_type}) > 2 and networkMetered({metered})")
comma_prime_cellular_end_event.set()
elif comma_prime_cellular_end_event.is_set():
elif comma_prime_cellular_end_event.is_set() and not DISALLOW_LOG_UPLOAD.is_set():
cloudlog.debug(f"sunnylinkd.handle_long_poll: comma_prime_cellular_end_event is set and not PrimeType({prime_type}) > 2 or not networkMetered({metered})")
comma_prime_cellular_end_event.clear()
finally:
@ -142,6 +151,10 @@ def sunny_log_handler(end_event: threading.Event, comma_prime_cellular_end_event
comma_prime_cellular_end_event.set()
@dispatcher.add_method
def toggleLogUpload(enabled: bool):
  """Allow or block log uploads by driving the DISALLOW_LOG_UPLOAD event.

  Args:
    enabled: True to allow uploads (the event is cleared), False to block
      them (the event is set).

  The previous one-line conditional expression fell through to ``set()``
  whenever ``enabled`` was True but the event was already clear, so asking to
  enable uploads while they were allowed ended up blocking them. The flag now
  always mirrors the requested state. ``Event.clear()`` and ``Event.set()``
  are safe to call regardless of the current state, so no ``is_set()`` check
  is needed.
  """
  if enabled:
    DISALLOW_LOG_UPLOAD.clear()
  else:
    DISALLOW_LOG_UPLOAD.set()
@dispatcher.add_method
def getParamsAllKeys() -> list[str]:
keys: list[str] = [k.decode('utf-8') for k in Params().all_keys()]
@ -185,12 +198,16 @@ def main(exit_event: threading.Event = None):
except Exception:
cloudlog.exception("failed to set core affinity")
while sunnylink_need_register(params):
cloudlog.info("Waiting for sunnylink registration to complete")
time.sleep(10)
UploadQueueCache.initialize(upload_queue)
ws_uri = SUNNYLINK_ATHENA_HOST
conn_start = None
conn_retries = 0
while exit_event is None or not exit_event.is_set():
while (exit_event is None or not exit_event.is_set()) and sunnylink_ready(params):
try:
if conn_start is None:
conn_start = time.monotonic()
@ -221,6 +238,10 @@ def main(exit_event: threading.Event = None):
time.sleep(backoff(conn_retries))
if not sunnylink_ready(params):
cloudlog.debug("Reached end of sunnylinkd.main while sunnylink is not ready. Waiting 60s before retrying")
time.sleep(60)
if __name__ == "__main__":
main()

View File

@ -8,6 +8,7 @@ import traceback
from cereal import custom
import cereal.messaging as messaging
import openpilot.system.sentry as sentry
from openpilot.common.api.sunnylink import UNREGISTERED_SUNNYLINK_DONGLE_ID
from openpilot.common.params import Params, ParamKeyType
from openpilot.common.text_window import TextWindow
from openpilot.system.hardware import HARDWARE, PC
@ -105,7 +106,8 @@ def manager_init() -> None:
("OsmDownloadedDate", "0"),
("OSMDownloadProgress", "{}"),
("SidebarTemperatureOptions", "0"),
("SunnylinkEnabled", "0"),
("SunnylinkEnabled", "0" if (build_metadata.release_channel or build_metadata.release_sp_channel) else "1"),
("SunnylinkDongleId", f"{UNREGISTERED_SUNNYLINK_DONGLE_ID}"),
("CustomDrivingModel", "0"),
("DrivingModelGeneration", "4"),
("LastSunnylinkPingTime", "0"),

View File

@ -6,6 +6,7 @@ from openpilot.system.hardware import PC, TICI
from openpilot.selfdrive.sunnypilot import get_model_generation
from openpilot.system.manager.process import PythonProcess, NativeProcess, DaemonProcess
from openpilot.system.mapd_manager import MAPD_PATH, COMMON_DIR
from openpilot.system.manager.sunnylink import sunnylink_need_register, sunnylink_ready
WEBCAM = os.getenv("USE_WEBCAM") is not None
@ -47,6 +48,14 @@ def model_use_nav(started, params, CP: car.CarParams) -> bool:
custom_model, model_gen = get_model_generation(params)
return started and custom_model and model_gen not in (0, 4)
def sunnylink_ready_shim(started, params, CP: car.CarParams) -> bool:
  """Adapter exposing sunnylink_ready through the (started, params, CP) callback signature.

  Only ``params`` is forwarded; ``started`` and ``CP`` are accepted but unused.
  """
  is_ready = sunnylink_ready(params)
  return is_ready
def sunnylink_need_register_shim(started, params, CP: car.CarParams) -> bool:
  """Adapter exposing sunnylink_need_register through the (started, params, CP) callback signature.

  Only ``params`` is forwarded; ``started`` and ``CP`` are accepted but unused.
  """
  needs_registration = sunnylink_need_register(params)
  return needs_registration
procs = [
DaemonProcess("manage_athenad", "system.athena.manage_athenad", "AthenadPid"),
@ -102,16 +111,15 @@ procs = [
NativeProcess("bridge", "cereal/messaging", ["./bridge"], notcar),
PythonProcess("webrtcd", "system.webrtc.webrtcd", notcar),
PythonProcess("webjoystick", "tools.bodyteleop.web", notcar),
# Sunnylink <3
DaemonProcess("manage_sunnylinkd", "system.athena.manage_sunnylinkd", "SunnylinkdPid"),
PythonProcess("sunnylink_registration", "system.manager.sunnylink", sunnylink_need_register_shim),
]
if Params().get_bool("SunnylinkEnabled"):
if os.path.exists("../athena/manage_sunnylinkd.py"):
if os.path.exists("../loggerd/sunnylink_uploader.py"):
procs += [
DaemonProcess("manage_sunnylinkd", "system.athena.manage_sunnylinkd", "SunnylinkdPid"),
]
if os.path.exists("../loggerd/sunnylink_uploader.py"):
procs += [
PythonProcess("sunnylink_uploader", "system.loggerd.sunnylink_uploader", always_run),
PythonProcess("sunnylink_uploader", "system.loggerd.sunnylink_uploader", sunnylink_ready_shim),
]
if os.path.exists("./gitlab_runner.sh") and not PC:

View File

@ -1,18 +1,44 @@
#!/usr/bin/env python3
from openpilot.common.api.sunnylink import SunnylinkApi
from cereal import log
from openpilot.common.api.sunnylink import SunnylinkApi, UNREGISTERED_SUNNYLINK_DONGLE_ID
from openpilot.common.params import Params
from openpilot.common.spinner import Spinner
from openpilot.system.hardware import HARDWARE
from openpilot.system.version import is_prebuilt
import time
NetworkType = log.DeviceState.NetworkType
if __name__ == "__main__":
spinner = Spinner()
def is_network_connected() -> bool:
  """Return True when HARDWARE reports any network type other than NetworkType.none."""
  current_network = HARDWARE.get_network_type()
  return current_network != NetworkType.none
def get_sunnylink_status(params=None) -> tuple[bool, bool]:
  """Read the Sunnylink state from params.

  Args:
    params: Params-like object to read from. When omitted, a fresh Params()
      is created on each call. The default is resolved lazily rather than in
      the signature, because a call in a default argument runs once at import
      time and the resulting instance would be shared by every later call.

  Returns:
    A tuple ``(is_sunnylink_enabled, is_registered)``. ``is_registered`` is
    True when "SunnylinkDongleId" is neither missing (None) nor equal to
    UNREGISTERED_SUNNYLINK_DONGLE_ID.
  """
  if params is None:
    params = Params()

  is_sunnylink_enabled = params.get_bool("SunnylinkEnabled")
  dongle_id = params.get("SunnylinkDongleId", encoding='utf-8')
  is_registered = dongle_id not in (None, UNREGISTERED_SUNNYLINK_DONGLE_ID)
  return is_sunnylink_enabled, is_registered
def sunnylink_ready(params=None) -> bool:
  """Check whether Sunnylink is both enabled and registered.

  Args:
    params: Params-like object to read from. When omitted, a fresh Params()
      is created on each call; the default is resolved lazily so that no
      Params instance is built at import time and shared between calls.

  Returns:
    True only when get_sunnylink_status reports the device as enabled and
    registered.
  """
  if params is None:
    params = Params()

  is_sunnylink_enabled, is_registered = get_sunnylink_status(params)
  return is_sunnylink_enabled and is_registered
def sunnylink_need_register(params=None) -> bool:
  """Check whether a Sunnylink registration attempt should be made.

  Args:
    params: Params-like object to read from. When omitted, a fresh Params()
      is created on each call; the default is resolved lazily so that no
      Params instance is built at import time and shared between calls.

  Returns:
    True when Sunnylink is enabled, the device is not yet registered, and a
    network connection is available.
  """
  if params is None:
    params = Params()

  is_sunnylink_enabled, is_registered = get_sunnylink_status(params)
  # The network check stays last so it is only queried when registration is
  # otherwise wanted (short-circuit evaluation, as before).
  return is_sunnylink_enabled and not is_registered and is_network_connected()
def register_sunnylink():
"""Register the device with Sunnylink if it is enabled."""
extra_args = {}
if not Params().get_bool("SunnylinkEnabled"):
print("Sunnylink is not enabled. Exiting.")
spinner.close()
exit(0)
if not is_prebuilt():
@ -21,6 +47,14 @@ if __name__ == "__main__":
"timeout": 60
}
sunnylink_id = SunnylinkApi(None).register_device(spinner, **extra_args)
sunnylink_id = SunnylinkApi(None).register_device(None, **extra_args)
print(f"SunnyLinkId: {sunnylink_id}")
spinner.close()
def main():
  """Entry point intended to be invoked by the manager at device boot; it simply runs the registration routine."""
  register_sunnylink()
if __name__ == "__main__":
main()