mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-02-18 17:43:54 +08:00
* sunnylink: add statsd process and related telemetry logging infrastructure - Introduced `statsd_sp` process for handling Sunnylink-specific stats. - Enhanced metrics logging with improved directory structure and data handling. * sunnylink: re-enable and refine stat_handler for telemetry processing - Reactivated `stat_handler` thread with improved path handling. - Made `stat_handler` more flexible by allowing directory injection. * statsd: fix formatting issue in telemetry string generation - Corrected missing comma between `sunnylink_dongle_id` and `comma_dongle_id`. * update statsd_sp process configuration for enhanced readiness logic - Modified `statsd_sp` initialization to include `always_run` alongside `sunnylink_ready_shim`. - Ensures robust process activation conditions. * refactor(statsd): enhance and unify StatLogSP implementation - Replaced custom `StatLogSP` in sunnylink with centralized implementation from `system.statsd`. - Ensures consistent logic for StatLogSP handling across modules. * fix * refactor(statsd): add intercept parameter to StatLogSP for configurable logging - Introduced optional `intercept` parameter to `StatLogSP` to manage `comma_statlog` initialization. - Updated usage in `sunnylink` to disable interception where unnecessary. * Dont complain * feat(statsd): add raw metric type and SunnyPilot-specific stats collection - Introduced `METRIC_TYPE.RAW` for base64-encoded raw data metrics. - Added `sp_stats` thread to export SunnyPilot params as raw metrics. - Enhanced telemetry handling with decoding and serialization updates. * refactor(statsd): improve `sp_stats` error handling and param processing - Enhanced exception handling for `params.get` to prevent crashes. - Added support for nested dict values to be included in stats. * refactor(statsd): adjust imports and minor code formatting updates - Updated `Ratekeeper` import path for consistency with the `openpilot` module structure. - Fixed minor formatting for improved readability. 
* refactor(statsd): update typings and remove unused NoReturn annotation - Removed unnecessary `NoReturn` typing for `stats_main` to simplify function definition. - Adjusted `get_influxdb_line_raw` to refine typing for `value` parameter. * cleanup * init * init * slightly more * staticmethod * handle them all * get them models * log with route * more * car * Revert "car" This reverts commit fe1c90cf4d. * handle capnp * Revert "handle capnp" This reverts commit c5aea68803. * 1 more time * Revert "1 more time" This reverts commit a364474fa5. * Cleaning to expose wider * feat(interfaces, statsd): log car params to stats system - Added `STATSLOGSP` import and logging to capture `carFingerprint` in metrics. - Improved error handling in `get_influxdb_line_raw` for robust metric generation. * refactor(interfaces): streamline car params logging to stats - Simplified logging by directly converting `CP` to a dictionary. - Removed legacy stats aggregation for clarity. * feat(sunnylink): enable compression for stats in SunnyLink - Added optional compression for stats payload to support large data. - Updated `stat_handler` to handle compression and base64 encoding. * fix(statsd): filter complex types in `get_influxdb_line_raw` - Skips unsupported types (dict, list, bytes) to prevent formatting errors. - Simplifies type annotation for `value` parameter. * fix(statsd): use `json.dumps` for string conversion in `get_influxdb_line_raw` - Ensures proper handling of special characters in values. - Prevents potential formatting issues with raw `str()` conversion. * refactor(interfaces, statsd): update parameter keys for stats logging - Renamed logged keys for better clarity (`sunnypilot_params` → `sunnypilot.car_params`, `device_params`). - Ensures consistency across data logs. * bet --------- Co-authored-by: Jason Wen <haibin.wen3@gmail.com>
279 lines
8.0 KiB
Python
Executable File
279 lines
8.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import base64
|
|
import json
|
|
import os
|
|
import threading
|
|
import traceback
|
|
|
|
import zmq
|
|
import time
|
|
import uuid
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
from datetime import datetime, UTC
|
|
|
|
from openpilot.common.params import Params
|
|
from cereal.messaging import SubMaster
|
|
from openpilot.system.hardware.hw import Paths
|
|
from openpilot.common.swaglog import cloudlog
|
|
from openpilot.system.hardware import HARDWARE
|
|
from openpilot.common.file_helpers import atomic_write_in_dir
|
|
from openpilot.system.version import get_build_metadata
|
|
from openpilot.system.loggerd.config import STATS_DIR_FILE_LIMIT, STATS_SOCKET, STATS_FLUSH_TIME_S
|
|
from openpilot.system.statsd import METRIC_TYPE, StatLogSP
|
|
from openpilot.common.realtime import Ratekeeper
|
|
|
|
# Module-level StatLogSP instance, constructed with intercept=False.
# sp_stats() publishes its raw metrics through this object.
STATSLOGSP = StatLogSP(intercept=False)
|
|
|
|
def sp_stats(end_event):
    """Collect sunnypilot-specific statistics and send as raw metrics.

    Loops until ``end_event`` is set. On every pass a fixed list of params is
    read, nested dict/list/tuple values are flattened into dotted / indexed
    keys, and the resulting mapping is published through ``STATSLOGSP.raw``
    under the ``sunnypilot.device_params`` name.

    Args:
        end_event: Object exposing ``is_set()`` (``main`` passes a
            ``threading.Event``); the loop exits once it reports true.
    """
    # NOTE(review): presumably a 0.1 Hz loop (one pass every 10 s) - confirm
    # against Ratekeeper's signature.
    rk = Ratekeeper(.1, print_delay_threshold=None)
    statlogsp = STATSLOGSP
    params = Params()

    def flatten_dict(d, parent_key='', sep='.'):
        """Flatten nested dicts/lists/tuples into a single-level dict.

        Dict keys are joined with ``sep``; sequence items get an ``[i]``
        suffix. A scalar ``d`` is returned as ``{parent_key: d}``.
        """
        items = {}
        if isinstance(d, dict):
            for k, v in d.items():
                new_key = f"{parent_key}{sep}{k}" if parent_key else k
                items.update(flatten_dict(v, new_key, sep=sep))
        elif isinstance(d, (list, tuple)):
            for i, v in enumerate(d):
                new_key = f"{parent_key}[{i}]"
                items.update(flatten_dict(v, new_key, sep=sep))
        else:
            items[parent_key] = d
        return items

    # Params exported on every pass (each key listed once).
    param_keys = (
        'SunnylinkEnabled',
        'AutoLaneChangeBsmDelay',
        'AutoLaneChangeTimer',
        'CarPlatformBundle',
        'CurrentRoute',
        'DevUIInfo',
        'EnableCopyparty',
        'IntelligentCruiseButtonManagement',
        'QuietMode',
        'RainbowMode',
        'ShowAdvancedControls',
        'Mads',
        'MadsMainCruiseAllowed',
        'MadsSteeringMode',
        'MadsUnifiedEngagementMode',
        'ModelManager_ActiveBundle',
        'ModelManager_Favs',
        'EnableSunnylinkUploader',
        'InstallDate',
        'UptimeOffroad',
        'UptimeOnroad',
    )

    while not end_event.is_set():
        try:
            # Rebuilt on every pass so that params which were removed, or
            # lists which shrank, do not leave stale keys in later payloads.
            stats_dict = {}

            for key in param_keys:
                try:
                    value = params.get(key)
                except Exception as e:
                    # Record the failure as text: stats_main decodes raw
                    # payloads as JSON, so an exception object must not be
                    # placed in the mapping.
                    stats_dict[key] = f"{type(e).__name__}: {e}"
                    continue

                if value is None:
                    continue

                if isinstance(value, (dict, list, tuple)):
                    stats_dict.update(flatten_dict(value, key))
                else:
                    stats_dict[key] = value

            if stats_dict:
                statlogsp.raw('sunnypilot.device_params', stats_dict)
        except Exception as e:
            # Best-effort telemetry: log and keep the loop alive.
            cloudlog.error(f"Exception {e}")
        finally:
            rk.keep_time()
|
|
|
|
|
|
def stats_main(end_event):
    """Aggregate metrics received over ZMQ and flush them to the stats dir.

    Binds a PULL socket at ``f"{STATS_SOCKET}_sp"``, drains every pending
    ``name:value|type`` message on each loop pass, and keeps the latest gauge
    value, all sample values and the latest raw payload per metric name.
    When ``STATS_FLUSH_TIME_S`` has elapsed, or ``deviceState.started``
    changed during the pass, the collected data is rendered as InfluxDB
    line-protocol text and written atomically to a new file under
    ``Paths.stats_sp_root()``.

    Args:
        end_event: Object exposing ``is_set()`` (``main`` passes a
            ``threading.Event``); the loop exits once it reports true.
    """
    params = Params()
    comma_dongle_id = params.get("DongleId")
    sunnylink_dongle_id = params.get("SunnylinkDongleId")

    # Both IDs are read once at startup and appended as fields to every line.
    id_fields = f"sunnylink_dongle_id=\"{sunnylink_dongle_id}\",comma_dongle_id=\"{comma_dongle_id}\""

    def format_tags(tags: dict) -> str:
        """Render ``tags`` as the ``,k=v`` suffix that follows the measurement."""
        return "".join(f",{k}={v!s}" for k, v in tags.items())

    def get_influxdb_line(measurement: str, value: float | dict[str, float], timestamp: datetime, tags: dict) -> str:
        """Build one line for a gauge (single float) or a sample stats dict."""
        if isinstance(value, float):
            value = {'value': value}

        fields = "".join(f"{k}={v!s}," for k, v in value.items())
        return f"{measurement}{format_tags(tags)} {fields}{id_fields} {int(timestamp.timestamp() * 1e9)}\n"

    def get_influxdb_line_raw(measurement: str, value: dict, timestamp: datetime, tags: dict) -> str:
        """Build one line for a decoded raw payload.

        Only scalar entries are emitted; each is serialised with
        ``json.dumps`` so strings are quoted and escaped. If ``value`` cannot
        be rendered, the line is tagged ``invalid=1`` and carries the error
        text as a quoted ``reason`` field.
        """
        res = f"{measurement}{format_tags(tags)}"
        try:
            fields = "".join(
                f"{k}={json.dumps(v)},"
                for k, v in value.items()
                # Skip complex types and None (which would serialise to the
                # invalid field value `null`) - only keep simple scalars.
                if v is not None and not isinstance(v, (dict, list, bytes, bytearray))
            )
            res += f" {fields}"
        except Exception as e:
            cloudlog.error(f"Unable to get influxdb line for: {value}")
            # Quote the message: it usually contains spaces, which would
            # otherwise split the line in the wrong place.
            res += f",invalid=1 reason={json.dumps(str(e))},"

        res += f"{id_fields} {int(timestamp.timestamp() * 1e9)}\n"
        return res

    # open statistics socket
    ctx = zmq.Context.instance()
    sock = ctx.socket(zmq.PULL)
    sock.bind(f"{STATS_SOCKET}_sp")

    STATS_DIR = Paths.stats_sp_root()

    # initialize stats directory
    Path(STATS_DIR).mkdir(parents=True, exist_ok=True)

    build_metadata = get_build_metadata()

    # initialize tags
    tags = {
        'started': False,
        'version': build_metadata.openpilot.version,
        'branch': build_metadata.channel,
        'dirty': build_metadata.openpilot.is_dirty,
        'origin': build_metadata.openpilot.git_normalized_origin,
        'deviceType': HARDWARE.get_device_type(),
    }

    # subscribe to deviceState for started state
    sm = SubMaster(['deviceState'])

    idx = 0  # per-boot file counter
    boot_uid = str(uuid.uuid4())[:8]  # file-name prefix for this run
    last_flush_time = time.monotonic()
    gauges = {}
    samples: dict[str, list[float]] = defaultdict(list)
    raws: dict[str, str] = {}  # metric name -> latest base64 payload
    try:
        while not end_event.is_set():
            started_prev = sm['deviceState'].started
            sm.update()

            # Update metrics: drain everything currently queued on the socket
            while True:
                try:
                    metric = sock.recv_string(zmq.NOBLOCK)
                    try:
                        metric_type = metric.split('|')[1]
                        metric_name = metric.split(':')[0]
                        metric_value_raw = metric.split('|')[0].split(':')[1]

                        if metric_type == METRIC_TYPE.GAUGE:
                            gauges[metric_name] = float(metric_value_raw)
                        elif metric_type == METRIC_TYPE.SAMPLE:
                            samples[metric_name].append(float(metric_value_raw))
                        elif metric_type == METRIC_TYPE.RAW:
                            # Decoded lazily at flush time.
                            raws[metric_name] = metric_value_raw
                        else:
                            cloudlog.event("unknown metric type", metric_type=metric_type)
                    except Exception:
                        print(traceback.format_exc())
                        cloudlog.event("malformed metric", metric=metric)
                except zmq.error.Again:
                    # Nothing left to read right now.
                    break

            # flush when started state changes or after FLUSH_TIME_S
            if (time.monotonic() > last_flush_time + STATS_FLUSH_TIME_S) or (sm['deviceState'].started != started_prev):
                lines = []
                current_time = datetime.now(UTC)
                tags['started'] = sm['deviceState'].started

                undecodable = []
                for key, payload in raws.items():
                    try:
                        decoded_value = json.loads(base64.b64decode(payload).decode('utf-8'))
                    except ValueError:
                        # Covers bad base64, bad UTF-8 and bad JSON. A single
                        # corrupt payload must not take the whole thread down.
                        cloudlog.event("malformed raw metric", metric=key)
                        undecodable.append(key)
                        continue
                    lines.append(get_influxdb_line_raw(key, decoded_value, current_time, tags))
                for key in undecodable:
                    del raws[key]

                for key, gauge_value in gauges.items():
                    lines.append(get_influxdb_line(f"gauge.{key}", gauge_value, current_time, tags))

                for key, values in samples.items():
                    values.sort()
                    sample_count = len(values)
                    sample_sum = sum(values)

                    stats = {
                        'count': sample_count,
                        'min': values[0],
                        'max': values[-1],
                        'mean': sample_sum / sample_count,
                    }
                    for percentile in [0.05, 0.5, 0.95]:
                        # Nearest-rank pick from the sorted values.
                        stats[f"p{int(percentile * 100)}"] = values[int(round(percentile * (sample_count - 1)))]

                    lines.append(get_influxdb_line(f"sample.{key}", stats, current_time, tags))

                result = "".join(lines)

                # clear intermediate data
                # NOTE(review): raws is not cleared, so the latest raw payload
                # per name is re-emitted on every flush - confirm intended.
                gauges.clear()
                samples.clear()
                last_flush_time = time.monotonic()

                # check that we aren't filling up the drive
                if len(os.listdir(STATS_DIR)) < STATS_DIR_FILE_LIMIT:
                    if len(result) > 0:
                        stats_path = os.path.join(STATS_DIR, f"{boot_uid}_{idx}")
                        with atomic_write_in_dir(stats_path) as f:
                            f.write(result)
                        idx += 1
                else:
                    cloudlog.error("stats dir full")
    finally:
        sock.close()
        ctx.term()
|
|
|
|
|
|
def main():
    """Run ``stats_main`` and ``sp_stats`` in threads until either one exits.

    Both threads share a single stop event. The calling thread polls their
    liveness, paced by a ``Ratekeeper``; as soon as one thread is no longer
    alive (or the poll loop is interrupted) the event is set so the other
    one can wind down, and then both are joined.
    """
    rk = Ratekeeper(1, print_delay_threshold=None)
    end_event = threading.Event()

    workers = [
        threading.Thread(target=entry_point, args=(end_event,))
        for entry_point in (stats_main, sp_stats)
    ]
    for worker in workers:
        worker.start()

    try:
        while True:
            # Stop supervising as soon as any worker has died.
            if any(not worker.is_alive() for worker in workers):
                break
            rk.keep_time()
    finally:
        end_event.set()

    for worker in workers:
        worker.join()


if __name__ == "__main__":
    main()
|