New settings for governors, +30% memory bandwidth (#2590)

* new settings for governors, +30% memory bandwidth

* CPU waste is better

* warm up frame_drop

* optional CORE arg for rtshield

* back off priorities of processes not critical to driving

* if it doesn't drive, it's not realtime

* err, resource. this should have been included

* reverse order to not spike

Co-authored-by: Comma Device <device@comma.ai>
This commit is contained in:
George Hotz 2020-11-20 19:25:12 -08:00 committed by GitHub
parent 98f891a259
commit 917eff8c30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 43 additions and 8 deletions

View File

@ -19,6 +19,19 @@ function two_init {
# openpilot gets all the cores
echo 0-3 > /dev/cpuset/app/cpus
# set up governors
# +50mW offroad, +500mW onroad for 30% more RAM bandwidth
echo "performance" > /sys/class/devfreq/soc:qcom,cpubw/governor
echo 1056000 > /sys/class/devfreq/soc:qcom,m4m/max_freq
echo "performance" > /sys/class/devfreq/soc:qcom,m4m/governor
# unclear if these help, but they don't seem to hurt
echo "performance" > /sys/class/devfreq/soc:qcom,memlat-cpu0/governor
echo "performance" > /sys/class/devfreq/soc:qcom,memlat-cpu2/governor
# /sys/class/devfreq/soc:qcom,mincpubw is the only one left at "powersave"
# it seems to gain nothing but a wasted 500mW
# Collect RIL and other possibly long-running I/O interrupts onto CPU 1
echo 1 > /proc/irq/78/smp_affinity_list # qcom,smd-modem (LTE radio)
echo 1 > /proc/irq/33/smp_affinity_list # ufshcd (flash storage)

Binary file not shown.

View File

@ -36,8 +36,23 @@ void waste(int pid) {
double sec = seconds_since_boot();
while (1) {
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 0x1000000; j+=2) {
out = vmlaq_f32(out, tmp[j], tmp[j+1]);
for (int j = 0; j < 0x1000000; j+=0x20) {
out = vmlaq_f32(out, tmp[j+0], tmp[j+1]);
out = vmlaq_f32(out, tmp[j+2], tmp[j+3]);
out = vmlaq_f32(out, tmp[j+4], tmp[j+5]);
out = vmlaq_f32(out, tmp[j+6], tmp[j+7]);
out = vmlaq_f32(out, tmp[j+8], tmp[j+9]);
out = vmlaq_f32(out, tmp[j+10], tmp[j+11]);
out = vmlaq_f32(out, tmp[j+12], tmp[j+13]);
out = vmlaq_f32(out, tmp[j+14], tmp[j+15]);
out = vmlaq_f32(out, tmp[j+16], tmp[j+17]);
out = vmlaq_f32(out, tmp[j+18], tmp[j+19]);
out = vmlaq_f32(out, tmp[j+20], tmp[j+21]);
out = vmlaq_f32(out, tmp[j+22], tmp[j+23]);
out = vmlaq_f32(out, tmp[j+24], tmp[j+25]);
out = vmlaq_f32(out, tmp[j+26], tmp[j+27]);
out = vmlaq_f32(out, tmp[j+28], tmp[j+29]);
out = vmlaq_f32(out, tmp[j+30], tmp[j+31]);
}
}
double nsec = seconds_since_boot();

View File

@ -599,7 +599,7 @@ int main(int argc, char** argv) {
int err;
#ifdef QCOM
set_realtime_priority(50);
setpriority(PRIO_PROCESS, 0, -12);
#endif
if (argc > 1 && strcmp(argv[1], "--bootlog") == 0) {

View File

@ -3,6 +3,7 @@
#include <unistd.h>
#include <signal.h>
#include <cassert>
#include <sys/resource.h>
#include "common/visionbuf.h"
#include "common/visionipc.h"
@ -23,7 +24,7 @@ static void set_do_exit(int sig) {
int main(int argc, char **argv) {
int err;
set_realtime_priority(51);
setpriority(PRIO_PROCESS, 0, -15);
#ifdef QCOM2
set_core_affinity(5);

View File

@ -157,6 +157,7 @@ int main(int argc, char **argv) {
uint32_t frame_id = 0, last_vipc_frame_id = 0;
double last = 0;
int desire = -1;
uint32_t run_count = 0;
while (!do_exit) {
VIPCBuf *buf;
VIPCBufExtra extra;
@ -179,6 +180,8 @@ int main(int argc, char **argv) {
double mt1 = 0, mt2 = 0;
if (run_model_this_iter) {
run_count++;
float vec_desire[DESIRE_LEN] = {0};
if (desire >= 0 && desire < DESIRE_LEN) {
vec_desire[desire] = 1.0;
@ -198,6 +201,7 @@ int main(int argc, char **argv) {
// tracked dropped frames
uint32_t vipc_dropped_frames = extra.frame_id - last_vipc_frame_id - 1;
frames_dropped = (1. - frame_filter_k) * frames_dropped + frame_filter_k * (float)std::min(vipc_dropped_frames, 10U);
if (run_count < 10) frames_dropped = 0; // let frame drops warm up
float frame_drop_ratio = frames_dropped / (1 + frames_dropped);
model_publish(pm, extra.frame_id, frame_id, vipc_dropped_frames, frame_drop_ratio, model_buf, extra.timestamp_eof, model_execution_time);

8
selfdrive/rtshield.py Normal file → Executable file
View File

@ -1,16 +1,20 @@
#!/usr/bin/env python3
import os
import time
from common.realtime import set_core_affinity, set_realtime_priority
# RT shield - ensure CPU 3 always remains available for RT processes
# runs as SCHED_FIFO with minimum priority to ensure kthreads don't
# get scheduled onto CPU 3, but it's always preemptible by realtime
# openpilot processes
def main():
set_core_affinity(3)
set_core_affinity(int(os.getenv("CORE", "3")))
set_realtime_priority(1)
while True:
time.sleep(0.000001)
if __name__ == "__main__":
main()

View File

@ -377,7 +377,6 @@ def thermald_thread():
if started_ts is None:
started_ts = sec_since_boot()
started_seen = True
os.system('echo performance > /sys/class/devfreq/soc:qcom,cpubw/governor')
else:
if startup_conditions["ignition"] and (startup_conditions != startup_conditions_prev):
cloudlog.event("Startup blocked", startup_conditions=startup_conditions)
@ -387,7 +386,6 @@ def thermald_thread():
started_ts = None
if off_ts is None:
off_ts = sec_since_boot()
os.system('echo powersave > /sys/class/devfreq/soc:qcom,cpubw/governor')
# Offroad power monitoring
pm.calculate(health)