Release 260111
This commit is contained in:
456
selfdrive/test/test_onroad.py
Normal file
456
selfdrive/test/test_onroad.py
Normal file
@@ -0,0 +1,456 @@
|
||||
import math
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
import numpy as np
|
||||
from collections import Counter, defaultdict
|
||||
from pathlib import Path
|
||||
from tabulate import tabulate
|
||||
|
||||
from cereal import log
|
||||
import cereal.messaging as messaging
|
||||
from cereal.services import SERVICE_LIST
|
||||
from openpilot.common.basedir import BASEDIR
|
||||
from openpilot.common.timeout import Timeout
|
||||
from openpilot.common.params import Params
|
||||
from openpilot.selfdrive.selfdrived.events import EVENTS, ET
|
||||
from openpilot.selfdrive.test.helpers import set_params_enabled, release_only
|
||||
from openpilot.system.hardware import HARDWARE
|
||||
from openpilot.system.hardware.hw import Paths
|
||||
from openpilot.tools.lib.logreader import LogReader
|
||||
from openpilot.tools.lib.log_time_series import msgs_to_time_series
|
||||
|
||||
"""
|
||||
CPU usage budget
|
||||
* each process is entitled to at least 8%
|
||||
* total CPU usage of openpilot (sum(PROCS.values())
|
||||
should not exceed MAX_TOTAL_CPU
|
||||
"""
|
||||
|
||||
TEST_DURATION = 25
|
||||
LOG_OFFSET = 8
|
||||
|
||||
MAX_TOTAL_CPU = 280. # total for all 8 cores
|
||||
PROCS = {
|
||||
# Baseline CPU usage by process
|
||||
"selfdrive.controls.controlsd": 16.0,
|
||||
"selfdrive.selfdrived.selfdrived": 16.0,
|
||||
"selfdrive.car.card": 26.0,
|
||||
"./loggerd": 14.0,
|
||||
"./encoderd": 17.0,
|
||||
"./camerad": 10.0,
|
||||
"selfdrive.controls.plannerd": 9.0,
|
||||
"./ui": 18.0,
|
||||
"selfdrive.locationd.paramsd": 9.0,
|
||||
"./sensord": 7.0,
|
||||
"selfdrive.controls.radard": 2.0,
|
||||
"selfdrive.modeld.modeld": 22.0,
|
||||
"selfdrive.modeld.dmonitoringmodeld": 21.0,
|
||||
"system.hardware.hardwared": 4.0,
|
||||
"selfdrive.locationd.calibrationd": 2.0,
|
||||
"selfdrive.locationd.torqued": 5.0,
|
||||
"selfdrive.locationd.locationd": 25.0,
|
||||
"selfdrive.ui.soundd": 3.0,
|
||||
"selfdrive.monitoring.dmonitoringd": 4.0,
|
||||
"./proclogd": 2.0,
|
||||
"system.logmessaged": 1.0,
|
||||
"system.tombstoned": 0,
|
||||
"./logcatd": 1.0,
|
||||
"system.micd": 5.0,
|
||||
"system.timed": 0,
|
||||
"selfdrive.pandad.pandad": 0,
|
||||
"system.statsd": 1.0,
|
||||
"system.loggerd.uploader": 15.0,
|
||||
"system.loggerd.deleter": 1.0,
|
||||
}
|
||||
|
||||
PROCS.update({
|
||||
"tici": {
|
||||
"./pandad": 5.0,
|
||||
"./ubloxd": 1.0,
|
||||
"system.ubloxd.pigeond": 6.0,
|
||||
},
|
||||
"tizi": {
|
||||
"./pandad": 19.0,
|
||||
"system.qcomgpsd.qcomgpsd": 1.0,
|
||||
}
|
||||
}.get(HARDWARE.get_device_type(), {}))
|
||||
|
||||
TIMINGS = {
|
||||
# rtols: max/min, rsd
|
||||
"can": [2.5, 0.35],
|
||||
"pandaStates": [2.5, 0.35],
|
||||
"peripheralState": [2.5, 0.35],
|
||||
"sendcan": [2.5, 0.35],
|
||||
"carState": [2.5, 0.35],
|
||||
"carControl": [2.5, 0.35],
|
||||
"controlsState": [2.5, 0.35],
|
||||
"longitudinalPlan": [2.5, 0.5],
|
||||
"driverAssistance": [2.5, 0.5],
|
||||
"roadCameraState": [2.5, 0.35],
|
||||
"driverCameraState": [2.5, 0.35],
|
||||
"modelV2": [2.5, 0.35],
|
||||
"driverStateV2": [2.5, 0.40],
|
||||
"livePose": [2.5, 0.35],
|
||||
"wideRoadCameraState": [1.5, 0.35],
|
||||
}
|
||||
|
||||
LOGS_SIZE = { # MB per segment
|
||||
"qlog.zst": 0.5,
|
||||
"rlog.zst": 8.1,
|
||||
"qcamera.ts": 2.3,
|
||||
}
|
||||
LOGS_SIZE.update(dict.fromkeys(['ecamera.hevc', 'fcamera.hevc', 'dcamera.hevc'], 76.5))
|
||||
|
||||
|
||||
def cputime_total(ct):
|
||||
return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem
|
||||
|
||||
|
||||
@pytest.mark.tici
|
||||
@pytest.mark.skip_tici_setup
|
||||
class TestOnroad:
|
||||
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
if "DEBUG" in os.environ:
|
||||
segs = filter(lambda x: os.path.exists(os.path.join(x, "rlog.zst")), Path(Paths.log_root()).iterdir())
|
||||
segs = sorted(segs, key=lambda x: x.stat().st_mtime)
|
||||
cls.lr = list(LogReader(os.path.join(segs[-1], "rlog.zst")))
|
||||
cls.ts = msgs_to_time_series(cls.lr)
|
||||
return
|
||||
|
||||
# setup env
|
||||
params = Params()
|
||||
params.remove("CurrentRoute")
|
||||
params.put_bool("RecordFront", True)
|
||||
set_params_enabled()
|
||||
os.environ['REPLAY'] = '1'
|
||||
os.environ['TESTING_CLOSET'] = '1'
|
||||
if os.path.exists(Paths.log_root()):
|
||||
shutil.rmtree(Paths.log_root())
|
||||
|
||||
# start manager and run openpilot for TEST_DURATION
|
||||
proc = None
|
||||
try:
|
||||
manager_path = os.path.join(BASEDIR, "system/manager/manager.py")
|
||||
cls.manager_st = time.monotonic()
|
||||
proc = subprocess.Popen(["python", manager_path])
|
||||
|
||||
sm = messaging.SubMaster(['carState'])
|
||||
with Timeout(30, "controls didn't start"):
|
||||
while not sm.seen['carState']:
|
||||
sm.update(1000)
|
||||
|
||||
route = params.get("CurrentRoute", encoding="utf-8")
|
||||
assert route is not None
|
||||
|
||||
segs = list(Path(Paths.log_root()).glob(f"{route}--*"))
|
||||
assert len(segs) == 1
|
||||
|
||||
time.sleep(TEST_DURATION)
|
||||
finally:
|
||||
if proc is not None:
|
||||
proc.terminate()
|
||||
if proc.wait(60) is None:
|
||||
proc.kill()
|
||||
|
||||
cls.lr = list(LogReader(os.path.join(str(segs[0]), "rlog.zst")))
|
||||
st = time.monotonic()
|
||||
cls.ts = msgs_to_time_series(cls.lr)
|
||||
print("msgs to time series", time.monotonic() - st)
|
||||
log_path = segs[0]
|
||||
|
||||
cls.log_sizes = {}
|
||||
for f in log_path.iterdir():
|
||||
assert f.is_file()
|
||||
cls.log_sizes[f] = f.stat().st_size / 1e6
|
||||
|
||||
cls.msgs = defaultdict(list)
|
||||
for m in cls.lr:
|
||||
cls.msgs[m.which()].append(m)
|
||||
|
||||
def test_service_frequencies(self, subtests):
|
||||
for s, msgs in self.msgs.items():
|
||||
if s in ('initData', 'sentinel'):
|
||||
continue
|
||||
|
||||
# skip gps services for now
|
||||
if s in ('ubloxGnss', 'ubloxRaw', 'gnssMeasurements', 'gpsLocation', 'gpsLocationExternal', 'qcomGnss'):
|
||||
continue
|
||||
|
||||
with subtests.test(service=s):
|
||||
assert len(msgs) >= math.floor(SERVICE_LIST[s].frequency*int(TEST_DURATION*0.8))
|
||||
|
||||
def test_manager_starting_time(self):
|
||||
st = self.ts['managerState']['t'][0]
|
||||
assert (st - self.manager_st) < 10, f"manager.py took {st - self.manager_st}s to publish the first 'managerState' msg"
|
||||
|
||||
def test_cloudlog_size(self):
|
||||
msgs = self.msgs['logMessage']
|
||||
|
||||
total_size = sum(len(m.as_builder().to_bytes()) for m in msgs)
|
||||
assert total_size < 3.5e5
|
||||
|
||||
cnt = Counter(json.loads(m.logMessage)['filename'] for m in msgs)
|
||||
big_logs = [f for f, n in cnt.most_common(3) if n / sum(cnt.values()) > 30.]
|
||||
assert len(big_logs) == 0, f"Log spam: {big_logs}"
|
||||
|
||||
def test_log_sizes(self, subtests):
|
||||
# TODO: this isn't super stable between different devices
|
||||
for f, sz in self.log_sizes.items():
|
||||
rate = LOGS_SIZE[f.name]/60.
|
||||
minn = rate * TEST_DURATION * 0.5
|
||||
maxx = rate * TEST_DURATION * 1.5
|
||||
with subtests.test(file=f.name):
|
||||
assert minn < sz < maxx
|
||||
|
||||
def test_ui_timings(self):
|
||||
result = "\n"
|
||||
result += "------------------------------------------------\n"
|
||||
result += "-------------- UI Draw Timing ------------------\n"
|
||||
result += "------------------------------------------------\n"
|
||||
|
||||
ts = self.ts['uiDebug']['drawTimeMillis']
|
||||
result += f"min {min(ts):.2f}ms\n"
|
||||
result += f"max {max(ts):.2f}ms\n"
|
||||
result += f"std {np.std(ts):.2f}ms\n"
|
||||
result += f"mean {np.mean(ts):.2f}ms\n"
|
||||
result += "------------------------------------------------\n"
|
||||
print(result)
|
||||
|
||||
assert max(ts) < 250.
|
||||
assert np.mean(ts) < 10.
|
||||
#self.assertLess(np.std(ts), 5.)
|
||||
|
||||
# some slow frames are expected since camerad/modeld can preempt ui
|
||||
veryslow = [x for x in ts if x > 40.]
|
||||
assert len(veryslow) < 5, f"Too many slow frame draw times: {veryslow}"
|
||||
|
||||
def test_cpu_usage(self, subtests):
|
||||
print("\n------------------------------------------------")
|
||||
print("------------------ CPU Usage -------------------")
|
||||
print("------------------------------------------------")
|
||||
|
||||
plogs_by_proc = defaultdict(list)
|
||||
for pl in self.msgs['procLog']:
|
||||
for x in pl.procLog.procs:
|
||||
if len(x.cmdline) > 0:
|
||||
n = list(x.cmdline)[0]
|
||||
plogs_by_proc[n].append(x)
|
||||
|
||||
cpu_ok = True
|
||||
dt = (self.msgs['procLog'][-1].logMonoTime - self.msgs['procLog'][0].logMonoTime) / 1e9
|
||||
header = ['process', 'usage', 'expected', 'max allowed', 'test result']
|
||||
rows = []
|
||||
for proc_name, expected in PROCS.items():
|
||||
|
||||
error = ""
|
||||
usage = 0.
|
||||
x = plogs_by_proc[proc_name]
|
||||
if len(x) > 2:
|
||||
cpu_time = cputime_total(x[-1]) - cputime_total(x[0])
|
||||
usage = cpu_time / dt * 100.
|
||||
|
||||
max_allowed = max(expected * 1.8, expected + 5.0)
|
||||
if usage > max_allowed:
|
||||
error = "❌ USING MORE CPU THAN EXPECTED ❌"
|
||||
cpu_ok = False
|
||||
|
||||
else:
|
||||
error = "❌ NO METRICS FOUND ❌"
|
||||
cpu_ok = False
|
||||
|
||||
rows.append([proc_name, usage, expected, max_allowed, error or "✅"])
|
||||
print(tabulate(rows, header, tablefmt="simple_grid", stralign="center", numalign="center", floatfmt=".2f"))
|
||||
|
||||
# Ensure there's no missing procs
|
||||
all_procs = {p.name for p in self.msgs['managerState'][0].managerState.processes if p.shouldBeRunning}
|
||||
for p in all_procs:
|
||||
with subtests.test(proc=p):
|
||||
assert any(p in pp for pp in PROCS.keys()), f"Expected CPU usage missing for {p}"
|
||||
|
||||
# total CPU check
|
||||
procs_tot = sum([(max(x) if isinstance(x, tuple) else x) for x in PROCS.values()])
|
||||
with subtests.test(name="total CPU"):
|
||||
assert procs_tot < MAX_TOTAL_CPU, "Total CPU budget exceeded"
|
||||
print("------------------------------------------------")
|
||||
print(f"Total allocated CPU usage is {procs_tot}%, budget is {MAX_TOTAL_CPU}%, {MAX_TOTAL_CPU-procs_tot:.1f}% left")
|
||||
print("------------------------------------------------")
|
||||
|
||||
assert cpu_ok
|
||||
|
||||
def test_memory_usage(self):
|
||||
print("\n------------------------------------------------")
|
||||
print("--------------- Memory Usage -------------------")
|
||||
print("------------------------------------------------")
|
||||
offset = int(SERVICE_LIST['deviceState'].frequency * LOG_OFFSET)
|
||||
mems = [m.deviceState.memoryUsagePercent for m in self.msgs['deviceState'][offset:]]
|
||||
print("Memory usage: ", mems)
|
||||
|
||||
# check for big leaks. note that memory usage is
|
||||
# expected to go up while the MSGQ buffers fill up
|
||||
assert np.average(mems) <= 65, "Average memory usage above 65%"
|
||||
assert np.max(np.diff(mems)) <= 4, "Max memory increase too high"
|
||||
assert np.average(np.diff(mems)) <= 1, "Average memory increase too high"
|
||||
|
||||
def test_camera_frame_timings(self, subtests):
|
||||
# test timing within a single camera
|
||||
result = "\n"
|
||||
result += "------------------------------------------------\n"
|
||||
result += "----------------- SOF Timing ------------------\n"
|
||||
result += "------------------------------------------------\n"
|
||||
for name in ['roadCameraState', 'wideRoadCameraState', 'driverCameraState']:
|
||||
ts = self.ts[name]['timestampSof']
|
||||
d_ms = np.diff(ts) / 1e6
|
||||
d50 = np.abs(d_ms-50)
|
||||
result += f"{name} sof delta vs 50ms: min {min(d50):.2f}ms\n"
|
||||
result += f"{name} sof delta vs 50ms: max {max(d50):.2f}ms\n"
|
||||
result += f"{name} sof delta vs 50ms: mean {d50.mean():.2f}ms\n"
|
||||
with subtests.test(camera=name):
|
||||
assert max(d50) < 5.0, f"high SOF delta vs 50ms: {max(d50)}"
|
||||
result += "------------------------------------------------\n"
|
||||
print(result)
|
||||
|
||||
def test_camera_sync(self, subtests):
|
||||
cam_states = ['roadCameraState', 'wideRoadCameraState', 'driverCameraState']
|
||||
encode_cams = ['roadEncodeIdx', 'wideRoadEncodeIdx', 'driverEncodeIdx']
|
||||
for cams in (cam_states, encode_cams):
|
||||
with subtests.test(cams=cams):
|
||||
# sanity checks within a single cam
|
||||
for cam in cams:
|
||||
with subtests.test(test="frame_skips", camera=cam):
|
||||
assert set(np.diff(self.ts[cam]['frameId'])) == {1, }, "Frame ID skips"
|
||||
|
||||
# EOF > SOF
|
||||
eof_sof_diff = self.ts[cam]['timestampEof'] - self.ts[cam]['timestampSof']
|
||||
assert np.all(eof_sof_diff > 0)
|
||||
assert np.all(eof_sof_diff < 50*1e6)
|
||||
|
||||
first_fid = {c: min(self.ts[c]['frameId']) for c in cams}
|
||||
if cam.endswith('CameraState'):
|
||||
# camerad guarantees that all cams start on frame ID 0
|
||||
# (note loggerd also needs to start up fast enough to catch it)
|
||||
assert set(first_fid.values()) == {0, }, "Cameras don't start on frame ID 0"
|
||||
else:
|
||||
# encoder guarantees all cams start on the same frame ID
|
||||
assert len(set(first_fid.values())) == 1, "Cameras don't start on same frame ID"
|
||||
|
||||
# we don't do a full segment rotation, so these might not match exactly
|
||||
last_fid = {c: max(self.ts[c]['frameId']) for c in cams}
|
||||
assert max(last_fid.values()) - min(last_fid.values()) < 10
|
||||
|
||||
start, end = min(first_fid.values()), min(last_fid.values())
|
||||
for i in range(end-start):
|
||||
ts = {c: round(self.ts[c]['timestampSof'][i]/1e6, 1) for c in cams}
|
||||
diff = (max(ts.values()) - min(ts.values()))
|
||||
assert diff < 2, f"Cameras not synced properly: frame_id={start+i}, {diff=:.1f}ms, {ts=}"
|
||||
|
||||
def test_camera_encoder_matches(self, subtests):
|
||||
# sanity check that the frame metadata is consistent with the encoded frames
|
||||
pairs = [('roadCameraState', 'roadEncodeIdx'),
|
||||
('wideRoadCameraState', 'wideRoadEncodeIdx'),
|
||||
('driverCameraState', 'driverEncodeIdx')]
|
||||
for cam, enc in pairs:
|
||||
with subtests.test(camera=cam, encoder=enc):
|
||||
cam_frames = {fid: (sof, eof) for fid, sof, eof in zip(
|
||||
self.ts[cam]['frameId'],
|
||||
self.ts[cam]['timestampSof'],
|
||||
self.ts[cam]['timestampEof'],
|
||||
strict=True,
|
||||
)}
|
||||
for i, fid in enumerate(self.ts[enc]['frameId']):
|
||||
cam_sof, cam_eof = cam_frames[fid]
|
||||
enc_sof, enc_eof = self.ts[enc]['timestampSof'][i], self.ts[enc]['timestampEof'][i]
|
||||
assert enc_sof == cam_sof, f"SOF mismatch: frameId={fid}, enc_sof={enc_sof}, cam_sof={cam_sof}"
|
||||
assert enc_eof == cam_eof, f"EOF mismatch: frameId={fid}, enc_eof={enc_eof}, cam_eof={cam_eof}"
|
||||
|
||||
def test_mpc_execution_timings(self):
|
||||
result = "\n"
|
||||
result += "------------------------------------------------\n"
|
||||
result += "----------------- MPC Timing ------------------\n"
|
||||
result += "------------------------------------------------\n"
|
||||
|
||||
cfgs = [("longitudinalPlan", 0.05, 0.05),]
|
||||
for (s, instant_max, avg_max) in cfgs:
|
||||
ts = [getattr(m, s).solverExecutionTime for m in self.msgs[s]]
|
||||
assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
|
||||
assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
|
||||
result += f"'{s}' execution time: min {min(ts):.5f}s\n"
|
||||
result += f"'{s}' execution time: max {max(ts):.5f}s\n"
|
||||
result += f"'{s}' execution time: mean {np.mean(ts):.5f}s\n"
|
||||
result += "------------------------------------------------\n"
|
||||
print(result)
|
||||
|
||||
def test_model_execution_timings(self, subtests):
|
||||
result = "\n"
|
||||
result += "------------------------------------------------\n"
|
||||
result += "----------------- Model Timing -----------------\n"
|
||||
result += "------------------------------------------------\n"
|
||||
cfgs = [
|
||||
("modelV2", 0.045, 0.035),
|
||||
("driverStateV2", 0.045, 0.035),
|
||||
]
|
||||
for (s, instant_max, avg_max) in cfgs:
|
||||
ts = [getattr(m, s).modelExecutionTime for m in self.msgs[s]]
|
||||
# TODO some init can happen in first iteration
|
||||
ts = ts[1:]
|
||||
result += f"'{s}' execution time: min {min(ts):.5f}s\n"
|
||||
result += f"'{s}' execution time: max {max(ts):.5f}s\n"
|
||||
result += f"'{s}' execution time: mean {np.mean(ts):.5f}s\n"
|
||||
with subtests.test(s):
|
||||
assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
|
||||
assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
|
||||
result += "------------------------------------------------\n"
|
||||
print(result)
|
||||
|
||||
def test_timings(self):
|
||||
passed = True
|
||||
print("\n------------------------------------------------")
|
||||
print("----------------- Service Timings --------------")
|
||||
print("------------------------------------------------")
|
||||
|
||||
header = ['service', 'max', 'min', 'mean', 'expected mean', 'rsd', 'max allowed rsd', 'test result']
|
||||
rows = []
|
||||
for s, (maxmin, rsd) in TIMINGS.items():
|
||||
offset = int(SERVICE_LIST[s].frequency * LOG_OFFSET)
|
||||
msgs = [m.logMonoTime for m in self.msgs[s][offset:]]
|
||||
if not len(msgs):
|
||||
raise Exception(f"missing {s}")
|
||||
|
||||
ts = np.diff(msgs) / 1e9
|
||||
dt = 1 / SERVICE_LIST[s].frequency
|
||||
|
||||
errors = []
|
||||
if not np.allclose(np.mean(ts), dt, rtol=0.03, atol=0):
|
||||
errors.append("❌ FAILED MEAN TIMING CHECK ❌")
|
||||
if not np.allclose([np.max(ts), np.min(ts)], dt, rtol=maxmin, atol=0):
|
||||
errors.append("❌ FAILED MAX/MIN TIMING CHECK ❌")
|
||||
if (np.std(ts)/dt) > rsd:
|
||||
errors.append("❌ FAILED RSD TIMING CHECK ❌")
|
||||
passed = not errors and passed
|
||||
rows.append([s, *(np.array([np.max(ts), np.min(ts), np.mean(ts), dt])*1e3), np.std(ts)/dt, rsd, "\n".join(errors) or "✅"])
|
||||
|
||||
print(tabulate(rows, header, tablefmt="simple_grid", stralign="center", numalign="center", floatfmt=".2f"))
|
||||
assert passed
|
||||
|
||||
@release_only
|
||||
def test_startup(self):
|
||||
startup_alert = self.ts['selfdriveState']['alertText1'][0]
|
||||
expected = EVENTS[log.OnroadEvent.EventName.startup][ET.PERMANENT].alert_text_1
|
||||
assert startup_alert == expected, "wrong startup alert"
|
||||
|
||||
def test_engagable(self):
|
||||
no_entries = Counter()
|
||||
for m in self.msgs['onroadEvents']:
|
||||
for evt in m.onroadEvents:
|
||||
if evt.noEntry:
|
||||
no_entries[evt.name] += 1
|
||||
|
||||
offset = int(SERVICE_LIST['selfdriveState'].frequency * LOG_OFFSET)
|
||||
eng = [m.selfdriveState.engageable for m in self.msgs['selfdriveState'][offset:]]
|
||||
assert all(eng), \
|
||||
f"Not engageable for whole segment:\n- selfdriveState.engageable: {Counter(eng)}\n- No entry events: {no_entries}"
|
||||
Reference in New Issue
Block a user