Release 260111

This commit is contained in:
Comma Device
2026-01-11 18:23:29 +08:00
commit 3721ecbf8a
2601 changed files with 855070 additions and 0 deletions

View File

94
system/manager/build.py Executable file
View File

@@ -0,0 +1,94 @@
#!/usr/bin/env python3
import os
import subprocess
from pathlib import Path
# NOTE: Do NOT import anything here that needs to be built (e.g. params)
from openpilot.common.basedir import BASEDIR
from openpilot.common.spinner import Spinner
from openpilot.common.text_window import TextWindow
from openpilot.system.hardware import HARDWARE, AGNOS
from openpilot.common.swaglog import cloudlog, add_file_handler
from openpilot.system.version import get_build_metadata
# Upper bound for the scons cache, compared against the summed st_size (bytes)
# of the cached files; a larger limit applies when "CI" is in the environment.
MAX_CACHE_SIZE = 4e9 if "CI" in os.environ else 2e9
# Directory holding the scons cache; its location depends on the AGNOS flag.
CACHE_DIR = Path("/data/scons_cache" if AGNOS else "/tmp/scons_cache")
# Denominator used to turn the number reported on "progress: N" lines into a fraction.
TOTAL_SCONS_NODES = 3130
# Progress value reported to the spinner once that fraction reaches 1.
MAX_BUILD_PROGRESS = 100
def build(spinner: Spinner, dirty: bool = False, minimal: bool = False) -> None:
    """Run scons, mirror its progress on ``spinner``, then trim the build cache.

    The build is attempted up to three times with decreasing parallelism
    (all cores, half of them, a single job). If the last attempt still fails,
    the captured stderr is logged, shown in a TextWindow (unless ``CI`` is set
    in the environment) and the process exits with status 1.

    Args:
        spinner: progress UI; updated for every ``progress: N`` line and closed on failure.
        dirty: accepted for interface compatibility; not used by this function.
        minimal: when true, ``--minimal`` is passed to scons.

    Raises:
        SystemExit: with code 1 when every build attempt failed.
    """
    env = os.environ.copy()
    env['SCONS_PROGRESS'] = "1"
    nproc = os.cpu_count() or 2  # cpu_count() may return None
    extra_args = ["--minimal"] if minimal else []

    if AGNOS:
        HARDWARE.set_power_save(False)
        os.sched_setaffinity(0, range(8))  # ensure we can use the isolcpus cores

    # building with all cores can result in using too
    # much memory, so retry with less parallelism.
    # The middle step is clamped to 1 so a single-core host never asks for "-j0".
    job_counts = (nproc, max(1, nproc // 2), 1)
    prefix = b'progress: '
    compile_output: list[bytes] = []
    returncode = None
    for n in job_counts:
        compile_output.clear()
        cmd = ["scons", f"-j{n}", "--cache-populate", *extra_args]
        # The context manager closes the stderr pipe once the attempt is over.
        with subprocess.Popen(cmd, cwd=BASEDIR, env=env, stderr=subprocess.PIPE) as scons:
            assert scons.stderr is not None

            # Read progress from stderr and update spinner
            while scons.poll() is None:
                try:
                    # readline() yields b'' (never None) at EOF; empty lines are dropped below
                    line = scons.stderr.readline().rstrip()
                    if line.startswith(prefix):
                        i = int(line[len(prefix):])
                        spinner.update_progress(MAX_BUILD_PROGRESS * min(1., i / TOTAL_SCONS_NODES), 100.)
                    elif line:
                        compile_output.append(line)
                        print(line.decode('utf8', 'replace'))
                except Exception:
                    # best effort: a malformed progress line or a spinner
                    # error must not abort the build itself
                    pass

            returncode = scons.returncode
            if returncode != 0:
                # Read remaining output before the pipe gets closed
                compile_output += scons.stderr.read().split(b'\n')

        if returncode == 0:
            break

    if returncode != 0:
        # Build failed log errors
        error_s = b"\n".join(compile_output).decode('utf8', 'replace')
        add_file_handler(cloudlog)
        cloudlog.error("scons build failed\n" + error_s)

        # Show TextWindow
        spinner.close()
        if not os.getenv("CI"):
            with TextWindow("openpilot failed to build\n \n" + error_s) as t:
                t.wait_for_exit()
        # exit() is injected by the site module; raising SystemExit works everywhere
        raise SystemExit(1)

    # enforce max cache size: least recently modified files are deleted first
    cache_files = [f for f in CACHE_DIR.rglob('*') if f.is_file()]
    cache_files.sort(key=lambda f: f.stat().st_mtime)
    cache_size = sum(f.stat().st_size for f in cache_files)
    for f in cache_files:
        if cache_size < MAX_CACHE_SIZE:
            break
        cache_size -= f.stat().st_size
        f.unlink()
if __name__ == "__main__":
    # Script entry point: show an empty progress bar first, then build.
    # The build is minimal exactly when AGNOS is truthy.
    spinner = Spinner()
    spinner.update_progress(0, 100)

    build_metadata = get_build_metadata()
    is_dirty = build_metadata.openpilot.is_dirty
    build(spinner, is_dirty, minimal=AGNOS)

67
system/manager/helpers.py Normal file
View File

@@ -0,0 +1,67 @@
import errno
import fcntl
import os
import sys
import pathlib
import shutil
import signal
import subprocess
import tempfile
import threading
from openpilot.common.basedir import BASEDIR
from openpilot.common.params import Params
def unblock_stdout() -> None:
    """Fork into a pty and relay the child's output through a non-blocking stdout.

    The child (``child_pid == 0``) simply returns and carries on with the
    program. The parent never returns: it forwards SIGINT/SIGTERM to the
    child, copies everything the child writes to the pty onto its own stdout
    and finally exits with the child's exit status.
    """
    # get a non-blocking stdout
    child_pid, child_pty = os.forkpty()
    if child_pid != 0:  # parent
        # child is in its own process group, manually pass kill signals
        signal.signal(signal.SIGINT, lambda signum, frame: os.kill(child_pid, signal.SIGINT))
        signal.signal(signal.SIGTERM, lambda signum, frame: os.kill(child_pid, signal.SIGTERM))

        # add O_NONBLOCK to the flags stdout already has
        fcntl.fcntl(sys.stdout, fcntl.F_SETFL, fcntl.fcntl(sys.stdout, fcntl.F_GETFL) | os.O_NONBLOCK)

        while True:
            try:
                dat = os.read(child_pty, 4096)
            except OSError as e:
                # EIO ends the relay loop; any other OSError is retried
                if e.errno == errno.EIO:
                    break
                continue

            if not dat:
                break

            try:
                sys.stdout.write(dat.decode('utf8'))
            except (OSError, UnicodeDecodeError):
                # output that cannot be written or decoded is dropped
                pass

        # os.wait() returns a tuple with the pid and a 16 bit value
        # whose low byte is the signal number and whose high byte is the exit status
        exit_status = os.wait()[1] >> 8
        os._exit(exit_status)
def write_onroad_params(started, params):
    """Store ``started`` as "IsOnroad" and its negation as "IsOffroad"."""
    for key, value in (("IsOnroad", started), ("IsOffroad", not started)):
        params.put_bool(key, value)
def save_bootlog():
    """Copy the params directory to a temp dir and run ``./bootlog`` on the copy.

    ``./bootlog`` is started from ``system/loggerd`` under BASEDIR in a daemon
    thread, with ``PARAMS_COPY_PATH`` pointing at the temp dir. The temp dir is
    removed once bootlog returns, and also when the copy or the launch fails.
    """
    # copy current params
    param_path = Params().get_param_path()
    tmp = tempfile.mkdtemp()
    params_dir = os.path.join(tmp, pathlib.Path(param_path).name)
    try:
        shutil.copytree(param_path, params_dir, dirs_exist_ok=True)
    except Exception:
        # don't leave a half-copied snapshot behind
        shutil.rmtree(tmp, ignore_errors=True)
        raise

    def fn(tmpdir):
        env = os.environ.copy()
        env['PARAMS_COPY_PATH'] = tmpdir
        try:
            subprocess.call("./bootlog", cwd=os.path.join(BASEDIR, "system/loggerd"), env=env)
        finally:
            # remove the snapshot even if bootlog could not be launched
            shutil.rmtree(tmpdir)

    t = threading.Thread(target=fn, args=(tmp, ))
    t.daemon = True
    t.start()

450
system/manager/manager.py Executable file
View File

@@ -0,0 +1,450 @@
#!/usr/bin/env python3
import datetime
import os
import signal
import sys
import traceback
from cereal import log
import cereal.messaging as messaging
import openpilot.system.sentry as sentry
from openpilot.common.params import Params, ParamKeyType
from openpilot.common.text_window import TextWindow
from openpilot.system.hardware import HARDWARE
from openpilot.system.manager.helpers import unblock_stdout, write_onroad_params, save_bootlog
from openpilot.system.manager.process import ensure_running
from openpilot.system.manager.process_config import managed_processes
from openpilot.system.athena.registration import register, UNREGISTERED_DONGLE_ID
from openpilot.common.swaglog import cloudlog, add_file_handler
from openpilot.system.version import get_build_metadata, terms_version, training_version
from openpilot.system.hardware.hw import Paths
def get_default_params() -> list[tuple[str, str | bytes]]:
    """Return a freshly built list of ``(param name, default value)`` pairs."""
    return [
        # kans
        ("LongPitch", "1"),
        ("EVTable", "1"),
        ("CompletedTrainingVersion", "0"),
        ("DisengageOnAccelerator", "0"),
        ("GsmMetered", "1"),
        ("HasAcceptedTerms", "0"),
        ("LanguageSetting", "main_en"),
        ("OpenpilotEnabledToggle", "1"),
        ("LongitudinalPersonality", str(log.LongitudinalPersonality.standard)),
        ("IsMetric", "1"),
        ("RecordAudio", "1"),
        ("SearchInput", "0"),
        ("GMapKey", "0"),
        ("MapboxStyle", "0"),
        ("LongitudinalPersonalityMax", "3"),
        ("ShowDebugUI", "0"),
        ("ShowTpms", "1"),
        ("ShowDateTime", "1"),
        ("ShowPathEnd", "1"),
        ("ShowCustomBrightness", "100"),
        ("ShowLaneInfo", "1"),
        ("ShowRadarInfo", "1"),
        ("ShowDeviceState", "1"),
        ("ShowRouteInfo", "1"),
        ("ShowPathMode", "9"),
        ("ShowPathColor", "13"),
        ("ShowPathColorCruiseOff", "19"),
        ("ShowPathModeLane", "14"),
        ("ShowPathColorLane", "13"),
        ("ShowPlotMode", "0"),
        ("AutoCruiseControl", "0"),
        ("CruiseEcoControl", "2"),
        ("CarrotCruiseDecel", "-1"),
        ("CarrotCruiseAtcDecel", "-1"),
        ("CommaLongAcc", "0"),
        ("AutoGasTokSpeed", "0"),
        ("AutoGasSyncSpeed", "1"),
        ("AutoEngage", "0"),
        ("DisableMinSteerSpeed", "0"),
        ("SoftHoldMode", "0"),
        ("AutoSpeedUptoRoadSpeedLimit", "0"),
        ("AutoRoadSpeedAdjust", "50"),
        ("AutoCurveSpeedLowerLimit", "30"),
        ("AutoCurveSpeedFactor", "120"),
        ("AutoCurveSpeedAggressiveness", "100"),
        ("AutoTurnControl", "0"),
        ("AutoTurnControlSpeedTurn", "20"),
        ("AutoTurnControlTurnEnd", "6"),
        ("AutoTurnMapChange", "0"),
        ("AutoNaviSpeedCtrlEnd", "7"),
        ("AutoNaviSpeedCtrlMode", "2"),
        ("AutoNaviSpeedBumpTime", "1"),
        ("AutoNaviSpeedBumpSpeed", "35"),
        ("AutoNaviSpeedSafetyFactor", "105"),
        ("AutoNaviSpeedDecelRate", "120"),
        ("AutoRoadSpeedLimitOffset", "-1"),
        ("AutoNaviCountDownMode", "2"),
        ("TurnSpeedControlMode", "1"),
        ("CarrotSmartSpeedControl", "0"),
        ("MapTurnSpeedFactor", "90"),
        ("ModelTurnSpeedFactor", "0"),
        ("StoppingAccel", "0"),
        ("StopDistanceCarrot", "550"),
        ("JLeadFactor3", "0"),
        ("CruiseButtonMode", "0"),
        ("CancelButtonMode", "0"),
        ("LfaButtonMode", "0"),
        ("CruiseButtonTest1", "8"),
        ("CruiseButtonTest2", "30"),
        ("CruiseButtonTest3", "1"),
        ("CruiseSpeedUnit", "10"),
        ("CruiseSpeedUnitBasic", "1"),
        ("CruiseSpeed1", "30"),
        ("CruiseSpeed2", "50"),
        ("CruiseSpeed3", "80"),
        ("CruiseSpeed4", "110"),
        ("CruiseSpeed5", "130"),
        ("PaddleMode", "0"),
        ("MyDrivingMode", "3"),
        ("MyDrivingModeAuto", "0"),
        ("TrafficLightDetectMode", "2"),
        ("CruiseMaxVals0", "160"),
        ("CruiseMaxVals1", "200"),
        ("CruiseMaxVals2", "160"),
        ("CruiseMaxVals3", "130"),
        ("CruiseMaxVals4", "110"),
        ("CruiseMaxVals5", "95"),
        ("CruiseMaxVals6", "80"),
        ("LongTuningKpV", "100"),
        ("LongTuningKiV", "0"),
        ("LongTuningKf", "100"),
        ("LongActuatorDelay", "20"),
        ("VEgoStopping", "50"),
        ("RadarReactionFactor", "100"),
        ("EnableRadarTracks", "0"),
        ("RadarLatFactor", "0"),
        ("EnableCornerRadar", "0"),
        ("HyundaiCameraSCC", "0"),
        ("IsLdwsCar", "0"),
        ("CanfdHDA2", "0"),
        ("CanfdDebug", "0"),
        ("SoundVolumeAdjust", "100"),
        ("SoundVolumeAdjustEngage", "10"),
        ("TFollowGap1", "110"),
        ("TFollowGap2", "120"),
        ("TFollowGap3", "140"),
        ("TFollowGap4", "160"),
        ("DynamicTFollow", "0"),
        ("AChangeCostStarting", "10"),
        ("TrafficStopDistanceAdjust", "400"),
        ("DynamicTFollowLC", "100"),
        ("HapticFeedbackWhenSpeedCamera", "0"),
        ("UseLaneLineSpeed", "0"),
        ("PathOffset", "0"),
        ("UseLaneLineCurveSpeed", "0"),
        ("AdjustLaneOffset", "0"),
        ("LaneChangeNeedTorque", "0"),
        ("LaneChangeDelay", "0"),
        ("LaneChangeBsd", "0"),
        ("MaxAngleFrames", "89"),
        ("LateralTorqueCustom", "0"),
        ("LateralTorqueAccelFactor", "2500"),
        ("LateralTorqueFriction", "100"),
        ("LateralTorqueKpV", "100"),
        ("LateralTorqueKiV", "10"),
        ("LateralTorqueKf", "100"),
        ("LateralTorqueKd", "0"),
        ("LatMpcPathCost", "200"),
        ("LatMpcMotionCost", "7"),
        ("LatMpcAccelCost", "120"),
        ("LatMpcJerkCost", "4"),
        ("LatMpcSteeringRateCost", "7"),
        ("LatMpcInputOffset", "4"),
        ("CustomSteerMax", "0"),
        ("CustomSteerDeltaUp", "0"),
        ("CustomSteerDeltaDown", "0"),
        ("CustomSteerDeltaUpLC", "0"),
        ("CustomSteerDeltaDownLC", "0"),
        ("SpeedFromPCM", "2"),
        ("SteerActuatorDelay", "0"),
        ("LatSmoothSec", "13"),
        ("MaxTimeOffroadMin", "60"),
        ("DisableDM", "1"),
        ("EnableConnect", "0"),
        ("MuteDoor", "0"),
        ("MuteSeatbelt", "0"),
        ("RecordRoadCam", "0"),
        ("HDPuse", "0"),
        ("CruiseOnDist", "400"),
        ("HotspotOnBoot", "0"),
        ("SoftwareMenu", "1"),
        ("CustomSR", "0"),
        ("SteerRatioRate", "100"),
        ("NNFF", "0"),
        ("NNFFLite", "0"),
        ("ForceOffroad", "0"),
        ("BydModifiedStockLong", "1"),
        ("AlwaysOnLKAS", "0"),
        ("BydAutoTuning", "0"),
        ("BydLatUseSiglin", "1"),
        ("CameraOffset", "8"),
        ("BydBsdType2", "0"),
        ("UseRedPanda", "1"),
        ("KeepLkasPassive", "0"),
        ("UseSteerRateLimiter", "1"),
        ("SteerRateLimLoSpd", "132"),
        ("SteerRateLimHiSpd", "64"),
        ("BydMpcTsr", "0"),
        ("BydLowSpdLong", "1"),
        ("SpeedCorrect30", "0"),
        ("SpeedCorrect60", "0"),
        ("SpeedCorrect90", "0"),
        ("SpeedCorrect120", "0"),
        ("LateralAngleSpdUp0", "500"),
        ("LateralAngleSpdDn0", "500"),
        ("LateralAngleSpdBp1", "30"),
        ("LateralAngleSpdUp1", "80"),
        ("LateralAngleSpdDn1", "350"),
        ("LateralAngleSpdBp2", "70"),
        ("LateralAngleSpdUp2", "15"),
        ("LateralAngleSpdDn2", "40"),
        ("LateralAngleTorqMax", "30"),
        ("LateralAngleTorqCut", "10"),
    ]
def set_default_params():
    """Write every default from get_default_params() to Params, except a few kept keys.

    "GMapKey", "CompletedTrainingVersion", "LanguageSetting" and "GsmMetered"
    are never overwritten. They are skipped by key, so the skip list keeps
    working even if one of their default values changes.
    """
    keep_current = {"GMapKey", "CompletedTrainingVersion", "LanguageSetting", "GsmMetered"}

    params = Params()
    for k, v in get_default_params():
        if k in keep_current:
            continue
        params.put(k, v)
        print(f"SetToDefault[{k}]={v}")
def get_default_params_key():
    """Return the names from get_default_params(), in list order."""
    return [name for name, _default in get_default_params()]
def manager_init() -> None:
    """Prepare params, environment variables, logging and processes for the manager.

    Steps, in order: save a bootlog, clear the start/transition param groups
    (plus the development-only group on a release channel), fill in unset
    defaults, create the shm directory, publish version/git/serial params,
    register the device, export the swaglog environment variables, initialise
    sentry and cloudlog, and call ``prepare()`` on every managed process.

    Raises:
        Exception: when register() returns a falsy dongle id.
    """
    save_bootlog()

    build_metadata = get_build_metadata()

    params = Params()
    for key_type in (ParamKeyType.CLEAR_ON_MANAGER_START,
                     ParamKeyType.CLEAR_ON_ONROAD_TRANSITION,
                     ParamKeyType.CLEAR_ON_OFFROAD_TRANSITION):
        params.clear_all(key_type)
    if build_metadata.release_channel:
        params.clear_all(ParamKeyType.DEVELOPMENT_ONLY)

    defaults = get_default_params()

    if params.get_bool("RecordFrontLock"):
        params.put_bool("RecordFront", True)

    # set unset params
    for key, value in defaults:
        if params.get(key) is None:
            params.put(key, value)

    # Create folders needed for msgq
    try:
        os.mkdir(Paths.shm_path())
    except FileExistsError:
        pass
    except PermissionError:
        print(f"WARNING: failed to make {Paths.shm_path()}")

    # set params
    serial = HARDWARE.get_serial()
    op_meta = build_metadata.openpilot
    params.put("Version", op_meta.version)
    params.put("TermsVersion", terms_version)
    params.put("TrainingVersion", training_version)
    params.put("GitCommit", op_meta.git_commit)
    params.put("GitCommitDate", op_meta.git_commit_date)
    params.put("GitBranch", build_metadata.channel)
    params.put("GitRemote", op_meta.git_origin)
    params.put_bool("IsTestedBranch", build_metadata.tested_channel)
    params.put_bool("IsReleaseBranch", build_metadata.release_channel)
    params.put("HardwareSerial", serial)

    # set dongle id
    dongle_id = register(show_spinner=True)
    if not dongle_id:
        raise Exception(f"Registration failed for device {serial}")

    # Needed for swaglog
    os.environ['DONGLE_ID'] = dongle_id
    os.environ['GIT_ORIGIN'] = op_meta.git_normalized_origin
    os.environ['GIT_BRANCH'] = build_metadata.channel
    os.environ['GIT_COMMIT'] = op_meta.git_commit
    if not op_meta.is_dirty:
        os.environ['CLEAN'] = '1'

    # init logging
    sentry.init(sentry.SentryProject.SELFDRIVE)
    cloudlog.bind_global(
        dongle_id=dongle_id,
        version=op_meta.version,
        origin=op_meta.git_normalized_origin,
        branch=build_metadata.channel,
        commit=op_meta.git_commit,
        dirty=op_meta.is_dirty,
        device=HARDWARE.get_device_type(),
    )

    # preimport all processes
    for proc in managed_processes.values():
        proc.prepare()
def manager_cleanup() -> None:
    """Stop every managed process: signal all of them first, then wait for each."""
    # first pass sends the kill signals without waiting,
    # second pass blocks until each process is really gone
    for blocking in (False, True):
        for proc in managed_processes.values():
            proc.stop(block=blocking)

    cloudlog.info("everything is dead")
def manager_thread() -> None:
    """Main manager loop: start/stop processes as deviceState.started changes.

    Each iteration updates the SubMaster (called with 1000), clears the
    matching transition params and rewrites the onroad params when ``started``
    flips, calls ensure_running(), publishes ``managerState`` and leaves the
    loop once "DoUninstall", "DoShutdown" or "DoReboot" is set.
    """
    cloudlog.bind(daemon="manager")
    cloudlog.info("manager start")
    cloudlog.info({"environ": os.environ})

    params = Params()

    # names of processes that must not be started
    ignore: list[str] = []
    if params.get("DongleId", encoding='utf8') in (None, UNREGISTERED_DONGLE_ID):
        ignore += ["manage_athenad", "uploader"]
    if os.getenv("NOBOARD") is not None:
        ignore.append("pandad")
    # BLOCK is a comma separated list of additional process names to skip
    ignore += [x for x in os.getenv("BLOCK", "").split(",") if len(x) > 0]

    # NOTE(review): this tests the raw value of params.get(), while c3x_lite() in
    # process_config uses get_bool() on the same key; confirm the key is never stored as "0"
    # NOTE(review): indentation was lost in SOURCE; the put() below is assumed to belong to this branch
    if params.get("HardwareC3xLite"):
        ignore += ["micd", "soundd", "loggerd"]
        params.put("RecordAudio", "0")

    sm = messaging.SubMaster(['deviceState', 'carParams'], poll='deviceState')
    pm = messaging.PubMaster(['managerState'])

    write_onroad_params(False, params)
    ensure_running(managed_processes.values(), False, params=params, CP=sm['carParams'], not_run=ignore)

    print_timer = 0

    started_prev = False

    while True:
        sm.update(1000)

        started = sm['deviceState'].started

        if started and not started_prev:
            params.clear_all(ParamKeyType.CLEAR_ON_ONROAD_TRANSITION)
        elif not started and started_prev:
            params.clear_all(ParamKeyType.CLEAR_ON_OFFROAD_TRANSITION)

        # update onroad params, which drives pandad's safety setter thread
        if started != started_prev:
            write_onroad_params(started, params)

        started_prev = started

        ensure_running(managed_processes.values(), started, params=params, CP=sm['carParams'], not_run=ignore)

        # one ANSI-coloured name per process object: green when alive, red otherwise
        running = ' '.join("{}{}\u001b[0m".format("\u001b[32m" if p.proc.is_alive() else "\u001b[31m", p.name)
                           for p in managed_processes.values() if p.proc)
        # print the status line only on every 10th iteration
        print_timer = (print_timer + 1)%10
        if print_timer == 0:
            print(running)
        # NOTE(review): indentation was lost in SOURCE; assumed to run on every iteration — confirm
        cloudlog.debug(running)

        # send managerState
        msg = messaging.new_message('managerState', valid=True)
        msg.managerState.processes = [p.get_process_state_msg() for p in managed_processes.values()]
        pm.send('managerState', msg)

        # Exit main loop when uninstall/shutdown/reboot is needed
        shutdown = False
        for param in ("DoUninstall", "DoShutdown", "DoReboot"):
            if params.get_bool(param):
                shutdown = True
                params.put("LastManagerExitReason", f"{param} {datetime.datetime.now()}")
                cloudlog.warning(f"Shutting down manager - {param} set")

        if shutdown:
            break
def main() -> None:
    """Initialise the manager, export the supported-car lists and run the main loop.

    Returns right after the exports when ``PREPAREONLY`` is set. Otherwise the
    manager loop runs until it exits or raises; all processes are then cleaned
    up and the first set flag among "DoUninstall", "DoReboot" and "DoShutdown"
    triggers the matching HARDWARE action.
    """
    manager_init()

    param_path = Params().get_param_path()
    exports = (
        ("hyundai", "SupportedCars"),
        ("gm", "SupportedCars_gm"),
        ("toyota", "SupportedCars_toyota"),
        ("mazda", "SupportedCars_mazda"),
        ("byd", "SupportedCars_byd"),
    )
    for index, (brand, target) in enumerate(exports):
        command = f"python ../../opendbc/car/{brand}/values.py > {param_path}/{target}"
        if index == 0:
            # only the first command is echoed
            print(command)
        os.system(command)

    if os.getenv("PREPAREONLY") is not None:
        return

    # SystemExit on sigterm
    signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit(1))

    try:
        manager_thread()
    except Exception:
        traceback.print_exc()
        sentry.capture_exception()
    finally:
        manager_cleanup()

    params = Params()
    final_actions = (
        ("DoUninstall", "uninstalling", HARDWARE.uninstall),
        ("DoReboot", "reboot", HARDWARE.reboot),
        ("DoShutdown", "shutdown", HARDWARE.shutdown),
    )
    for flag, message, action in final_actions:
        if params.get_bool(flag):
            cloudlog.warning(message)
            action()
            break
if __name__ == "__main__":
    # Script entry point: fork behind a pty, run the manager and show a
    # TextWindow with the end of the traceback if it fails to start.
    unblock_stdout()

    try:
        main()
    except KeyboardInterrupt:
        print("got CTRL-C, exiting")
    except Exception:
        add_file_handler(cloudlog)
        cloudlog.exception("Manager failed to start")

        # stop the ui process if that is possible; failures here are ignored
        try:
            managed_processes['ui'].stop()
        except Exception:
            pass

        # Show last 3 lines of traceback
        error = "Manager failed to start\n\n" + traceback.format_exc(-3)
        with TextWindow(error) as window:
            window.wait_for_exit()

        raise

    # manual exit because we are forked
    sys.exit(0)

301
system/manager/process.py Normal file
View File

@@ -0,0 +1,301 @@
import importlib
import os
import signal
import struct
import time
import subprocess
from collections.abc import Callable, ValuesView
from abc import ABC, abstractmethod
from multiprocessing import Process
from setproctitle import setproctitle
from cereal import car, log
import cereal.messaging as messaging
import openpilot.system.sentry as sentry
from openpilot.common.basedir import BASEDIR
from openpilot.common.params import Params
from openpilot.common.swaglog import cloudlog
from openpilot.system.hardware.hw import Paths
# Path prefix of the per-process watchdog files; check_watchdog() appends the pid.
WATCHDOG_FN = f"{Paths.shm_path()}/wd_"
# Watchdog restarts are active unless NO_WATCHDOG is present in the environment.
ENABLE_WATCHDOG = os.getenv("NO_WATCHDOG") is None
def launcher(proc: str, name: str) -> None:
    """Import module ``proc`` and run its ``main()``.

    Args:
        proc: dotted module path; also used as the process title.
        name: daemon name bound to cloudlog and set as the sentry "daemon" tag.

    A KeyboardInterrupt is logged and swallowed; any other exception is
    reported to sentry and re-raised.
    """
    try:
        # import the process
        module = importlib.import_module(proc)

        # rename the process
        setproctitle(proc)

        # create new context since we forked
        messaging.reset_context()

        # add daemon name tag to logs
        cloudlog.bind(daemon=name)
        sentry.set_tag("daemon", name)

        # exec the process
        module.main()
    except KeyboardInterrupt:
        cloudlog.warning(f"child {proc} got SIGINT")
    except Exception:
        # can't install the crash handler because sys.excepthook doesn't play nice
        # with threads, so catch it here.
        sentry.capture_exception()
        raise
def nativelauncher(pargs: list[str], cwd: str, name: str) -> None:
    """Replace the current process with the command ``pargs``, run from ``cwd``.

    ``MANAGER_DAEMON`` is set to ``name`` in the environment beforehand.
    """
    os.environ['MANAGER_DAEMON'] = name

    # exec the process
    os.chdir(cwd)
    executable = pargs[0]
    os.execvp(executable, pargs)
def join_process(process: Process, timeout: float) -> None:
    """Wait until ``process`` has an exit code or ``timeout`` seconds have passed."""
    # Process().join(timeout) will hang due to a python 3 bug: https://bugs.python.org/issue28382
    # We have to poll the exitcode instead
    start = time.monotonic()
    while True:
        if not time.monotonic() - start < timeout:
            return
        if process.exitcode is not None:
            return
        time.sleep(0.001)
class ManagerProcess(ABC):
    """Base class for the processes supervised by the manager.

    Subclasses implement prepare() and start(); this class provides stopping,
    signalling, watchdog checking and the managerState status message.
    """
    daemon = False
    # when true, stop() uses SIGKILL instead of SIGINT as its default signal
    sigkill = False
    # predicate called as should_run(started, params, CP) by ensure_running()
    should_run: Callable[[bool, Params, car.CarParams], bool]
    # handle of the running process, None while nothing is running
    proc: Process | None = None
    enabled = True
    name = ""

    # last value read from the watchdog file; divided by 1e9 before being
    # compared with time.monotonic() in check_watchdog()
    last_watchdog_time = 0
    # largest tolerated watchdog age; None disables the check
    watchdog_max_dt: int | None = None
    # set once the watchdog age has been within the limit
    watchdog_seen = False
    # true between sending the stop signal and observing the exit code
    shutting_down = False

    @abstractmethod
    def prepare(self) -> None:
        pass

    @abstractmethod
    def start(self) -> None:
        pass

    def restart(self) -> None:
        """Stop the process with SIGKILL and start it again."""
        self.stop(sig=signal.SIGKILL)
        self.start()

    def check_watchdog(self, started: bool) -> None:
        """Restart the process when its watchdog file is older than watchdog_max_dt.

        Does nothing without a configured limit or a running process. A restart
        only happens once the watchdog has been seen fresh and ENABLE_WATCHDOG
        is true. ``started`` is only used in the log message.
        """
        if self.watchdog_max_dt is None or self.proc is None:
            return

        try:
            fn = WATCHDOG_FN + str(self.proc.pid)
            with open(fn, "rb") as f:
                # TODO: why can't pylint find struct.unpack?
                self.last_watchdog_time = struct.unpack('Q', f.read())[0]
        except Exception:
            # a missing or malformed file keeps the previous timestamp
            pass

        dt = time.monotonic() - self.last_watchdog_time / 1e9

        if dt > self.watchdog_max_dt:
            if self.watchdog_seen and ENABLE_WATCHDOG:
                cloudlog.error(f"Watchdog timeout for {self.name} (exitcode {self.proc.exitcode}) restarting ({started=})")
                self.restart()
        else:
            self.watchdog_seen = True

    def stop(self, retry: bool = True, block: bool = True, sig: signal.Signals | None = None) -> int | None:
        """Signal the process to exit and, when blocking, wait for it.

        Args:
            retry: send SIGKILL and join if the process is still alive after the 5 second wait.
            block: when false, return right after the first signal has been sent.
            sig: signal to send; defaults to SIGKILL if ``self.sigkill`` else SIGINT.

        Returns:
            The exit code, or None when there is no process, when returning
            early because ``block`` is false, or when the process has not exited.
        """
        if self.proc is None:
            return None

        if self.proc.exitcode is None:
            # the signal is sent only once; later calls just wait
            if not self.shutting_down:
                cloudlog.info(f"killing {self.name}")
                if sig is None:
                    sig = signal.SIGKILL if self.sigkill else signal.SIGINT
                self.signal(sig)
                self.shutting_down = True

                if not block:
                    return None

            join_process(self.proc, 5)

            # If process failed to die send SIGKILL
            if self.proc.exitcode is None and retry:
                cloudlog.info(f"killing {self.name} with SIGKILL")
                self.signal(signal.SIGKILL)
                self.proc.join()

        ret = self.proc.exitcode
        cloudlog.info(f"{self.name} is dead with {ret}")

        # forget the process handle only once it has really exited
        if self.proc.exitcode is not None:
            self.shutting_down = False
            self.proc = None

        return ret

    def signal(self, sig: int) -> None:
        """Send ``sig`` to the process unless it has no pid or has already exited."""
        if self.proc is None:
            return

        # Don't signal if already exited
        if self.proc.exitcode is not None and self.proc.pid is not None:
            return

        # Can't signal if we don't have a pid
        if self.proc.pid is None:
            return

        cloudlog.info(f"sending signal {sig} to {self.name}")
        os.kill(self.proc.pid, sig)

    def get_process_state_msg(self):
        """Build a ManagerState.ProcessState message describing this process.

        Only ``name`` is filled in when there is no process handle.
        """
        state = log.ManagerState.ProcessState.new_message()
        state.name = self.name
        if self.proc:
            state.running = self.proc.is_alive()
            state.shouldBeRunning = self.proc is not None and not self.shutting_down
            state.pid = self.proc.pid or 0
            state.exitCode = self.proc.exitcode or 0
        return state
class NativeProcess(ManagerProcess):
    """Managed process started from a command line in a directory below BASEDIR."""

    def __init__(self, name, cwd, cmdline, should_run, enabled=True, sigkill=False, watchdog_max_dt=None):
        self.name = name
        self.enabled = enabled
        self.should_run = should_run
        self.cwd = cwd
        self.cmdline = cmdline
        self.sigkill = sigkill
        self.watchdog_max_dt = watchdog_max_dt
        self.launcher = nativelauncher

    def prepare(self) -> None:
        """Nothing to prepare for a native process."""
        pass

    def start(self) -> None:
        """Start the process unless one is already present."""
        # In case we only tried a non blocking stop we need to stop it before restarting
        if self.shutting_down:
            self.stop()

        if self.proc is not None:
            return

        workdir = os.path.join(BASEDIR, self.cwd)
        cloudlog.info(f"starting process {self.name}")
        launch_args = (self.cmdline, workdir, self.name)
        self.proc = Process(name=self.name, target=self.launcher, args=launch_args)
        self.proc.start()
        self.watchdog_seen = False
        self.shutting_down = False
class PythonProcess(ManagerProcess):
    """Managed process that runs ``main()`` of a Python module in a child process."""

    def __init__(self, name, module, should_run, enabled=True, sigkill=False, watchdog_max_dt=None):
        self.name = name
        self.enabled = enabled
        self.should_run = should_run
        self.module = module
        self.sigkill = sigkill
        self.watchdog_max_dt = watchdog_max_dt
        self.launcher = launcher

    def prepare(self) -> None:
        """Import the module ahead of time; import failures are only printed."""
        if not self.enabled:
            return

        cloudlog.info(f"preimporting {self.module}")
        try:
            importlib.import_module(self.module)
        except Exception as e:
            print(f"failed to import {self.module}: {e}")

    def start(self) -> None:
        """Start the process unless one is already present."""
        # In case we only tried a non blocking stop we need to stop it before restarting
        if self.shutting_down:
            self.stop()

        if self.proc is not None:
            return

        # TODO: this is just a workaround for this tinygrad check:
        # https://github.com/tinygrad/tinygrad/blob/ac9c96dae1656dc220ee4acc39cef4dd449aa850/tinygrad/device.py#L26
        process_name = "MainProcess" if "modeld" in self.name else self.name

        cloudlog.info(f"starting python {self.module}")
        launch_args = (self.module, self.name)
        self.proc = Process(name=process_name, target=self.launcher, args=launch_args)
        self.proc.start()
        self.watchdog_seen = False
        self.shutting_down = False
class DaemonProcess(ManagerProcess):
    """Python process that has to stay running across manager restart.
    This is used for athena so you don't lose SSH access when restarting manager."""

    def __init__(self, name, module, param_name, enabled=True):
        self.name = name
        self.module = module
        # name of the param that stores the daemon's pid
        self.param_name = param_name
        self.enabled = enabled
        # created lazily by start()
        self.params = None

    @staticmethod
    def should_run(started, params, CP):
        """A daemon is always wanted."""
        return True

    def prepare(self) -> None:
        pass

    def start(self) -> None:
        """Launch the daemon unless the stored pid still belongs to it.

        The daemon counts as running when signalling the stored pid with 0
        succeeds and ``/proc/<pid>/cmdline`` contains the module name.
        """
        if self.params is None:
            self.params = Params()

        pid = self.params.get(self.param_name, encoding='utf-8')
        if pid is not None:
            try:
                os.kill(int(pid), 0)
                with open(f'/proc/{pid}/cmdline') as f:
                    if self.module in f.read():
                        # daemon is running
                        return
            except (OSError, ValueError):
                # process is dead, or the stored pid is not a number
                pass

        cloudlog.info(f"starting daemon {self.name}")
        # DEVNULL avoids leaving three open /dev/null handles behind in the manager
        proc = subprocess.Popen(['python', '-m', self.module],
                                stdin=subprocess.DEVNULL,
                                stdout=subprocess.DEVNULL,
                                stderr=subprocess.DEVNULL,
                                preexec_fn=os.setpgrp)

        self.params.put(self.param_name, str(proc.pid))

    def stop(self, retry=True, block=True, sig=None) -> None:
        """Deliberately a no-op: the daemon is never stopped by the manager."""
        pass
def ensure_running(procs: ValuesView[ManagerProcess], started: bool, params=None, CP: car.CarParams=None,
                   not_run: list[str] | None=None) -> list[ManagerProcess]:
    """Start the processes that should run and stop (non-blocking) all others.

    A process should run when it is enabled, its name is not in ``not_run``
    and ``should_run(started, params, CP)`` is truthy. The watchdog of every
    process is checked before the selected ones are started.

    Returns:
        The processes selected to run, in iteration order.
    """
    excluded = [] if not_run is None else not_run

    selected = []
    for proc in procs:
        wanted = proc.enabled and proc.name not in excluded and proc.should_run(started, params, CP)
        if wanted:
            selected.append(proc)
        else:
            proc.stop(block=False)

        proc.check_watchdog(started)

    for proc in selected:
        proc.start()

    return selected

View File

@@ -0,0 +1,144 @@
import os
import operator
import importlib.util
from cereal import car
from openpilot.common.params import Params
from openpilot.system.hardware import PC, TICI
from openpilot.system.manager.process import PythonProcess, NativeProcess, DaemonProcess
# True when the "flask" package can be found; returned by check_fleet().
FLASK_AVAILABLE = importlib.util.find_spec("flask") is not None
# True when USE_WEBCAM is present in the environment.
WEBCAM = os.getenv("USE_WEBCAM") is not None
def driverview(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started, otherwise when the "IsDriverViewEnabled" param is set."""
    if started:
        return started
    return params.get_bool("IsDriverViewEnabled")
def notcar(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and ``CP.notCar`` is set."""
    if not started:
        return started
    return CP.notCar
def iscar(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and ``CP.notCar`` is not set."""
    if not started:
        return started
    return not CP.notCar
def logging(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started, unless ``CP.notCar`` and the "DisableLogging" param are both set."""
    # the param is only consulted when CP.notCar is truthy
    run = not (CP.notCar and params.get_bool("DisableLogging"))
    return started and run
def ublox_available() -> bool:
    """True when '/dev/ttyHS0' exists and '/persist/comma/use-quectel-gps' does not."""
    if not os.path.exists('/dev/ttyHS0'):
        return False
    return not os.path.exists('/persist/comma/use-quectel-gps')
def ublox(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and ublox is available; keeps the "UbloxAvailable" param in sync."""
    available = ublox_available()
    # write the param only when the stored value differs
    if params.get_bool("UbloxAvailable") != available:
        params.put_bool("UbloxAvailable", available)
    return started and available
def joystick(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and the "JoystickDebugMode" param is set."""
    if not started:
        return started
    return params.get_bool("JoystickDebugMode")
def not_joystick(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and the "JoystickDebugMode" param is not set."""
    if not started:
        return started
    return not params.get_bool("JoystickDebugMode")
def long_maneuver(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and the "LongitudinalManeuverMode" param is set."""
    if not started:
        return started
    return params.get_bool("LongitudinalManeuverMode")
def not_long_maneuver(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and the "LongitudinalManeuverMode" param is not set."""
    if not started:
        return started
    return not params.get_bool("LongitudinalManeuverMode")
def qcomgps(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and ublox is not available."""
    if not started:
        return started
    return not ublox_available()
def always_run(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run regardless of the arguments."""
    wanted = True
    return wanted
def only_onroad(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run exactly while ``started`` is truthy; the value is passed through unchanged."""
    wanted = started
    return wanted
def only_offroad(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run exactly while ``started`` is falsy."""
    wanted = not started
    return wanted
def enable_updated(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when not started and the "SoftwareMenu" param is set."""
    if started:
        return False
    return params.get_bool("SoftwareMenu")
def check_fleet(started, params, CP: car.CarParams) -> bool:
    """Run whenever FLASK_AVAILABLE is true; the arguments are ignored."""
    wanted = FLASK_AVAILABLE
    return wanted
def or_(*fns):
    """Combine predicates: the returned callable ORs the results of all ``fns``.

    Every predicate is called with the same arguments (no short-circuiting)
    and the results are folded with ``operator.or_``. Any number of predicates
    from one upwards is accepted; with none, calling the result raises TypeError.
    """
    # local import so the module's import block does not have to change
    from functools import reduce
    return lambda *args: reduce(operator.or_, (fn(*args) for fn in fns))
def and_(*fns):
    """Combine predicates: the returned callable ANDs the results of all ``fns``.

    Every predicate is called with the same arguments (no short-circuiting)
    and the results are folded with ``operator.and_``. Any number of predicates
    from one upwards is accepted; with none, calling the result raises TypeError.
    """
    # local import so the module's import block does not have to change
    from functools import reduce
    return lambda *args: reduce(operator.and_, (fn(*args) for fn in fns))
def enable_dm(started, params, CP: car.CarParams) -> bool:
    """Run when started or driver view is enabled, and the "DisableDM" param equals 0."""
    view_active = started or params.get_bool("IsDriverViewEnabled")
    return view_active and params.get_int("DisableDM") == 0
def enable_connect(started, params, CP: car.CarParams) -> bool:
    """Run when the "EnableConnect" param is greater than 0."""
    level = params.get_int("EnableConnect")
    return level > 0
def c3x_lite(started: bool, params: Params, CP: car.CarParams) -> bool:
    """Run when started and the "HardwareC3xLite" param is set."""
    if not started:
        return started
    return params.get_bool("HardwareC3xLite")
# Every process known to the manager.
# The last positional argument of a NativeProcess/PythonProcess entry is the
# predicate that decides whether the process should currently run;
# ``enabled=False`` excludes an entry regardless of its predicate.
procs = [
    DaemonProcess("manage_athenad", "system.athena.manage_athenad", "AthenadPid"),
    NativeProcess("loggerd", "system/loggerd", ["./loggerd"], logging),
    NativeProcess("encoderd", "system/loggerd", ["./encoderd"], only_onroad),
    NativeProcess("stream_encoderd", "system/loggerd", ["./encoderd", "--stream"], notcar),
    PythonProcess("logmessaged", "system.logmessaged", always_run),
    NativeProcess("camerad", "system/camerad", ["./camerad"], driverview, enabled=not WEBCAM),
    PythonProcess("webcamerad", "tools.webcam.camerad", driverview, enabled=WEBCAM),
    NativeProcess("logcatd", "system/logcatd", ["./logcatd"], only_onroad),
    NativeProcess("proclogd", "system/proclogd", ["./proclogd"], only_onroad),
    PythonProcess("micd", "system.micd", iscar),
    PythonProcess("timed", "system.timed", always_run, enabled=not PC),
    PythonProcess("modeld", "selfdrive.modeld.modeld", only_onroad),
    PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", enable_dm, enabled=(WEBCAM or not PC)),
    #NativeProcess("mapsd", "selfdrive/navd", ["./mapsd"], only_onroad),
    #NativeProcess("mapsd", "selfdrive/navd", ["./mapsd"], always_run),
    #PythonProcess("navmodeld", "selfdrive.modeld.navmodeld", only_onroad),
    PythonProcess("sensord", "system.sensord.sensord", only_onroad, enabled=not PC),
    NativeProcess("ui", "selfdrive/ui", ["./ui"], always_run, watchdog_max_dt=(5 if not PC else None)),
    PythonProcess("soundd", "selfdrive.ui.soundd", only_onroad),
    PythonProcess("locationd", "selfdrive.locationd.locationd", only_onroad),
    NativeProcess("_pandad", "selfdrive/pandad", ["./pandad"], always_run, enabled=False),
    PythonProcess("calibrationd", "selfdrive.locationd.calibrationd", only_onroad),
    PythonProcess("torqued", "selfdrive.locationd.torqued", only_onroad),
    PythonProcess("controlsd", "selfdrive.controls.controlsd", and_(not_joystick, iscar)),
    PythonProcess("joystickd", "tools.joystick.joystickd", or_(joystick, notcar)),
    PythonProcess("selfdrived", "selfdrive.selfdrived.selfdrived", only_onroad),
    PythonProcess("card", "selfdrive.car.card", only_onroad),
    PythonProcess("deleter", "system.loggerd.deleter", always_run),
    PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", enable_dm, enabled=(WEBCAM or not PC)),
    PythonProcess("qcomgpsd", "system.qcomgpsd.qcomgpsd", qcomgps, enabled=TICI),
    PythonProcess("navd", "selfdrive.navd.navd", only_onroad),
    PythonProcess("pandad", "selfdrive.pandad.pandad", always_run),
    PythonProcess("paramsd", "selfdrive.locationd.paramsd", only_onroad),
    PythonProcess("lagd", "selfdrive.locationd.lagd", only_onroad),
    PythonProcess("ubloxd", "system.ubloxd.ubloxd", ublox, enabled=TICI),
    PythonProcess("pigeond", "system.ubloxd.pigeond", ublox, enabled=TICI),
    PythonProcess("plannerd", "selfdrive.controls.plannerd", not_long_maneuver),
    PythonProcess("maneuversd", "tools.longitudinal_maneuvers.maneuversd", long_maneuver),
    PythonProcess("radard", "selfdrive.controls.radard", only_onroad),
    PythonProcess("hardwared", "system.hardware.hardwared", always_run),
    PythonProcess("tombstoned", "system.tombstoned", always_run, enabled=not PC),
    PythonProcess("updated", "system.updated.updated", enable_updated, enabled=not PC),
    PythonProcess("uploader", "system.loggerd.uploader", enable_connect),
    PythonProcess("statsd", "system.statsd", always_run),
    # debug procs
    NativeProcess("bridge", "cereal/messaging", ["./bridge"], notcar),
    PythonProcess("webrtcd", "system.webrtc.webrtcd", notcar),
    PythonProcess("webjoystick", "tools.bodyteleop.web", notcar),
    PythonProcess("joystick", "tools.joystick.joystick_control", and_(joystick, iscar)),
    #PythonProcess("fleet_manager", "selfdrive.frogpilot.fleetmanager.fleet_manager", check_fleet, enabled=not PC),
    PythonProcess("fleet_manager", "selfdrive.frogpilot.fleetmanager.fleet_manager", check_fleet),
    PythonProcess("carrot_man", "selfdrive.carrot.carrot_man", always_run),#, enabled=not PC),
    # c3x lite
    PythonProcess("beep", "selfdrive.controls.beep", c3x_lite, enabled=TICI),
]
# Lookup by process name; entries sharing a name would overwrite each other here.
managed_processes = {p.name: p for p in procs}

View File

View File

@@ -0,0 +1,68 @@
import os
import pytest
import signal
import time
from cereal import car
from openpilot.common.params import Params
import openpilot.system.manager.manager as manager
from openpilot.system.manager.process import ensure_running
from openpilot.system.manager.process_config import managed_processes, procs
from openpilot.system.hardware import HARDWARE
# Set at import time, before any test runs.
# NOTE(review): presumably read by the uploader to avoid real uploads — confirm
os.environ['FAKEUPLOAD'] = "1"
# Not referenced in the visible part of this file.
MAX_STARTUP_TIME = 3
# Process names passed as not_run to ensure_running() in test_clean_exit.
BLACKLIST_PROCS = ['manage_athenad', 'pandad', 'pigeond']
class TestManager:
    """Tests for manager start-up, process configuration and clean shutdown."""

    def setup_method(self):
        HARDWARE.set_power_save(False)

        # ensure clean CarParams
        params = Params()
        params.clear_all()

    def teardown_method(self):
        manager.manager_cleanup()

    def test_manager_prepare(self):
        """manager.main() must return after preparation when PREPAREONLY is set."""
        previous = os.environ.get('PREPAREONLY')
        os.environ['PREPAREONLY'] = '1'
        try:
            manager.main()
        finally:
            # restore the environment so the flag does not leak into other tests
            if previous is None:
                os.environ.pop('PREPAREONLY', None)
            else:
                os.environ['PREPAREONLY'] = previous

    def test_duplicate_procs(self):
        assert len(procs) == len(managed_processes), "Duplicate process names"

    def test_blacklisted_procs(self):
        # TODO: ensure there are blacklisted procs until we have a dedicated test
        assert len(BLACKLIST_PROCS), "No blacklisted procs to test not_run"

    @pytest.mark.skip("this test is flaky the way it's currently written, should be moved to test_onroad")
    def test_clean_exit(self, subtests):
        """
        Ensure all processes exit cleanly when stopped.
        """
        HARDWARE.set_power_save(False)
        manager.manager_init()

        CP = car.CarParams.new_message()
        # named "running" so the module-level "procs" import is not shadowed
        running = ensure_running(managed_processes.values(), True, Params(), CP, not_run=BLACKLIST_PROCS)

        time.sleep(10)

        for p in running:
            with subtests.test(proc=p.name):
                state = p.get_process_state_msg()
                assert state.running, f"{p.name} not running"
                exit_code = p.stop(retry=False)

                assert p.name not in BLACKLIST_PROCS, f"{p.name} was started"

                assert exit_code is not None, f"{p.name} failed to exit"

                # TODO: interrupted blocking read exits with 1 in cereal. use a more unique return code
                exit_codes = [0, 1]
                if p.sigkill:
                    exit_codes = [-signal.SIGKILL]
                assert exit_code in exit_codes, f"{p.name} died with {exit_code}"