Release 260111

Comma Device
2026-01-11 18:23:29 +08:00
commit 3721ecbf8a
2601 changed files with 855070 additions and 0 deletions

system/updated/casync/casync.py Executable file

@@ -0,0 +1,246 @@
#!/usr/bin/env python3
import io
import lzma
import os
import pathlib
import struct
import sys
import time
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable
from typing import IO
import requests
from Crypto.Hash import SHA512
from openpilot.system.updated.casync import tar
from openpilot.system.updated.casync.common import create_casync_tar_package
CA_FORMAT_INDEX = 0x96824d9c7b129ff9
CA_FORMAT_TABLE = 0xe75b9e112f17417d
CA_FORMAT_TABLE_TAIL_MARKER = 0xe75b9e112f17417
FLAGS = 0xb000000000000000
CA_HEADER_LEN = 48
CA_TABLE_HEADER_LEN = 16
CA_TABLE_ENTRY_LEN = 40
CA_TABLE_MIN_LEN = CA_TABLE_HEADER_LEN + CA_TABLE_ENTRY_LEN
CHUNK_DOWNLOAD_TIMEOUT = 60
CHUNK_DOWNLOAD_RETRIES = 3
CAIBX_DOWNLOAD_TIMEOUT = 120
Chunk = namedtuple('Chunk', ['sha', 'offset', 'length'])
ChunkDict = dict[bytes, Chunk]
class ChunkReader(ABC):
@abstractmethod
def read(self, chunk: Chunk) -> bytes:
...
class BinaryChunkReader(ChunkReader):
"""Reads chunks from a local file"""
def __init__(self, file_like: IO[bytes]) -> None:
super().__init__()
self.f = file_like
def read(self, chunk: Chunk) -> bytes:
self.f.seek(chunk.offset)
return self.f.read(chunk.length)
class FileChunkReader(BinaryChunkReader):
def __init__(self, path: str) -> None:
super().__init__(open(path, 'rb'))
def __del__(self):
self.f.close()
class RemoteChunkReader(ChunkReader):
"""Reads lzma compressed chunks from a remote store"""
def __init__(self, url: str) -> None:
super().__init__()
self.url = url
self.session = requests.Session()
def read(self, chunk: Chunk) -> bytes:
sha_hex = chunk.sha.hex()
url = os.path.join(self.url, sha_hex[:4], sha_hex + ".cacnk")
if os.path.isfile(url):
with open(url, 'rb') as f:
contents = f.read()
else:
for i in range(CHUNK_DOWNLOAD_RETRIES):
try:
resp = self.session.get(url, timeout=CHUNK_DOWNLOAD_TIMEOUT)
break
except Exception:
if i == CHUNK_DOWNLOAD_RETRIES - 1:
raise
time.sleep(CHUNK_DOWNLOAD_TIMEOUT)
resp.raise_for_status()
contents = resp.content
decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_AUTO)
return decompressor.decompress(contents)
class DirectoryTarChunkReader(BinaryChunkReader):
"""creates a tar archive of a directory and reads chunks from it"""
def __init__(self, path: str, cache_file: str) -> None:
create_casync_tar_package(pathlib.Path(path), pathlib.Path(cache_file))
self.f = open(cache_file, "rb")
    super().__init__(self.f)
def __del__(self):
self.f.close()
os.unlink(self.f.name)
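# caibx layout assumed by parse_caibx below (all integers little-endian uint64):
#   index header (48 bytes): length, CA_FORMAT_INDEX magic, flags, chunk-size min/avg/max
#   table header (16 bytes): length, CA_FORMAT_TABLE magic
#   one 40-byte entry per chunk: end offset (8 bytes) + SHA-512/256 digest (32 bytes)
#   a tail-marker entry terminating the table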
def parse_caibx(caibx_path: str) -> list[Chunk]:
"""Parses the chunks from a caibx file. Can handle both local and remote files.
Returns a list of chunks with hash, offset and length"""
caibx: io.BufferedIOBase
if os.path.isfile(caibx_path):
caibx = open(caibx_path, 'rb')
else:
resp = requests.get(caibx_path, timeout=CAIBX_DOWNLOAD_TIMEOUT)
resp.raise_for_status()
caibx = io.BytesIO(resp.content)
caibx.seek(0, os.SEEK_END)
caibx_len = caibx.tell()
caibx.seek(0, os.SEEK_SET)
# Parse header
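  # six little-endian uint64s: header length, magic, flags, then chunk-size min/avg/max (the ignored field is presumably the average)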
length, magic, flags, min_size, _, max_size = struct.unpack("<QQQQQQ", caibx.read(CA_HEADER_LEN))
  assert flags == FLAGS
assert length == CA_HEADER_LEN
assert magic == CA_FORMAT_INDEX
# Parse table header
length, magic = struct.unpack("<QQ", caibx.read(CA_TABLE_HEADER_LEN))
assert magic == CA_FORMAT_TABLE
# Parse chunks
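  # infer the entry count from the file size: everything after the index and table headers, minus the trailing tail-marker entry (hence CA_TABLE_MIN_LEN)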
num_chunks = (caibx_len - CA_HEADER_LEN - CA_TABLE_MIN_LEN) // CA_TABLE_ENTRY_LEN
chunks = []
offset = 0
for i in range(num_chunks):
new_offset = struct.unpack("<Q", caibx.read(8))[0]
sha = caibx.read(32)
length = new_offset - offset
assert length <= max_size
# Last chunk can be smaller
if i < num_chunks - 1:
assert length >= min_size
chunks.append(Chunk(sha, offset, length))
offset = new_offset
caibx.close()
return chunks
def build_chunk_dict(chunks: list[Chunk]) -> ChunkDict:
"""Turn a list of chunks into a dict for faster lookups based on hash.
  Keep the first chunk seen for each hash, since it's more likely to have been downloaded already."""
r = {}
for c in chunks:
if c.sha not in r:
r[c.sha] = c
return r
def extract(target: list[Chunk],
sources: list[tuple[str, ChunkReader, ChunkDict]],
out_path: str,
            progress: Callable[[int], None] | None = None):
stats: dict[str, int] = defaultdict(int)
mode = 'rb+' if os.path.exists(out_path) else 'wb'
with open(out_path, mode) as out:
for cur_chunk in target:
# Find source for desired chunk
for name, chunk_reader, store_chunks in sources:
if cur_chunk.sha in store_chunks:
bts = chunk_reader.read(store_chunks[cur_chunk.sha])
# Check length
if len(bts) != cur_chunk.length:
continue
# Check hash
if SHA512.new(bts, truncate="256").digest() != cur_chunk.sha:
continue
# Write to output
out.seek(cur_chunk.offset)
out.write(bts)
stats[name] += cur_chunk.length
if progress is not None:
progress(sum(stats.values()))
break
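      # for/else: reached only when no source could provide the chunk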
else:
raise RuntimeError("Desired chunk not found in provided stores")
return stats
def extract_directory(target: list[Chunk],
sources: list[tuple[str, ChunkReader, ChunkDict]],
out_path: str,
tmp_file: str,
                      progress: Callable[[int], None] | None = None):
"""extract a directory stored as a casync tar archive"""
stats = extract(target, sources, tmp_file, progress)
with open(tmp_file, "rb") as f:
tar.extract_tar_archive(f, pathlib.Path(out_path))
return stats
def print_stats(stats: dict[str, int]):
total_bytes = sum(stats.values())
print(f"Total size: {total_bytes / 1024 / 1024:.2f} MB")
for name, total in stats.items():
print(f" {name}: {total / 1024 / 1024:.2f} MB ({total / total_bytes * 100:.1f}%)")
def extract_simple(caibx_path, out_path, store_path):
  target = parse_caibx(caibx_path)
  # each source is a (name, chunk reader, chunk dict) tuple
  sources = [
# (store_path, RemoteChunkReader(store_path), build_chunk_dict(target)),
(store_path, FileChunkReader(store_path), build_chunk_dict(target)),
]
return extract(target, sources, out_path)
if __name__ == "__main__":
caibx = sys.argv[1]
out = sys.argv[2]
store = sys.argv[3]
stats = extract_simple(caibx, out, store)
print_stats(stats)
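
For context, a minimal sketch of how the pieces above compose for a seeded extraction; this is not part of the commit, and the URLs and paths are hypothetical:

# hypothetical usage sketch (not part of this commit)
from openpilot.system.updated.casync import casync

target = casync.parse_caibx("https://example.com/release.caibx")  # hypothetical URL
sources = [
  # try chunks from the previous install first...
  ("seed", casync.FileChunkReader("/data/old.bin"), casync.build_chunk_dict(target)),
  # ...then download whatever is still missing
  ("remote", casync.RemoteChunkReader("https://example.com/store"), casync.build_chunk_dict(target)),
]
stats = casync.extract(target, sources, "/data/new.bin")
casync.print_stats(stats)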

system/updated/casync/common.py

@@ -0,0 +1,61 @@
import dataclasses
import json
import pathlib
import subprocess
from openpilot.system.version import BUILD_METADATA_FILENAME, BuildMetadata
from openpilot.system.updated.casync import tar
CASYNC_ARGS = ["--with=symlinks", "--with=permissions", "--compression=xz", "--chunk-size=16M"]
CASYNC_FILES = [BUILD_METADATA_FILENAME]
def run(cmd):
return subprocess.check_output(cmd)
def get_exclude_set(path) -> set[str]:
exclude_set = set(CASYNC_FILES)
for file in path.rglob("*"):
if file.is_file() or file.is_symlink():
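      # exclude the file itself and every parent directory up to (but not including) the root path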
while file.resolve() != path.resolve():
exclude_set.add(str(file.relative_to(path)))
file = file.parent
return exclude_set
def create_build_metadata_file(path: pathlib.Path, build_metadata: BuildMetadata):
with open(path / BUILD_METADATA_FILENAME, "w") as f:
build_metadata_dict = dataclasses.asdict(build_metadata)
build_metadata_dict["openpilot"].pop("is_dirty") # this is determined at runtime
build_metadata_dict.pop("channel") # channel is unrelated to the build itself
f.write(json.dumps(build_metadata_dict))
def is_not_git(path: pathlib.Path) -> bool:
return ".git" not in path.parts
def create_casync_tar_package(target_dir: pathlib.Path, output_path: pathlib.Path):
tar.create_tar_archive(output_path, target_dir, is_not_git)
def create_casync_from_file(file: pathlib.Path, output_dir: pathlib.Path, caibx_name: str):
caibx_file = output_dir / f"{caibx_name}.caibx"
run(["casync", "make", *CASYNC_ARGS, caibx_file, str(file)])
return caibx_file
def create_casync_release(target_dir: pathlib.Path, output_dir: pathlib.Path, caibx_name: str):
tar_file = output_dir / f"{caibx_name}.tar"
create_casync_tar_package(target_dir, tar_file)
caibx_file = create_casync_from_file(tar_file, output_dir, caibx_name)
tar_file.unlink()
digest = run(["casync", "digest", *CASYNC_ARGS, target_dir]).decode("utf-8").strip()
return digest, caibx_file
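
A minimal usage sketch for the release helper; not part of this commit, and all paths and the caibx name are hypothetical:

# hypothetical usage sketch (not part of this commit)
import pathlib
from openpilot.system.updated.casync.common import create_casync_release

# packages the build directory into <output>/release.caibx plus casync's chunk store;
# the output directory is assumed to exist already
digest, caibx_file = create_casync_release(
  pathlib.Path("/data/openpilot-build"),  # hypothetical build directory
  pathlib.Path("/data/casync-output"),    # hypothetical output directory
  "release",                              # hypothetical caibx name
)
print(digest, caibx_file)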

system/updated/casync/tar.py

@@ -0,0 +1,39 @@
import pathlib
import tarfile
from typing import IO
from collections.abc import Callable
def include_default(_) -> bool:
return True
def create_tar_archive(filename: pathlib.Path, directory: pathlib.Path, include: Callable[[pathlib.Path], bool] = include_default):
"""Creates a tar archive of a directory"""
with tarfile.open(filename, 'w') as tar:
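    # largest files first; presumably keeps chunk boundaries stable across archives so casync dedups better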
for file in sorted(directory.rglob("*"), key=lambda f: f.stat().st_size if f.is_file() else 0, reverse=True):
if not include(file):
continue
relative_path = str(file.relative_to(directory))
if file.is_symlink():
info = tarfile.TarInfo(relative_path)
info.type = tarfile.SYMTYPE
info.linkpath = str(file.readlink())
tar.addfile(info)
elif file.is_file():
info = tarfile.TarInfo(relative_path)
info.size = file.stat().st_size
info.type = tarfile.REGTYPE
info.mode = file.stat().st_mode
with file.open('rb') as f:
tar.addfile(info, f)
def extract_tar_archive(fh: IO[bytes], directory: pathlib.Path):
"""Extracts a tar archive to a directory"""
tar = tarfile.open(fileobj=fh, mode='r')
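  # identity filter: keep members exactly as stored (archives here are produced locally and trusted)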
tar.extractall(str(directory), filter=lambda info, path: info)
tar.close()
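
A minimal round-trip sketch for these helpers; not part of this commit, and the paths are hypothetical:

# hypothetical usage sketch (not part of this commit)
import pathlib
from openpilot.system.updated.casync import tar

src = pathlib.Path("/tmp/srcdir")         # hypothetical directory to archive
archive = pathlib.Path("/tmp/srcdir.tar")
tar.create_tar_archive(archive, src)      # symlinks and file modes are preserved

with open(archive, "rb") as f:
  tar.extract_tar_archive(f, pathlib.Path("/tmp/outdir"))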

system/updated/casync/tests/test_casync.py

@@ -0,0 +1,264 @@
import pytest
import os
import pathlib
import tempfile
import subprocess
from openpilot.system.updated.casync import casync
from openpilot.system.updated.casync import tar
# dd if=/dev/zero of=/tmp/img.raw bs=1M count=2
# sudo losetup -f /tmp/img.raw
# losetup -a | grep img.raw
LOOPBACK = os.environ.get('LOOPBACK', None)
@pytest.mark.skip("not used yet")
class TestCasync:
@classmethod
def setup_class(cls):
cls.tmpdir = tempfile.TemporaryDirectory()
# Build example contents
chunk_a = [i % 256 for i in range(1024)] * 512
chunk_b = [(256 - i) % 256 for i in range(1024)] * 512
zeroes = [0] * (1024 * 128)
contents = chunk_a + chunk_b + zeroes + chunk_a
cls.contents = bytes(contents)
# Write to file
cls.orig_fn = os.path.join(cls.tmpdir.name, 'orig.bin')
with open(cls.orig_fn, 'wb') as f:
f.write(cls.contents)
# Create casync files
cls.manifest_fn = os.path.join(cls.tmpdir.name, 'orig.caibx')
cls.store_fn = os.path.join(cls.tmpdir.name, 'store')
subprocess.check_output(["casync", "make", "--compression=xz", "--store", cls.store_fn, cls.manifest_fn, cls.orig_fn])
target = casync.parse_caibx(cls.manifest_fn)
hashes = [c.sha.hex() for c in target]
# Ensure we have chunk reuse
assert len(hashes) > len(set(hashes))
def setup_method(self):
# Clear target_lo
if LOOPBACK is not None:
self.target_lo = LOOPBACK
with open(self.target_lo, 'wb') as f:
f.write(b"0" * len(self.contents))
self.target_fn = os.path.join(self.tmpdir.name, next(tempfile._get_candidate_names()))
self.seed_fn = os.path.join(self.tmpdir.name, next(tempfile._get_candidate_names()))
def teardown_method(self):
for fn in [self.target_fn, self.seed_fn]:
try:
os.unlink(fn)
except FileNotFoundError:
pass
def test_simple_extract(self):
target = casync.parse_caibx(self.manifest_fn)
sources = [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_fn)
with open(self.target_fn, 'rb') as target_f:
assert target_f.read() == self.contents
assert stats['remote'] == len(self.contents)
def test_seed(self):
target = casync.parse_caibx(self.manifest_fn)
# Populate seed with half of the target contents
with open(self.seed_fn, 'wb') as seed_f:
seed_f.write(self.contents[:len(self.contents) // 2])
sources = [('seed', casync.FileChunkReader(self.seed_fn), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_fn)
with open(self.target_fn, 'rb') as target_f:
assert target_f.read() == self.contents
assert stats['seed'] > 0
assert stats['remote'] < len(self.contents)
def test_already_done(self):
"""Test that an already flashed target doesn't download any chunks"""
target = casync.parse_caibx(self.manifest_fn)
with open(self.target_fn, 'wb') as f:
f.write(self.contents)
sources = [('target', casync.FileChunkReader(self.target_fn), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_fn)
with open(self.target_fn, 'rb') as f:
assert f.read() == self.contents
assert stats['target'] == len(self.contents)
def test_chunk_reuse(self):
"""Test that chunks that are reused are only downloaded once"""
target = casync.parse_caibx(self.manifest_fn)
# Ensure target exists
with open(self.target_fn, 'wb'):
pass
sources = [('target', casync.FileChunkReader(self.target_fn), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_fn)
with open(self.target_fn, 'rb') as f:
assert f.read() == self.contents
assert stats['remote'] < len(self.contents)
@pytest.mark.skipif(not LOOPBACK, reason="requires loopback device")
def test_lo_simple_extract(self):
target = casync.parse_caibx(self.manifest_fn)
sources = [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_lo)
with open(self.target_lo, 'rb') as target_f:
assert target_f.read(len(self.contents)) == self.contents
assert stats['remote'] == len(self.contents)
@pytest.mark.skipif(not LOOPBACK, reason="requires loopback device")
def test_lo_chunk_reuse(self):
"""Test that chunks that are reused are only downloaded once"""
target = casync.parse_caibx(self.manifest_fn)
sources = [('target', casync.FileChunkReader(self.target_lo), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_lo)
with open(self.target_lo, 'rb') as f:
assert f.read(len(self.contents)) == self.contents
assert stats['remote'] < len(self.contents)
@pytest.mark.skip("not used yet")
class TestCasyncDirectory:
"""Tests extracting a directory stored as a casync tar archive"""
NUM_FILES = 16
@classmethod
def setup_cache(cls, directory, files=None):
if files is None:
files = range(cls.NUM_FILES)
chunk_a = [i % 256 for i in range(1024)] * 512
chunk_b = [(256 - i) % 256 for i in range(1024)] * 512
zeroes = [0] * (1024 * 128)
cls.contents = chunk_a + chunk_b + zeroes + chunk_a
cls.contents = bytes(cls.contents)
for i in files:
with open(os.path.join(directory, f"file_{i}.txt"), "wb") as f:
f.write(cls.contents)
os.symlink(f"file_{i}.txt", os.path.join(directory, f"link_{i}.txt"))
@classmethod
def setup_class(cls):
cls.tmpdir = tempfile.TemporaryDirectory()
# Create casync files
cls.manifest_fn = os.path.join(cls.tmpdir.name, 'orig.caibx')
cls.store_fn = os.path.join(cls.tmpdir.name, 'store')
cls.directory_to_extract = tempfile.TemporaryDirectory()
cls.setup_cache(cls.directory_to_extract.name)
cls.orig_fn = os.path.join(cls.tmpdir.name, 'orig.tar')
tar.create_tar_archive(cls.orig_fn, pathlib.Path(cls.directory_to_extract.name))
subprocess.check_output(["casync", "make", "--compression=xz", "--store", cls.store_fn, cls.manifest_fn, cls.orig_fn])
@classmethod
def teardown_class(cls):
cls.tmpdir.cleanup()
cls.directory_to_extract.cleanup()
def setup_method(self):
self.cache_dir = tempfile.TemporaryDirectory()
self.working_dir = tempfile.TemporaryDirectory()
self.out_dir = tempfile.TemporaryDirectory()
def teardown_method(self):
self.cache_dir.cleanup()
self.working_dir.cleanup()
self.out_dir.cleanup()
def run_test(self):
target = casync.parse_caibx(self.manifest_fn)
cache_filename = os.path.join(self.working_dir.name, "cache.tar")
tmp_filename = os.path.join(self.working_dir.name, "tmp.tar")
sources = [('cache', casync.DirectoryTarChunkReader(self.cache_dir.name, cache_filename), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract_directory(target, sources, pathlib.Path(self.out_dir.name), tmp_filename)
with open(os.path.join(self.out_dir.name, "file_0.txt"), "rb") as f:
assert f.read() == self.contents
with open(os.path.join(self.out_dir.name, "link_0.txt"), "rb") as f:
assert f.read() == self.contents
assert os.readlink(os.path.join(self.out_dir.name, "link_0.txt")) == "file_0.txt"
return stats
def test_no_cache(self):
self.setup_cache(self.cache_dir.name, [])
stats = self.run_test()
assert stats['remote'] > 0
assert stats['cache'] == 0
def test_full_cache(self):
self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
stats = self.run_test()
assert stats['remote'] == 0
assert stats['cache'] > 0
def test_one_file_cache(self):
self.setup_cache(self.cache_dir.name, range(1))
stats = self.run_test()
assert stats['remote'] > 0
assert stats['cache'] > 0
assert stats['cache'] < stats['remote']
def test_one_file_incorrect_cache(self):
self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
with open(os.path.join(self.cache_dir.name, "file_0.txt"), "wb") as f:
f.write(b"1234")
stats = self.run_test()
assert stats['remote'] > 0
assert stats['cache'] > 0
assert stats['cache'] > stats['remote']
def test_one_file_missing_cache(self):
self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
os.unlink(os.path.join(self.cache_dir.name, "file_12.txt"))
stats = self.run_test()
assert stats['remote'] > 0
assert stats['cache'] > 0
assert stats['cache'] > stats['remote']