Release 260111

Comma Device
2026-01-11 18:23:29 +08:00
commit 3721ecbf8a
2601 changed files with 855070 additions and 0 deletions

system/updated/casync/casync.py Executable file

@@ -0,0 +1,246 @@
#!/usr/bin/env python3
import io
import lzma
import os
import pathlib
import struct
import sys
import time
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable
from typing import IO
import requests
from Crypto.Hash import SHA512
from openpilot.system.updated.casync import tar
from openpilot.system.updated.casync.common import create_casync_tar_package
CA_FORMAT_INDEX = 0x96824d9c7b129ff9
CA_FORMAT_TABLE = 0xe75b9e112f17417d
CA_FORMAT_TABLE_TAIL_MARKER = 0xe75b9e112f17417
FLAGS = 0xb000000000000000
CA_HEADER_LEN = 48
CA_TABLE_HEADER_LEN = 16
CA_TABLE_ENTRY_LEN = 40
CA_TABLE_MIN_LEN = CA_TABLE_HEADER_LEN + CA_TABLE_ENTRY_LEN
CHUNK_DOWNLOAD_TIMEOUT = 60
CHUNK_DOWNLOAD_RETRIES = 3
CAIBX_DOWNLOAD_TIMEOUT = 120
Chunk = namedtuple('Chunk', ['sha', 'offset', 'length'])
ChunkDict = dict[bytes, Chunk]
class ChunkReader(ABC):
@abstractmethod
def read(self, chunk: Chunk) -> bytes:
...
class BinaryChunkReader(ChunkReader):
"""Reads chunks from a local file"""
def __init__(self, file_like: IO[bytes]) -> None:
super().__init__()
self.f = file_like
def read(self, chunk: Chunk) -> bytes:
self.f.seek(chunk.offset)
return self.f.read(chunk.length)
class FileChunkReader(BinaryChunkReader):
def __init__(self, path: str) -> None:
super().__init__(open(path, 'rb'))
def __del__(self):
self.f.close()
class RemoteChunkReader(ChunkReader):
"""Reads lzma compressed chunks from a remote store"""
def __init__(self, url: str) -> None:
super().__init__()
self.url = url
self.session = requests.Session()
def read(self, chunk: Chunk) -> bytes:
sha_hex = chunk.sha.hex()
url = os.path.join(self.url, sha_hex[:4], sha_hex + ".cacnk")
if os.path.isfile(url):
with open(url, 'rb') as f:
contents = f.read()
else:
for i in range(CHUNK_DOWNLOAD_RETRIES):
try:
resp = self.session.get(url, timeout=CHUNK_DOWNLOAD_TIMEOUT)
break
except Exception:
if i == CHUNK_DOWNLOAD_RETRIES - 1:
raise
time.sleep(CHUNK_DOWNLOAD_TIMEOUT)
resp.raise_for_status()
contents = resp.content
decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_AUTO)
return decompressor.decompress(contents)
class DirectoryTarChunkReader(BinaryChunkReader):
"""creates a tar archive of a directory and reads chunks from it"""
def __init__(self, path: str, cache_file: str) -> None:
create_casync_tar_package(pathlib.Path(path), pathlib.Path(cache_file))
self.f = open(cache_file, "rb")
    super().__init__(self.f)
def __del__(self):
self.f.close()
os.unlink(self.f.name)
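# caibx layout assumed by parse_caibx below (all integers little-endian uint64):
#   index header (48 bytes): length, CA_FORMAT_INDEX magic, flags, chunk-size min/avg/max
#   table header (16 bytes): length, CA_FORMAT_TABLE magic
#   one 40-byte entry per chunk: end offset (8 bytes) + SHA-512/256 digest (32 bytes)
#   a tail-marker entry terminating the table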
def parse_caibx(caibx_path: str) -> list[Chunk]:
"""Parses the chunks from a caibx file. Can handle both local and remote files.
Returns a list of chunks with hash, offset and length"""
caibx: io.BufferedIOBase
if os.path.isfile(caibx_path):
caibx = open(caibx_path, 'rb')
else:
resp = requests.get(caibx_path, timeout=CAIBX_DOWNLOAD_TIMEOUT)
resp.raise_for_status()
caibx = io.BytesIO(resp.content)
caibx.seek(0, os.SEEK_END)
caibx_len = caibx.tell()
caibx.seek(0, os.SEEK_SET)
# Parse header
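  # six little-endian uint64s: header length, magic, flags, then chunk-size min/avg/max (the ignored field is presumably the average)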
length, magic, flags, min_size, _, max_size = struct.unpack("<QQQQQQ", caibx.read(CA_HEADER_LEN))
  assert flags == FLAGS
assert length == CA_HEADER_LEN
assert magic == CA_FORMAT_INDEX
# Parse table header
length, magic = struct.unpack("<QQ", caibx.read(CA_TABLE_HEADER_LEN))
assert magic == CA_FORMAT_TABLE
# Parse chunks
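  # infer the entry count from the file size: everything after the index and table headers, minus the trailing tail-marker entry (hence CA_TABLE_MIN_LEN)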
num_chunks = (caibx_len - CA_HEADER_LEN - CA_TABLE_MIN_LEN) // CA_TABLE_ENTRY_LEN
chunks = []
offset = 0
for i in range(num_chunks):
new_offset = struct.unpack("<Q", caibx.read(8))[0]
sha = caibx.read(32)
length = new_offset - offset
assert length <= max_size
# Last chunk can be smaller
if i < num_chunks - 1:
assert length >= min_size
chunks.append(Chunk(sha, offset, length))
offset = new_offset
caibx.close()
return chunks
def build_chunk_dict(chunks: list[Chunk]) -> ChunkDict:
"""Turn a list of chunks into a dict for faster lookups based on hash.
  Keep the first chunk seen for each hash, since it's more likely to have been downloaded already."""
r = {}
for c in chunks:
if c.sha not in r:
r[c.sha] = c
return r
def extract(target: list[Chunk],
sources: list[tuple[str, ChunkReader, ChunkDict]],
out_path: str,
            progress: Callable[[int], None] | None = None):
stats: dict[str, int] = defaultdict(int)
mode = 'rb+' if os.path.exists(out_path) else 'wb'
with open(out_path, mode) as out:
for cur_chunk in target:
# Find source for desired chunk
for name, chunk_reader, store_chunks in sources:
if cur_chunk.sha in store_chunks:
bts = chunk_reader.read(store_chunks[cur_chunk.sha])
# Check length
if len(bts) != cur_chunk.length:
continue
# Check hash
if SHA512.new(bts, truncate="256").digest() != cur_chunk.sha:
continue
# Write to output
out.seek(cur_chunk.offset)
out.write(bts)
stats[name] += cur_chunk.length
if progress is not None:
progress(sum(stats.values()))
break
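      # for/else: reached only when no source could provide the chunk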
else:
raise RuntimeError("Desired chunk not found in provided stores")
return stats
def extract_directory(target: list[Chunk],
sources: list[tuple[str, ChunkReader, ChunkDict]],
out_path: str,
tmp_file: str,
                      progress: Callable[[int], None] | None = None):
"""extract a directory stored as a casync tar archive"""
stats = extract(target, sources, tmp_file, progress)
with open(tmp_file, "rb") as f:
tar.extract_tar_archive(f, pathlib.Path(out_path))
return stats
def print_stats(stats: dict[str, int]):
total_bytes = sum(stats.values())
print(f"Total size: {total_bytes / 1024 / 1024:.2f} MB")
for name, total in stats.items():
print(f" {name}: {total / 1024 / 1024:.2f} MB ({total / total_bytes * 100:.1f}%)")
def extract_simple(caibx_path, out_path, store_path):
  target = parse_caibx(caibx_path)
  # each source is a (name, chunk reader, chunk dict) tuple
  sources = [
# (store_path, RemoteChunkReader(store_path), build_chunk_dict(target)),
(store_path, FileChunkReader(store_path), build_chunk_dict(target)),
]
return extract(target, sources, out_path)
if __name__ == "__main__":
caibx = sys.argv[1]
out = sys.argv[2]
store = sys.argv[3]
stats = extract_simple(caibx, out, store)
print_stats(stats)
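
For context, a minimal sketch of how the pieces above compose for a seeded extraction; this is not part of the commit, and the URLs and paths are hypothetical:

# hypothetical usage sketch (not part of this commit)
from openpilot.system.updated.casync import casync

target = casync.parse_caibx("https://example.com/release.caibx")  # hypothetical URL
sources = [
  # try chunks from the previous install first...
  ("seed", casync.FileChunkReader("/data/old.bin"), casync.build_chunk_dict(target)),
  # ...then download whatever is still missing
  ("remote", casync.RemoteChunkReader("https://example.com/store"), casync.build_chunk_dict(target)),
]
stats = casync.extract(target, sources, "/data/new.bin")
casync.print_stats(stats)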

system/updated/casync/common.py

@@ -0,0 +1,61 @@
import dataclasses
import json
import pathlib
import subprocess
from openpilot.system.version import BUILD_METADATA_FILENAME, BuildMetadata
from openpilot.system.updated.casync import tar
CASYNC_ARGS = ["--with=symlinks", "--with=permissions", "--compression=xz", "--chunk-size=16M"]
CASYNC_FILES = [BUILD_METADATA_FILENAME]
def run(cmd):
return subprocess.check_output(cmd)
def get_exclude_set(path) -> set[str]:
exclude_set = set(CASYNC_FILES)
for file in path.rglob("*"):
if file.is_file() or file.is_symlink():
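      # exclude the file itself and every parent directory up to (but not including) the root path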
while file.resolve() != path.resolve():
exclude_set.add(str(file.relative_to(path)))
file = file.parent
return exclude_set
def create_build_metadata_file(path: pathlib.Path, build_metadata: BuildMetadata):
with open(path / BUILD_METADATA_FILENAME, "w") as f:
build_metadata_dict = dataclasses.asdict(build_metadata)
build_metadata_dict["openpilot"].pop("is_dirty") # this is determined at runtime
build_metadata_dict.pop("channel") # channel is unrelated to the build itself
f.write(json.dumps(build_metadata_dict))
def is_not_git(path: pathlib.Path) -> bool:
return ".git" not in path.parts
def create_casync_tar_package(target_dir: pathlib.Path, output_path: pathlib.Path):
tar.create_tar_archive(output_path, target_dir, is_not_git)
def create_casync_from_file(file: pathlib.Path, output_dir: pathlib.Path, caibx_name: str):
caibx_file = output_dir / f"{caibx_name}.caibx"
run(["casync", "make", *CASYNC_ARGS, caibx_file, str(file)])
return caibx_file
def create_casync_release(target_dir: pathlib.Path, output_dir: pathlib.Path, caibx_name: str):
tar_file = output_dir / f"{caibx_name}.tar"
create_casync_tar_package(target_dir, tar_file)
caibx_file = create_casync_from_file(tar_file, output_dir, caibx_name)
tar_file.unlink()
digest = run(["casync", "digest", *CASYNC_ARGS, target_dir]).decode("utf-8").strip()
return digest, caibx_file
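
A minimal usage sketch for the release helper; not part of this commit, and all paths and the caibx name are hypothetical:

# hypothetical usage sketch (not part of this commit)
import pathlib
from openpilot.system.updated.casync.common import create_casync_release

# packages the build directory into <output>/release.caibx plus casync's chunk store;
# the output directory is assumed to exist already
digest, caibx_file = create_casync_release(
  pathlib.Path("/data/openpilot-build"),  # hypothetical build directory
  pathlib.Path("/data/casync-output"),    # hypothetical output directory
  "release",                              # hypothetical caibx name
)
print(digest, caibx_file)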

system/updated/casync/tar.py

@@ -0,0 +1,39 @@
import pathlib
import tarfile
from typing import IO
from collections.abc import Callable
def include_default(_) -> bool:
return True
def create_tar_archive(filename: pathlib.Path, directory: pathlib.Path, include: Callable[[pathlib.Path], bool] = include_default):
"""Creates a tar archive of a directory"""
with tarfile.open(filename, 'w') as tar:
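    # largest files first; presumably keeps chunk boundaries stable across archives so casync dedups better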
for file in sorted(directory.rglob("*"), key=lambda f: f.stat().st_size if f.is_file() else 0, reverse=True):
if not include(file):
continue
relative_path = str(file.relative_to(directory))
if file.is_symlink():
info = tarfile.TarInfo(relative_path)
info.type = tarfile.SYMTYPE
info.linkpath = str(file.readlink())
tar.addfile(info)
elif file.is_file():
info = tarfile.TarInfo(relative_path)
info.size = file.stat().st_size
info.type = tarfile.REGTYPE
info.mode = file.stat().st_mode
with file.open('rb') as f:
tar.addfile(info, f)
def extract_tar_archive(fh: IO[bytes], directory: pathlib.Path):
"""Extracts a tar archive to a directory"""
tar = tarfile.open(fileobj=fh, mode='r')
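  # identity filter: keep members exactly as stored (archives here are produced locally and trusted)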
tar.extractall(str(directory), filter=lambda info, path: info)
tar.close()
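
A minimal round-trip sketch for these helpers; not part of this commit, and the paths are hypothetical:

# hypothetical usage sketch (not part of this commit)
import pathlib
from openpilot.system.updated.casync import tar

src = pathlib.Path("/tmp/srcdir")         # hypothetical directory to archive
archive = pathlib.Path("/tmp/srcdir.tar")
tar.create_tar_archive(archive, src)      # symlinks and file modes are preserved

with open(archive, "rb") as f:
  tar.extract_tar_archive(f, pathlib.Path("/tmp/outdir"))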

system/updated/casync/tests/test_casync.py

@@ -0,0 +1,264 @@
import pytest
import os
import pathlib
import tempfile
import subprocess
from openpilot.system.updated.casync import casync
from openpilot.system.updated.casync import tar
# dd if=/dev/zero of=/tmp/img.raw bs=1M count=2
# sudo losetup -f /tmp/img.raw
# losetup -a | grep img.raw
LOOPBACK = os.environ.get('LOOPBACK', None)
@pytest.mark.skip("not used yet")
class TestCasync:
@classmethod
def setup_class(cls):
cls.tmpdir = tempfile.TemporaryDirectory()
# Build example contents
chunk_a = [i % 256 for i in range(1024)] * 512
chunk_b = [(256 - i) % 256 for i in range(1024)] * 512
zeroes = [0] * (1024 * 128)
contents = chunk_a + chunk_b + zeroes + chunk_a
cls.contents = bytes(contents)
# Write to file
cls.orig_fn = os.path.join(cls.tmpdir.name, 'orig.bin')
with open(cls.orig_fn, 'wb') as f:
f.write(cls.contents)
# Create casync files
cls.manifest_fn = os.path.join(cls.tmpdir.name, 'orig.caibx')
cls.store_fn = os.path.join(cls.tmpdir.name, 'store')
subprocess.check_output(["casync", "make", "--compression=xz", "--store", cls.store_fn, cls.manifest_fn, cls.orig_fn])
target = casync.parse_caibx(cls.manifest_fn)
hashes = [c.sha.hex() for c in target]
# Ensure we have chunk reuse
assert len(hashes) > len(set(hashes))
def setup_method(self):
# Clear target_lo
if LOOPBACK is not None:
self.target_lo = LOOPBACK
with open(self.target_lo, 'wb') as f:
f.write(b"0" * len(self.contents))
self.target_fn = os.path.join(self.tmpdir.name, next(tempfile._get_candidate_names()))
self.seed_fn = os.path.join(self.tmpdir.name, next(tempfile._get_candidate_names()))
def teardown_method(self):
for fn in [self.target_fn, self.seed_fn]:
try:
os.unlink(fn)
except FileNotFoundError:
pass
def test_simple_extract(self):
target = casync.parse_caibx(self.manifest_fn)
sources = [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_fn)
with open(self.target_fn, 'rb') as target_f:
assert target_f.read() == self.contents
assert stats['remote'] == len(self.contents)
def test_seed(self):
target = casync.parse_caibx(self.manifest_fn)
# Populate seed with half of the target contents
with open(self.seed_fn, 'wb') as seed_f:
seed_f.write(self.contents[:len(self.contents) // 2])
sources = [('seed', casync.FileChunkReader(self.seed_fn), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_fn)
with open(self.target_fn, 'rb') as target_f:
assert target_f.read() == self.contents
assert stats['seed'] > 0
assert stats['remote'] < len(self.contents)
def test_already_done(self):
"""Test that an already flashed target doesn't download any chunks"""
target = casync.parse_caibx(self.manifest_fn)
with open(self.target_fn, 'wb') as f:
f.write(self.contents)
sources = [('target', casync.FileChunkReader(self.target_fn), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_fn)
with open(self.target_fn, 'rb') as f:
assert f.read() == self.contents
assert stats['target'] == len(self.contents)
def test_chunk_reuse(self):
"""Test that chunks that are reused are only downloaded once"""
target = casync.parse_caibx(self.manifest_fn)
# Ensure target exists
with open(self.target_fn, 'wb'):
pass
sources = [('target', casync.FileChunkReader(self.target_fn), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_fn)
with open(self.target_fn, 'rb') as f:
assert f.read() == self.contents
assert stats['remote'] < len(self.contents)
@pytest.mark.skipif(not LOOPBACK, reason="requires loopback device")
def test_lo_simple_extract(self):
target = casync.parse_caibx(self.manifest_fn)
sources = [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_lo)
with open(self.target_lo, 'rb') as target_f:
assert target_f.read(len(self.contents)) == self.contents
assert stats['remote'] == len(self.contents)
@pytest.mark.skipif(not LOOPBACK, reason="requires loopback device")
def test_lo_chunk_reuse(self):
"""Test that chunks that are reused are only downloaded once"""
target = casync.parse_caibx(self.manifest_fn)
sources = [('target', casync.FileChunkReader(self.target_lo), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract(target, sources, self.target_lo)
with open(self.target_lo, 'rb') as f:
assert f.read(len(self.contents)) == self.contents
assert stats['remote'] < len(self.contents)
@pytest.mark.skip("not used yet")
class TestCasyncDirectory:
"""Tests extracting a directory stored as a casync tar archive"""
NUM_FILES = 16
@classmethod
def setup_cache(cls, directory, files=None):
if files is None:
files = range(cls.NUM_FILES)
chunk_a = [i % 256 for i in range(1024)] * 512
chunk_b = [(256 - i) % 256 for i in range(1024)] * 512
zeroes = [0] * (1024 * 128)
cls.contents = chunk_a + chunk_b + zeroes + chunk_a
cls.contents = bytes(cls.contents)
for i in files:
with open(os.path.join(directory, f"file_{i}.txt"), "wb") as f:
f.write(cls.contents)
os.symlink(f"file_{i}.txt", os.path.join(directory, f"link_{i}.txt"))
@classmethod
def setup_class(cls):
cls.tmpdir = tempfile.TemporaryDirectory()
# Create casync files
cls.manifest_fn = os.path.join(cls.tmpdir.name, 'orig.caibx')
cls.store_fn = os.path.join(cls.tmpdir.name, 'store')
cls.directory_to_extract = tempfile.TemporaryDirectory()
cls.setup_cache(cls.directory_to_extract.name)
cls.orig_fn = os.path.join(cls.tmpdir.name, 'orig.tar')
tar.create_tar_archive(cls.orig_fn, pathlib.Path(cls.directory_to_extract.name))
subprocess.check_output(["casync", "make", "--compression=xz", "--store", cls.store_fn, cls.manifest_fn, cls.orig_fn])
@classmethod
def teardown_class(cls):
cls.tmpdir.cleanup()
cls.directory_to_extract.cleanup()
def setup_method(self):
self.cache_dir = tempfile.TemporaryDirectory()
self.working_dir = tempfile.TemporaryDirectory()
self.out_dir = tempfile.TemporaryDirectory()
def teardown_method(self):
self.cache_dir.cleanup()
self.working_dir.cleanup()
self.out_dir.cleanup()
def run_test(self):
target = casync.parse_caibx(self.manifest_fn)
cache_filename = os.path.join(self.working_dir.name, "cache.tar")
tmp_filename = os.path.join(self.working_dir.name, "tmp.tar")
sources = [('cache', casync.DirectoryTarChunkReader(self.cache_dir.name, cache_filename), casync.build_chunk_dict(target))]
sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
stats = casync.extract_directory(target, sources, pathlib.Path(self.out_dir.name), tmp_filename)
with open(os.path.join(self.out_dir.name, "file_0.txt"), "rb") as f:
assert f.read() == self.contents
with open(os.path.join(self.out_dir.name, "link_0.txt"), "rb") as f:
assert f.read() == self.contents
assert os.readlink(os.path.join(self.out_dir.name, "link_0.txt")) == "file_0.txt"
return stats
def test_no_cache(self):
self.setup_cache(self.cache_dir.name, [])
stats = self.run_test()
assert stats['remote'] > 0
assert stats['cache'] == 0
def test_full_cache(self):
self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
stats = self.run_test()
assert stats['remote'] == 0
assert stats['cache'] > 0
def test_one_file_cache(self):
self.setup_cache(self.cache_dir.name, range(1))
stats = self.run_test()
assert stats['remote'] > 0
assert stats['cache'] > 0
assert stats['cache'] < stats['remote']
def test_one_file_incorrect_cache(self):
self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
with open(os.path.join(self.cache_dir.name, "file_0.txt"), "wb") as f:
f.write(b"1234")
stats = self.run_test()
assert stats['remote'] > 0
assert stats['cache'] > 0
assert stats['cache'] > stats['remote']
def test_one_file_missing_cache(self):
self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
os.unlink(os.path.join(self.cache_dir.name, "file_12.txt"))
stats = self.run_test()
assert stats['remote'] > 0
assert stats['cache'] > 0
assert stats['cache'] > stats['remote']