From 37207f61e9bb5ee4acf9bd2c4a2ecce16035274d Mon Sep 17 00:00:00 2001
From: Adeeb Shihadeh <adeebshihadeh@gmail.com>
Date: Tue, 10 Mar 2026 21:23:32 -0700
Subject: [PATCH] Replace `mull` with custom mutation test runner (#3130)

simple mutation!
---
 .github/workflows/tests.yml                   |   6 +-
 .gitignore                                    |   2 +-
 opendbc/safety/tests/libsafety/SConscript     |  21 +-
 .../safety/tests/libsafety/libsafety_py.py    |  10 +-
 opendbc/safety/tests/mutation.py              | 633 ++++++++++++++++++
 opendbc/safety/tests/mutation.sh              |  20 -
 opendbc/safety/tests/test.sh                  |   7 +-
 pyproject.toml                                |   5 +
 setup.sh                                      |  21 -
 uv.lock                                       |  41 ++
 10 files changed, 704 insertions(+), 62 deletions(-)
 create mode 100755 opendbc/safety/tests/mutation.py
 delete mode 100755 opendbc/safety/tests/mutation.sh

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index dc90c32b..0e68bf79 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -44,17 +44,13 @@ jobs:
         include:
           - os: ${{ github.repository == 'commaai/opendbc' && 'namespace-profile-amd64-8x16' || 'ubuntu-latest' }}
           - os: ${{ github.repository == 'commaai/opendbc' && 'namespace-profile-macos-8x14' || 'macos-latest' }}
-    env:
-      GIT_REF: ${{ github.event_name == 'push' && github.ref == format('refs/heads/{0}', github.event.repository.default_branch) && github.event.before || format('origin/{0}', github.event.repository.default_branch) }}
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0  # need master to get diff
       - name: Run mutation tests
         run: |
           source setup.sh
           scons -j8
-          cd opendbc/safety/tests && ./mutation.sh
+          python opendbc/safety/tests/mutation.py
 
   car_diff:
     name: car diff
diff --git a/.gitignore b/.gitignore
index 269017bf..7aebff78 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,8 +20,8 @@
 /dist/
 .vscode/
 __pycache__/
-mull.yml
 *.profraw
+.sconf_temp/
 
 opendbc/can/build/
 opendbc/can/obj/
diff --git a/opendbc/safety/tests/libsafety/SConscript b/opendbc/safety/tests/libsafety/SConscript
index 10adf3f8..3117d985 100644
--- a/opendbc/safety/tests/libsafety/SConscript
+++ b/opendbc/safety/tests/libsafety/SConscript
@@ -26,18 +26,17 @@ env = Environment(
   tools=["default", "compilation_db"],
 )
 
-# The Mull plugin injects mutations that are dormant unless run with mull-runner
-if system == "Darwin":
-  mull_plugin = Dir('#').abspath + '/.mull/lib/mull-ir-frontend-18'
-else:
-  mull_plugin = '/usr/lib/mull-ir-frontend-18'
-if os.path.exists(mull_plugin):
-  # Only use mull plugin if it exists
-  env['CC'] = 'clang-18'
-  env.Append(CFLAGS=['-fprofile-arcs', '-ftest-coverage', f'-fpass-plugin={mull_plugin}'])
+# add coverage if available
+# Use TryCompile (not TryLink) because -nostdlib in CFLAGS breaks the link probe.
+conf = Configure(env, log_file=os.devnull)
+prev = env['CFLAGS'][:]
+env.Append(CFLAGS=['-fprofile-arcs', '-ftest-coverage'])
+has_coverage = conf.TryCompile('int x;\n', '.c')
+env['CFLAGS'] = prev
+if has_coverage:
+  env.Append(CFLAGS=['-fprofile-arcs', '-ftest-coverage'])
   env.Append(LINKFLAGS=['-fprofile-arcs', '-ftest-coverage'])
-  if system == "Darwin":
-    env.PrependENVPath('PATH', '/opt/homebrew/opt/llvm@18/bin')
+env = conf.Finish()
 
 safety = env.SharedObject("safety.os", "safety.c")
 libsafety = env.SharedLibrary("libsafety.so", [safety])
diff --git a/opendbc/safety/tests/libsafety/libsafety_py.py b/opendbc/safety/tests/libsafety/libsafety_py.py
index e93b782e..be98fdab 100644
--- a/opendbc/safety/tests/libsafety/libsafety_py.py
+++ b/opendbc/safety/tests/libsafety/libsafety_py.py
@@ -4,7 +4,6 @@ from cffi import FFI
 from opendbc.safety import LEN_TO_DLC
 
 libsafety_dir = os.path.dirname(os.path.abspath(__file__))
-libsafety_fn = os.path.join(libsafety_dir, "libsafety.so")
 
 ffi = FFI()
 
@@ -77,11 +76,18 @@ bool get_honda_fwd_brake(void);
 void set_honda_alt_brake_msg(bool c);
 void set_honda_bosch_long(bool c);
 int get_honda_hw(void);
+
+void mutation_set_active_mutant(int id);
+int mutation_get_active_mutant(void);
 """)
 
 class LibSafety:
   pass
-libsafety: LibSafety = ffi.dlopen(libsafety_fn)
+libsafety: LibSafety = ffi.dlopen(os.path.join(libsafety_dir, "libsafety.so"))
+
+def load(path):  # replace the module-level `libsafety` handle with the shared library at `path`
+  global libsafety  # only this module's name is rebound; `from ... import libsafety` copies made earlier keep the old handle
+  libsafety = ffi.dlopen(str(path))  # str() so that pathlib.Path arguments are accepted
 
 def make_CANPacket(addr: int, bus: int, dat):
   ret = ffi.new('CANPacket_t *')
diff --git a/opendbc/safety/tests/mutation.py b/opendbc/safety/tests/mutation.py
new file mode 100755
index 00000000..a137ef68
--- /dev/null
+++ b/opendbc/safety/tests/mutation.py
@@ -0,0 +1,633 @@
+#!/usr/bin/env python3
+import argparse
+import io
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import time
+import unittest
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from collections import Counter, namedtuple
+from dataclasses import dataclass
+from pathlib import Path
+
+import tree_sitter_c as ts_c
+import tree_sitter as ts
+
+
+ROOT = Path(__file__).resolve().parents[3]
+SAFETY_DIR = ROOT / "opendbc" / "safety"
+SAFETY_TESTS_DIR = ROOT / "opendbc" / "safety" / "tests"
+SAFETY_C_REL = Path("opendbc/safety/tests/libsafety/safety.c")
+
+ANSI_RESET = "\033[0m"
+ANSI_BOLD = "\033[1m"
+ANSI_RED = "\033[31m"
+ANSI_GREEN = "\033[32m"
+ANSI_YELLOW = "\033[33m"
+
+COMPARISON_OPERATOR_MAP = {
+  "==": "!=",
+  "!=": "==",
+  ">": "<=",
+  ">=": "<",
+  "<": ">=",
+  "<=": ">",
+}
+
+MUTATOR_FAMILIES = {
+  "increment": ("update_expression", {"++": "--"}),
+  "decrement": ("update_expression", {"--": "++"}),
+  "comparison": ("binary_expression", COMPARISON_OPERATOR_MAP),
+  "boundary": ("number_literal", {}),
+  "bitwise_assignment": ("assignment_expression", {"&=": "|=", "|=": "&=", "^=": "&="}),
+  "bitwise": ("binary_expression", {"&": "|", "|": "&", "^": "&"}),
+  "arithmetic_assignment": ("assignment_expression", {"+=": "-=", "-=": "+=", "*=": "/=", "/=": "*=", "%=": "*="}),
+  "arithmetic": ("binary_expression", {"+": "-", "-": "+", "*": "/", "/": "*", "%": "*"}),
+  "remove_negation": ("unary_expression", {"!": ""}),
+}
+
+
+_RawSite = namedtuple('_RawSite', 'expr_start expr_end op_start op_end line original_op mutated_op mutator')
+
+
+@dataclass(frozen=True)
+class MutationSite:
+  site_id: int  # position in the list returned by enumerate_sites; the instrumented C compares it with __mutation_active_id
+  expr_start: int  # [expr_start, expr_end): span of the whole expression in the preprocessed source
+  expr_end: int
+  op_start: int  # [op_start, op_end): span of the operator (or literal) token that gets replaced
+  op_end: int
+  line: int  # 1-based line number in the preprocessed source
+  original_op: str  # token text before mutation
+  mutated_op: str  # replacement token text
+  mutator: str  # name of the mutator family that produced this site (a MUTATOR_FAMILIES key)
+  origin_file: Path  # file and line recovered from the preprocessor line markers
+  origin_line: int
+
+
+@dataclass(frozen=True)
+class MutantResult:
+  site: MutationSite  # the mutation that was evaluated
+  outcome: str  # killed | survived | infra_error
+  test_sec: float  # wall-clock seconds spent running the tests; 0.0 when no timing was recorded
+  details: str  # exception text or crash note; "" for ordinary killed/survived results
+
+
+def colorize(text, color):
+  # Plain text unless stdout is a capable terminal and NO_COLOR is unset.
+  terminal = os.environ.get("TERM", "")
+  use_color = sys.stdout.isatty() and terminal not in ("", "dumb") and "NO_COLOR" not in os.environ
+  return f"{color}{text}{ANSI_RESET}" if use_color else text
+
+
+def format_mutation(original_op, mutated_op):  # render "<original>-><mutated>", in red when colors are enabled
+  return colorize(f"{original_op}->{mutated_op}", ANSI_RED)
+
+
+def _parse_int_literal(token):
+  """Split a C integer literal into (value, base, suffix); None if it cannot be parsed."""
+  match = re.fullmatch(r"([0-9][0-9a-fA-FxX]*)([uUlL]*)", token)
+  if match is None:
+    return None
+  digits, suffix = match.group(1), match.group(2)
+  try:
+    number = int(digits, 0)
+  except ValueError:
+    return None
+  return number, ("hex" if digits[:2].lower() == "0x" else "dec"), suffix
+
+
+def _site_key(site):  # dedup key: one site per (operator span, mutator family)
+  return (site.op_start, site.op_end, site.mutator)
+
+
+def _is_in_constexpr_context(node):
+  """Check if a node is inside a static or file-scope variable initializer."""
+  current = node.parent
+  while current is not None:  # examine every ancestor up to the root
+    if current.type == "init_declarator":
+      decl = current.parent
+      if decl and decl.type == "declaration":
+        for child in decl.children:
+          if child.type == "storage_class_specifier" and child.text == b"static":  # `static` declaration at any scope
+            return True
+        if decl.parent and decl.parent.type == "translation_unit":  # declaration sits directly at file scope
+          return True
+    current = current.parent
+  return False
+
+
+def _prepare_for_parsing(txt):
+  """Blank line markers and replace __typeof__() for tree-sitter. Keeps string length and every newline position."""
+  result = re.sub(
+    r'^[ \t]*#[ \t]+\d+[ \t]+"[^\n]*',
+    lambda m: " " * len(m.group()),
+    txt,
+    flags=re.MULTILINE,
+  )
+  # Replace __typeof__(...) with a same-length padded `int` (balanced-paren scan handles nesting)
+  parts = []
+  i = 0
+  for m in re.finditer(r"(?:__typeof__|typeof)\s*\(", result):
+    if m.start() < i:
+      continue  # skip nested typeof inside already-replaced region
+    parts.append(result[i:m.start()])
+    depth = 1
+    j = m.end()
+    while j < len(result) and depth > 0:  # stops at the matching ")" or at end of text if unbalanced
+      if result[j] == "(":
+        depth += 1
+      elif result[j] == ")":
+        depth -= 1
+      j += 1
+    parts.append("int" + re.sub(r"[^\n]", " ", result[m.start() + 3:j]))  # keep newlines so parser rows still match the line map
+    i = j
+  parts.append(result[i:])
+  return "".join(parts)
+
+
+def enumerate_sites(input_source, preprocessed_file):  # returns (sites, per-mutator counts, build-incompatible site ids, preprocessed text)
+  subprocess.run([
+    "cc", "-E", "-std=gnu11", "-nostdlib", "-fno-builtin", "-DALLOW_DEBUG",
+    f"-I{ROOT}", f"-I{ROOT / 'opendbc/safety/board'}",
+    str(input_source), "-o", str(preprocessed_file),
+  ], cwd=ROOT, capture_output=True, check=True)  # -E: preprocess only; raises CalledProcessError on failure
+
+  txt = preprocessed_file.read_text()
+
+  # Build line map from preprocessor directives: preprocessed line number -> (origin file, origin line)
+  line_map = {}
+  current_map_file = None
+  current_map_line = None
+  directive_re = re.compile(r'^\s*#\s*(\d+)\s+"([^"]+)"')
+  for pp_line_num, pp_line in enumerate(txt.splitlines(keepends=True), start=1):
+    m = directive_re.match(pp_line)
+    if m:
+      current_map_line = int(m.group(1))  # origin line number of the line that follows the marker
+      current_map_file = Path(m.group(2)).resolve()
+      continue
+    if current_map_file is not None and current_map_line is not None:
+      line_map[pp_line_num] = (current_map_file, current_map_line)
+      current_map_line += 1
+
+  # Parse with tree-sitter
+  parser = ts.Parser(ts.Language(ts_c.language()))
+  tree = parser.parse(_prepare_for_parsing(txt).encode())
+
+  # Build rule map: (node type, operator text) -> [(mutator, original_op, mutated_op), ...]
+  rule_map = {}
+  counts = {}
+  for mutator, (node_kind, op_map) in MUTATOR_FAMILIES.items():
+    counts[mutator] = 0
+    if mutator == "boundary":
+      continue  # boundary has no operator table; it is handled separately in the tree walk below
+    for original_op, mutated_op in op_map.items():
+      rule_map.setdefault((node_kind, original_op), []).append((mutator, original_op, mutated_op))
+
+  # Walk tree to find mutation sites
+  deduped = {}
+  build_incompatible_keys = set()
+  stack = [tree.root_node]
+  while stack:
+    node = stack.pop()
+    kind = node.type
+
+    # Boundary mutations: find number_literals inside comparison operands
+    if kind == "binary_expression":
+      cmp_op = node.child_by_field_name("operator")
+      if cmp_op and cmp_op.type in COMPARISON_OPERATOR_MAP:
+        lit_stack = []
+        for field in ("left", "right"):
+          operand = node.child_by_field_name(field)
+          if operand:
+            lit_stack.append(operand)
+        while lit_stack:
+          n = lit_stack.pop()
+          if n.type == "number_literal":
+            token = txt[n.start_byte:n.end_byte]  # NOTE(review): byte offsets used to index a str; assumes the preprocessed text is ASCII -- confirm
+            parsed = _parse_int_literal(token)
+            if parsed:
+              value, base, suffix = parsed
+              mutated = f"0x{value + 1:X}{suffix}" if base == "hex" else f"{value + 1}{suffix}"  # boundary mutation is literal + 1
+              line = n.start_point[0] + 1
+              bsite = _RawSite(n.start_byte, n.end_byte, n.start_byte, n.end_byte, line, token, mutated, "boundary")
+              key = _site_key(bsite)
+              deduped[key] = bsite  # the same literal can be reached from nested comparisons; keep one entry
+              if _is_in_constexpr_context(n):
+                build_incompatible_keys.add(key)
+          lit_stack.extend(n.children)
+
+    # Operator mutations: any node with an operator child
+    op_child = node.child_by_field_name("operator")
+    if op_child:
+      for mutator, original_op, mutated_op in rule_map.get((kind, op_child.type), []):
+        line = node.start_point[0] + 1  # tree-sitter rows are 0-based
+        site = _RawSite(node.start_byte, node.end_byte, op_child.start_byte, op_child.end_byte, line, original_op, mutated_op, mutator)
+        key = _site_key(site)
+        deduped[key] = site
+        if _is_in_constexpr_context(node):
+          build_incompatible_keys.add(key)
+
+    stack.extend(node.children)
+
+  sites = sorted(deduped.values(), key=lambda s: (s.op_start, s.mutator))  # deterministic order, hence stable site ids
+  out = []
+  build_incompatible_site_ids = set()
+  for s in sites:
+    mapped = line_map.get(s.line)
+    if mapped is None:
+      continue  # no line marker seen before this line, so the origin is unknown
+    origin_file, origin_line = mapped
+    if SAFETY_DIR not in origin_file.parents and origin_file != SAFETY_DIR:
+      continue  # only keep sites whose origin is under SAFETY_DIR
+    site_id = len(out)
+    site = MutationSite(
+      site_id=site_id, expr_start=s.expr_start, expr_end=s.expr_end,
+      op_start=s.op_start, op_end=s.op_end, line=s.line,
+      original_op=s.original_op, mutated_op=s.mutated_op, mutator=s.mutator,
+      origin_file=origin_file, origin_line=origin_line,
+    )
+    if _site_key(s) in build_incompatible_keys:
+      build_incompatible_site_ids.add(site_id)
+    out.append(site)
+    counts[s.mutator] += 1
+  return out, counts, build_incompatible_site_ids, txt
+
+
+def _build_core_tests(catalog):
+  """Build test ordering for core (non-mode) files.
+
+  One test per unique method name from evenly-spaced modules,
+  ordered by how widely each method is shared. Methods inherited by many
+  classes exercise the most fundamental safety logic and run first.
+  """
+  MAX_PER_METHOD = 5  # at most this many test IDs are kept per method name
+  method_freq = {}  # method name -> number of test IDs with that name across the catalog
+  method_by_module = {}  # method name -> {catalog key: first test ID seen in that module}
+  for name in sorted(catalog.keys()):
+    for test_id in catalog[name]:
+      method = test_id.rsplit(".", 1)[-1]  # last dotted component of the test ID
+      method_freq[method] = method_freq.get(method, 0) + 1
+      if method not in method_by_module:
+        method_by_module[method] = {}
+      if name not in method_by_module[method]:
+        method_by_module[method][name] = test_id
+  # Pick evenly-spaced modules for each method to maximize configuration diversity
+  method_ids = {}
+  for method, module_map in method_by_module.items():
+    modules = sorted(module_map.keys())
+    n = len(modules)
+    if n <= MAX_PER_METHOD:
+      method_ids[method] = [module_map[m] for m in modules]
+    else:
+      step = n / MAX_PER_METHOD  # fractional stride; int() below floors it to an index
+      method_ids[method] = [module_map[modules[int(i * step)]] for i in range(MAX_PER_METHOD)]
+  # Round-robin: first instance of each method (by freq), then second, etc.
+  # This ensures diverse early coverage with failfast.
+  sorted_methods = sorted(method_freq, key=lambda m: -method_freq[m])  # most frequent first; ties keep insertion order
+  ordered = []
+  for round_idx in range(MAX_PER_METHOD):
+    for m in sorted_methods:
+      ids = method_ids.get(m, [])
+      if round_idx < len(ids):
+        ordered.append(ids[round_idx])
+  return ordered
+
+
+def build_priority_tests(site, catalog, core_tests):
+  """Return the ordered test IDs to run against a mutation site.
+
+  Sites under opendbc/safety/modes/ get every test from test_<stem>.py;
+  all other sites share the pre-computed core_tests list.
+  """
+  origin = site.origin_file
+  parts = origin.relative_to(ROOT).parts
+  in_modes_dir = parts[:3] == ("opendbc", "safety", "modes") and len(parts) >= 4
+
+  if not in_modes_dir:
+    return core_tests
+  # .get() yields [] when the catalog has no matching test module
+  return list(catalog.get(f"test_{origin.stem}.py", []))
+
+
+def format_site_snippet(site, context_lines=2):  # numbered source excerpt around the site, with the mutation marked inline
+  source = site.origin_file
+  text = source.read_text()
+  lines = text.splitlines()
+  display_ln = site.origin_line
+  line_idx = display_ln - 1  # origin_line is 1-based
+  start = max(0, line_idx - context_lines)
+  end = min(len(lines), line_idx + context_lines + 1)
+
+  line_text = lines[line_idx]
+  rel_start = line_text.find(site.original_op)  # NOTE(review): first textual match on the line; may not be the mutated token if it occurs more than once
+  if rel_start < 0:
+    rel_start = 0  # token not found on the line: place the marker at column 0
+  rel_end = rel_start + len(site.original_op)
+
+  snippet_lines = []
+  width = len(str(end))  # digits needed for the largest line number shown
+  for idx in range(start, end):
+    num = idx + 1
+    prefix = ">" if idx == line_idx else " "
+    line = lines[idx]
+    if idx == line_idx:
+      marker = colorize(f"[[{site.original_op}->{site.mutated_op}]]", ANSI_RED)
+      line = f"{line[:rel_start]}{marker}{line[rel_end:]}"
+    snippet_lines.append(f"{prefix} {num:>{width}} | {line}")
+  return "\n".join(snippet_lines)
+
+
+def render_progress(completed, total, killed, survived, infra, elapsed_sec):  # one-line progress bar with counters, rate and ETA
+  bar_width = 30
+  filled = int((completed / total) * bar_width) if total > 0 else 0  # empty bar instead of ZeroDivisionError when nothing is scheduled
+  bar = "#" * filled + "-" * (bar_width - filled)
+
+  rate = completed / elapsed_sec if elapsed_sec > 0 else 0.0  # mutants finished per second so far
+  remaining = total - completed
+  eta = (remaining / rate) if rate > 0 else 0.0  # stays 0.0 until a rate can be measured
+
+  killed_text = colorize(f"k:{killed}", ANSI_GREEN)
+  survived_text = colorize(f"s:{survived}", ANSI_RED)
+  infra_text = colorize(f"i:{infra}", ANSI_YELLOW)
+
+  return f"[{bar}] {completed}/{total} {killed_text} {survived_text} {infra_text} mps:{rate:.2f} elapsed:{elapsed_sec:.1f}s eta:{eta:.1f}s"
+
+
+def print_live_status(text, *, final=False):
+  # On a terminal, redraw the same line until the final update; otherwise emit one line per update.
+  interactive = sys.stdout.isatty()
+  line_end = "" if interactive and not final else "\n"
+  print(f"\r{text}" if interactive else text, end=line_end, flush=True)
+
+
+def _discover_test_catalog():  # maps each test_*.py file name in SAFETY_TESTS_DIR to the unittest IDs it contains
+  loader = unittest.TestLoader()
+  catalog = {}
+  for test_file in sorted(SAFETY_TESTS_DIR.glob("test_*.py")):
+    module_name = ".".join(test_file.relative_to(ROOT).with_suffix("").parts)  # dotted module path relative to ROOT
+    suite = loader.loadTestsFromName(module_name)  # imports the module as a side effect
+    catalog[test_file.name] = [t.id() for group in suite for t in group]  # flatten two levels: module suite -> nested suites -> tests
+  return catalog
+
+
+def run_unittest(targets, lib_path, mutant_id, verbose):  # returns the ID of the first failing or erroring test, or None if all pass
+  from opendbc.safety.tests.libsafety import libsafety_py
+  libsafety_py.load(lib_path)  # swap in the instrumented library
+  libsafety_py.libsafety.mutation_set_active_mutant(mutant_id)  # site ids start at 0, so -1 leaves every mutation inactive
+
+  if verbose:
+    print("Running unittest targets:", ", ".join(targets), flush=True)
+
+  loader = unittest.TestLoader()
+  stream = io.StringIO()  # runner output is collected here and discarded
+  runner = unittest.TextTestRunner(stream=stream, verbosity=0, failfast=True)  # failfast: stop at the first failure or error
+
+  suite = unittest.TestSuite()
+  for target in targets:
+    suite.addTests(loader.loadTestsFromName(target))
+  result = runner.run(suite)
+  if result.failures:
+    return result.failures[0][0].id()  # entries are (test, traceback) pairs
+  if result.errors:
+    return result.errors[0][0].id()
+  return None
+
+
+def _instrument_source(source, sites):  # wrap every site expression in a ternary switched by __mutation_active_id
+  # Sort by start ascending, end descending (outermost first when same start)
+  sorted_sites = sorted(sites, key=lambda s: (s.expr_start, -s.expr_end))
+
+  # Build containment forest using a stack; each node is [site, children]
+  roots = []
+  stack = []
+  for site in sorted_sites:
+    while stack and stack[-1][0].expr_end <= site.expr_start:  # pop expressions that end before this one starts
+      stack.pop()
+    node = [site, []]
+    if stack:
+      stack[-1][1].append(node)  # nested inside the expression on top of the stack
+    else:
+      roots.append(node)
+    stack.append(node)
+
+  def build_replacement(site, children):
+    parts = []
+    pos = site.expr_start
+    op_rel = None  # offset of this site's operator within the rebuilt expression text
+    running_len = 0  # length of the text appended to `parts` so far
+
+    for child_site, child_children in children:
+      seg = source[pos : child_site.expr_start]  # original text between the previous child and this one
+      if op_rel is None and site.op_start >= pos and site.op_start < child_site.expr_start:
+        op_rel = running_len + (site.op_start - pos)
+      parts.append(seg)
+      running_len += len(seg)
+
+      child_repl = build_replacement(child_site, child_children)  # the child is instrumented first and embedded as-is
+      parts.append(child_repl)
+      running_len += len(child_repl)
+      pos = child_site.expr_end
+
+    seg = source[pos : site.expr_end]  # tail after the last child
+    if op_rel is None and site.op_start >= pos:
+      op_rel = running_len + (site.op_start - pos)
+    parts.append(seg)
+
+    expr_text = "".join(parts)
+    op_len = site.op_end - site.op_start
+    assert op_rel is not None and expr_text[op_rel : op_rel + op_len] == site.original_op, (
+      f"Operator mismatch (site_id={site.site_id}): expected {site.original_op!r} at offset {op_rel}"
+    )
+    mutated_expr = f"{expr_text[:op_rel]}{site.mutated_op}{expr_text[op_rel + op_len :]}"
+    return f"((__mutation_active_id == {site.site_id}) ? ({mutated_expr}) : ({expr_text}))"
+
+  result_parts = []
+  pos = 0
+  for site, children in roots:
+    result_parts.append(source[pos : site.expr_start])  # untouched text between top-level sites
+    result_parts.append(build_replacement(site, children))
+    pos = site.expr_end
+  result_parts.append(source[pos:])
+  return "".join(result_parts)
+
+
+def compile_mutated_library(preprocessed_source, sites, output_so):  # instrument the source and compile it into the shared library `output_so`
+  instrumented = _instrument_source(preprocessed_source, sites)
+
+  prelude = """
+    static int __mutation_active_id = -1;
+    void mutation_set_active_mutant(int id) { __mutation_active_id = id; }
+    int mutation_get_active_mutant(void) { return __mutation_active_id; }
+  """
+  marker_re = re.compile(r'^\s*#\s+\d+\s+"[^\n]*\n?', re.MULTILINE)  # matches preprocessor line markers such as `# 12 "file.h"`
+  instrumented = prelude + marker_re.sub("", instrumented)  # markers are stripped only after instrumenting, so site offsets stay valid
+
+  mutation_source = output_so.with_suffix(".c")  # the generated C file is written next to the output library
+  mutation_source.write_text(instrumented)
+
+  subprocess.run([
+    "cc", "-shared", "-fPIC", "-w", "-fno-builtin", "-std=gnu11",
+    "-g0", "-O0", "-DALLOW_DEBUG",
+    str(mutation_source), "-o", str(output_so),
+  ], cwd=ROOT, check=True)  # raises CalledProcessError if the compiler fails
+
+
+def eval_mutant(site, targets, lib_path, verbose):
+  # Any exception raised while running the tests is reported as an infra_error result, not propagated.
+  try:
+    started = time.perf_counter()
+    first_failure = run_unittest(targets, lib_path, mutant_id=site.site_id, verbose=verbose)
+    elapsed = time.perf_counter() - started
+    outcome = "survived" if first_failure is None else "killed"
+    return MutantResult(site, outcome, elapsed, "")
+  except Exception as exc:
+    return MutantResult(site, "infra_error", 0.0, str(exc))
+
+
+def main():  # exit code: 0 = no unexpected survivors, 1 = unexpected survivors, 2 = infra/setup failure
+  parser = argparse.ArgumentParser(description="Run strict safety mutation")
+  parser.add_argument("-j", type=int, default=max((os.cpu_count() or 1) - 1, 1), help="parallel mutants to run")
+  parser.add_argument("--max-mutants", type=int, default=0, help="optional limit for debugging (0 means all)")
+  parser.add_argument("--list-only", action="store_true", help="list discovered candidates and exit")
+  parser.add_argument("--verbose", action="store_true", help="print extra debug output")
+  args = parser.parse_args()
+
+  start = time.perf_counter()
+
+  with tempfile.TemporaryDirectory(prefix="mutation-op-run-") as run_tmp_dir:  # preprocessed source, generated C and .so all live here
+    preprocessed_file = Path(run_tmp_dir) / "safety_preprocessed.c"
+    sites, mutator_counts, build_incompatible_ids, preprocessed_source = enumerate_sites(ROOT / SAFETY_C_REL, preprocessed_file)
+    assert len(sites) > 0
+
+    if args.max_mutants > 0:
+      sites = sites[: args.max_mutants]
+
+    mutator_summary = ", ".join(f"{name} ({c})" for name in MUTATOR_FAMILIES if (c := mutator_counts.get(name, 0)) > 0)  # counts cover all discovered sites, before any --max-mutants cut
+    print(f"Found {len(sites)} unique candidates: {mutator_summary}", flush=True)
+    if args.list_only:
+      for site in sites:
+        mutation = format_mutation(site.original_op, site.mutated_op)
+        print(f"  #{site.site_id:03d} {site.origin_file.relative_to(ROOT)}:{site.origin_line} [{site.mutator}] {mutation}")
+      return 0
+
+    print(f"Running {len(sites)} mutants with {args.j} workers", flush=True)
+
+    discovered_count = len(sites)
+    selected_site_ids = {s.site_id for s in sites}
+    build_incompatible_ids &= selected_site_ids  # ignore incompatible sites that --max-mutants already dropped
+    pruned_compile_sites = len(build_incompatible_ids)
+    if pruned_compile_sites > 0:
+      sites = [s for s in sites if s.site_id not in build_incompatible_ids]
+      print(f"Pruned {pruned_compile_sites} build-incompatible mutants from constant-expression initializers", flush=True)
+    if not sites:
+      print("Failed to build mutation library: all sites were pruned as build-incompatible", flush=True)
+      return 2
+
+    mutation_lib = Path(run_tmp_dir) / "libsafety_mutation.so"
+    compile_mutated_library(preprocessed_source, sites, mutation_lib)  # one library holds every mutant, selected at runtime by id
+
+    # Discover all tests by importing modules in the main process.
+    # Forked workers inherit these imports, eliminating per-worker import cost.
+    catalog = _discover_test_catalog()
+
+    # Baseline smoke check: with no mutant active, the instrumented library must still pass
+    baseline_ids = catalog.get("test_defaults.py", [])[:5]
+    baseline_failed = run_unittest(baseline_ids, mutation_lib, mutant_id=-1, verbose=args.verbose)
+    if baseline_failed is not None:
+      print("Baseline smoke failed with mutant_id=-1; aborting to avoid false kill signals.", flush=True)
+      print(f"  failed_test: {baseline_failed}", flush=True)
+      return 2
+
+    # Pre-compute test targets per mutation site
+    core_tests = _build_core_tests(catalog)
+    site_targets = {site.site_id: build_priority_tests(site, catalog, core_tests) for site in sites}
+
+    results = []
+    counts = Counter()  # outcome -> number of mutants
+
+    with ProcessPoolExecutor(max_workers=args.j) as pool:
+      future_map = {
+        pool.submit(eval_mutant, site, site_targets[site.site_id], mutation_lib, args.verbose): site for site in sites
+      }
+      print_live_status(render_progress(0, len(sites), 0, 0, 0, 0.0))
+      try:
+        for fut in as_completed(future_map):
+          try:
+            res = fut.result()
+          except Exception:
+            site = future_map[fut]
+            res = MutantResult(site, "killed", 0.0, "worker process crashed")  # eval_mutant catches test exceptions itself, so this is a failure of the worker/future
+          results.append(res)
+          counts[res.outcome] += 1
+          elapsed_now = time.perf_counter() - start
+          done = len(results) == len(sites)
+          print_live_status(render_progress(len(results), len(sites), counts["killed"], counts["survived"],
+                                            counts["infra_error"], elapsed_now), final=done)
+      except Exception:
+        # Pool broken — mark all unfinished mutants as killed (crash = behavioral change detected)
+        completed_ids = {r.site.site_id for r in results}
+        for site in sites:
+          if site.site_id not in completed_ids:
+            results.append(MutantResult(site, "killed", 0.0, "pool broken"))
+            counts["killed"] += 1
+        elapsed_now = time.perf_counter() - start
+        print_live_status(render_progress(len(results), len(sites), counts["killed"], counts["survived"], counts["infra_error"], elapsed_now), final=True)
+
+    survivors = sorted((r for r in results if r.outcome == "survived"), key=lambda r: r.site.site_id)
+    if survivors:
+      print("", flush=True)
+      print(colorize("Surviving mutants", ANSI_RED), flush=True)
+      for res in survivors:
+        loc = f"{res.site.origin_file.relative_to(ROOT)}:{res.site.origin_line}"
+        mutation = format_mutation(res.site.original_op, res.site.mutated_op)
+        print(f"- #{res.site.site_id} {loc} [{res.site.mutator}] {mutation}", flush=True)
+        print(format_site_snippet(res.site), flush=True)
+
+    infra_results = sorted((r for r in results if r.outcome == "infra_error"), key=lambda r: r.site.site_id)
+    if infra_results:
+      print("", flush=True)
+      print(colorize("Infra errors", ANSI_YELLOW), flush=True)
+      for res in infra_results:
+        loc = f"{res.site.origin_file.relative_to(ROOT)}:{res.site.origin_line}"
+        detail = res.details.splitlines()[0] if res.details else "unknown error"  # first line of the error text only
+        print(f"- #{res.site.site_id} {loc}: {detail}", flush=True)
+
+    elapsed = time.perf_counter() - start
+    total_test_sec = sum(r.test_sec for r in results)
+    print("", flush=True)
+    print(colorize("Mutation summary", ANSI_BOLD), flush=True)
+    print(f"  discovered: {discovered_count}", flush=True)
+    print(f"  pruned_build_incompatible: {pruned_compile_sites}", flush=True)
+    print(f"  total: {len(sites)}", flush=True)
+    print(f"  killed: {colorize(str(counts['killed']), ANSI_GREEN)}", flush=True)
+    print(f"  survived: {colorize(str(counts['survived']), ANSI_RED)}", flush=True)
+    print(f"  infra_error: {colorize(str(counts['infra_error']), ANSI_YELLOW)}", flush=True)
+    print(f"  test_time_sum: {total_test_sec:.2f}s", flush=True)
+    print(f"  avg_test_per_mutant: {total_test_sec / len(results):.3f}s", flush=True)
+    print(f"  mutants_per_second: {len(sites) / elapsed:.2f}", flush=True)
+    print(f"  elapsed: {elapsed:.2f}s", flush=True)
+
+    if counts["infra_error"] > 0:
+      return 2
+
+    # TODO: fix these surviving mutants and delete this block
+    known_survivors = {  # (origin file relative to ROOT, origin line, mutator) entries excluded from the failure check
+      ("opendbc/safety/helpers.h", 40, "arithmetic"),
+      ("opendbc/safety/lateral.h", 105, "boundary"),
+      ("opendbc/safety/lateral.h", 195, "boundary"),
+      ("opendbc/safety/lateral.h", 239, "boundary"),
+      ("opendbc/safety/lateral.h", 337, "arithmetic"),
+    }
+    survivors = [r for r in survivors if (str(r.site.origin_file.relative_to(ROOT)), r.site.origin_line, r.site.mutator) not in known_survivors]
+
+    if survivors:
+      return 1
+    return 0
+
+
+if __name__ == "__main__":
+  raise SystemExit(main())
diff --git a/opendbc/safety/tests/mutation.sh b/opendbc/safety/tests/mutation.sh
deleted file mode 100755
index 3a46b005..00000000
--- a/opendbc/safety/tests/mutation.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
-cd $DIR
-
-source $DIR/../../../setup.sh
-
-GIT_REF="${GIT_REF:-origin/master}"
-GIT_ROOT=$(git rev-parse --show-toplevel)
-cat > $GIT_ROOT/mull.yml <<EOF
-mutators: [cxx_increment, cxx_decrement, cxx_comparison, cxx_boundary, cxx_bitwise_assignment, cxx_bitwise, cxx_arithmetic_assignment, cxx_arithmetic, cxx_remove_negation]
-timeout: 1000000
-gitDiffRef: $GIT_REF
-gitProjectRoot: $GIT_ROOT
-EOF
-
-scons -j4 -D
-
-mull-runner-18 --debug --ld-search-path /lib/x86_64-linux-gnu/ ./libsafety/libsafety.so -test-program=pytest -- -n8 --ignore-glob=misra/*
\ No newline at end of file
diff --git a/opendbc/safety/tests/test.sh b/opendbc/safety/tests/test.sh
index 1ff4ee82..b56e2891 100755
--- a/opendbc/safety/tests/test.sh
+++ b/opendbc/safety/tests/test.sh
@@ -13,10 +13,13 @@ scons -j$(nproc) -D
 # run safety tests and generate coverage data
 pytest -n8 --ignore-glob=misra/*
 
+# NOTE: we accept that these tools will have slight differences,
+# and in return, we get to use the stock toolchain instead of
+# installing LLVM on all users' machines
 if [ "$(uname)" = "Darwin" ]; then
-  GCOV_EXEC="/opt/homebrew/opt/llvm@18/bin/llvm-cov gcov"
+  GCOV_EXEC="llvm-cov gcov"
 else
-  GCOV_EXEC="llvm-cov-18 gcov"
+  GCOV_EXEC="gcov"
 fi
 
 # generate and open report
diff --git a/pyproject.toml b/pyproject.toml
index ab3d3bb6..01298c15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,8 @@ dependencies = [
 testing = [
   "comma-car-segments @ https://huggingface.co/datasets/commaai/commaCarSegments/resolve/main/dist/comma_car_segments-0.1.0-py3-none-any.whl",
   "cffi",
+  "tree-sitter",
+  "tree-sitter-c",
   "gcovr",
   # FIXME: pytest 9.0.0 doesn't support unittest.SkipTest
   "pytest==8.4.2",
@@ -139,6 +141,9 @@ unsupported-operator = "ignore"
 # Return types with complex callable signatures
 invalid-return-type = "ignore"
 
+# unittest TestSuite iteration (TestCase | TestSuite is always iterable in practice)
+not-iterable = "ignore"
+
 # Test class method signature differences
 too-many-positional-arguments = "ignore"
 
diff --git a/setup.sh b/setup.sh
index b318bba7..6966339b 100755
--- a/setup.sh
+++ b/setup.sh
@@ -7,27 +7,6 @@ BASEDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
 export PYTHONPATH=$BASEDIR
 
 # *** dependencies install ***
-if [ "$(uname -s)" = "Linux" ]; then
-  if ! command -v "mull-runner-18" > /dev/null 2>&1; then
-    curl -1sLf 'https://dl.cloudsmith.io/public/mull-project/mull-stable/setup.deb.sh' | sudo -E bash
-    sudo apt-get update && sudo apt-get install -y clang-18 mull-18
-  fi
-elif [ "$(uname -s)" = "Darwin" ]; then
-  if ! brew list llvm@18 &>/dev/null; then
-    brew install llvm@18
-  fi
-  if [ ! -f "$BASEDIR/.mull/bin/mull-runner-18" ]; then
-    MULL_VERSION="0.26.1"
-    MULL_ZIP="Mull-18-${MULL_VERSION}-LLVM-18.1-macOS-arm64-14.7.4.zip"
-    MULL_DIR="Mull-18-${MULL_VERSION}-LLVM-18.1-macOS-arm64-14.7.4"
-    curl -LO "https://github.com/mull-project/mull/releases/download/${MULL_VERSION}/${MULL_ZIP}"
-    unzip -o "$MULL_ZIP"
-    mv "$MULL_DIR" "$BASEDIR/.mull"
-    rm "$MULL_ZIP"
-  fi
-  export PATH="$BASEDIR/.mull/bin:$PATH"
-fi
-
 if ! command -v uv &>/dev/null; then
   echo "'uv' is not installed. Installing 'uv'..."
   curl -LsSf https://astral.sh/uv/install.sh | sh
diff --git a/uv.lock b/uv.lock
index 445222a9..222907d9 100644
--- a/uv.lock
+++ b/uv.lock
@@ -604,6 +604,8 @@ testing = [
     { name = "pytest-subtests" },
     { name = "pytest-xdist" },
     { name = "ruff" },
+    { name = "tree-sitter" },
+    { name = "tree-sitter-c" },
     { name = "ty" },
     { name = "zstandard" },
 ]
@@ -635,6 +637,8 @@ requires-dist = [
     { name = "ruff", marker = "extra == 'testing'" },
     { name = "scons" },
     { name = "tqdm" },
+    { name = "tree-sitter", marker = "extra == 'testing'" },
+    { name = "tree-sitter-c", marker = "extra == 'testing'" },
     { name = "ty", marker = "extra == 'testing'" },
     { name = "zstandard", marker = "extra == 'testing'" },
 ]
@@ -1000,6 +1004,43 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
 ]
 
+[[package]]
+name = "tree-sitter"
+version = "0.25.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz", hash = "sha256:fe43c158555da46723b28b52e058ad444195afd1db3ca7720c59a254544e9c20", size = 177961, upload-time = "2025-09-25T17:37:59.751Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7c/22/88a1e00b906d26fa8a075dd19c6c3116997cb884bf1b3c023deb065a344d/tree_sitter-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ca72d841215b6573ed0655b3a5cd1133f9b69a6fa561aecad40dca9029d75b", size = 146752, upload-time = "2025-09-25T17:37:24.775Z" },
+    { url = "https://files.pythonhosted.org/packages/57/1c/22cc14f3910017b7a76d7358df5cd315a84fe0c7f6f7b443b49db2e2790d/tree_sitter-0.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc0351cfe5022cec5a77645f647f92a936b38850346ed3f6d6babfbeeeca4d26", size = 137765, upload-time = "2025-09-25T17:37:26.103Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/0c/d0de46ded7d5b34631e0f630d9866dab22d3183195bf0f3b81de406d6622/tree_sitter-0.25.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1799609636c0193e16c38f366bda5af15b1ce476df79ddaae7dd274df9e44266", size = 604643, upload-time = "2025-09-25T17:37:27.398Z" },
+    { url = "https://files.pythonhosted.org/packages/34/38/b735a58c1c2f60a168a678ca27b4c1a9df725d0bf2d1a8a1c571c033111e/tree_sitter-0.25.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e65ae456ad0d210ee71a89ee112ac7e72e6c2e5aac1b95846ecc7afa68a194c", size = 632229, upload-time = "2025-09-25T17:37:28.463Z" },
+    { url = "https://files.pythonhosted.org/packages/32/f6/cda1e1e6cbff5e28d8433578e2556d7ba0b0209d95a796128155b97e7693/tree_sitter-0.25.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:49ee3c348caa459244ec437ccc7ff3831f35977d143f65311572b8ba0a5f265f", size = 629861, upload-time = "2025-09-25T17:37:29.593Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/19/427e5943b276a0dd74c2a1f1d7a7393443f13d1ee47dedb3f8127903c080/tree_sitter-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:56ac6602c7d09c2c507c55e58dc7026b8988e0475bd0002f8a386cce5e8e8adc", size = 127304, upload-time = "2025-09-25T17:37:30.549Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/d9/eef856dc15f784d85d1397a17f3ee0f82df7778efce9e1961203abfe376a/tree_sitter-0.25.2-cp311-cp311-win_arm64.whl", hash = "sha256:b3d11a3a3ac89bb8a2543d75597f905a9926f9c806f40fcca8242922d1cc6ad5", size = 113990, upload-time = "2025-09-25T17:37:31.852Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/9e/20c2a00a862f1c2897a436b17edb774e831b22218083b459d0d081c9db33/tree_sitter-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ddabfff809ffc983fc9963455ba1cecc90295803e06e140a4c83e94c1fa3d960", size = 146941, upload-time = "2025-09-25T17:37:34.813Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/04/8512e2062e652a1016e840ce36ba1cc33258b0dcc4e500d8089b4054afec/tree_sitter-0.25.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c0c0ab5f94938a23fe81928a21cc0fac44143133ccc4eb7eeb1b92f84748331c", size = 137699, upload-time = "2025-09-25T17:37:36.349Z" },
+    { url = "https://files.pythonhosted.org/packages/47/8a/d48c0414db19307b0fb3bb10d76a3a0cbe275bb293f145ee7fba2abd668e/tree_sitter-0.25.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd12d80d91d4114ca097626eb82714618dcdfacd6a5e0955216c6485c350ef99", size = 607125, upload-time = "2025-09-25T17:37:37.725Z" },
+    { url = "https://files.pythonhosted.org/packages/39/d1/b95f545e9fc5001b8a78636ef942a4e4e536580caa6a99e73dd0a02e87aa/tree_sitter-0.25.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b43a9e4c89d4d0839de27cd4d6902d33396de700e9ff4c5ab7631f277a85ead9", size = 635418, upload-time = "2025-09-25T17:37:38.922Z" },
+    { url = "https://files.pythonhosted.org/packages/de/4d/b734bde3fb6f3513a010fa91f1f2875442cdc0382d6a949005cd84563d8f/tree_sitter-0.25.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbb1706407c0e451c4f8cc016fec27d72d4b211fdd3173320b1ada7a6c74c3ac", size = 631250, upload-time = "2025-09-25T17:37:40.039Z" },
+    { url = "https://files.pythonhosted.org/packages/46/f2/5f654994f36d10c64d50a192239599fcae46677491c8dd53e7579c35a3e3/tree_sitter-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:6d0302550bbe4620a5dc7649517c4409d74ef18558276ce758419cf09e578897", size = 127156, upload-time = "2025-09-25T17:37:41.132Z" },
+    { url = "https://files.pythonhosted.org/packages/67/23/148c468d410efcf0a9535272d81c258d840c27b34781d625f1f627e2e27d/tree_sitter-0.25.2-cp312-cp312-win_arm64.whl", hash = "sha256:0c8b6682cac77e37cfe5cf7ec388844957f48b7bd8d6321d0ca2d852994e10d5", size = 113984, upload-time = "2025-09-25T17:37:42.074Z" },
+]
+
+[[package]]
+name = "tree-sitter-c"
+version = "0.24.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/f5/ba8cd08d717277551ade8537d3aa2a94b907c6c6e0fbcf4e4d8b1c747fa3/tree_sitter_c-0.24.1.tar.gz", hash = "sha256:7d2d0cda0b8dda428c81440c1e94367f9f13548eedca3f49768bde66b1422ad6", size = 228014, upload-time = "2025-05-24T17:32:58.384Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/15/c7/c817be36306e457c2d36cc324789046390d9d8c555c38772429ffdb7d361/tree_sitter_c-0.24.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9c06ac26a1efdcc8b26a8a6970fbc6997c4071857359e5837d4c42892d45fe1e", size = 80940, upload-time = "2025-05-24T17:32:49.967Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/42/283909467290b24fdbc29bb32ee20e409a19a55002b43175d66d091ca1a4/tree_sitter_c-0.24.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:942bcd7cbecd810dcf7ca6f8f834391ebf0771a89479646d891ba4ca2fdfdc88", size = 86304, upload-time = "2025-05-24T17:32:51.271Z" },
+    { url = "https://files.pythonhosted.org/packages/94/53/fb4f61d4e5f15ec3da85774a4df8e58d3b5b73036cf167f0203b4dd9d158/tree_sitter_c-0.24.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a74cfd7a11ca5a961fafd4d751892ee65acae667d2818968a6f079397d8d28c", size = 109996, upload-time = "2025-05-24T17:32:52.119Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/e8/fc541d34ee81c386c5453c2596c1763e8e9cd7cb0725f39d7dfa2276afa4/tree_sitter_c-0.24.1-cp310-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6a807705a3978911dc7ee26a7ad36dcfacb6adfc13c190d496660ec9bd66707", size = 98137, upload-time = "2025-05-24T17:32:53.361Z" },
+    { url = "https://files.pythonhosted.org/packages/32/c6/d0563319cae0d5b5780a92e2806074b24afea2a07aa4c10599b899bda3ec/tree_sitter_c-0.24.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:789781afcb710df34144f7e2a20cd80e325114b9119e3956c6bd1dd2d365df98", size = 94148, upload-time = "2025-05-24T17:32:54.855Z" },
+    { url = "https://files.pythonhosted.org/packages/50/5a/6361df7f3fa2310c53a0d26b4702a261c332da16fa9d801e381e3a86e25f/tree_sitter_c-0.24.1-cp310-abi3-win_amd64.whl", hash = "sha256:290bff0f9c79c966496ebae45042f77543e6e4aea725f40587a8611d566231a8", size = 84703, upload-time = "2025-05-24T17:32:56.084Z" },
+    { url = "https://files.pythonhosted.org/packages/22/6a/210a302e8025ac492cbaea58d3720d66b7d8034c5d747ac5e4d2d235aa25/tree_sitter_c-0.24.1-cp310-abi3-win_arm64.whl", hash = "sha256:d46bbda06f838c2dcb91daf767813671fd366b49ad84ff37db702129267b46e1", size = 82715, upload-time = "2025-05-24T17:32:57.248Z" },
+]
+
 [[package]]
 name = "ty"
 version = "0.0.18"