From 73f720220bf326d52e9ae644e4cabf634abb06f4 Mon Sep 17 00:00:00 2001
From: James Vecellio-Grant <159560811+Discountchubbs@users.noreply.github.com>
Date: Mon, 9 Feb 2026 20:24:25 -0800
Subject: [PATCH] modeld: simplify model run processing (#37138)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hi! The point of this PR is to make the model run easier to read. On the latest tinygrad, numpy().flatten() empirically produces the same result as the internal contiguous().realize().uop.base.buffer.numpy() chain. numpy() is also documented (it has a docstring), which can help new contributors learn what each step of the execution does. Torq_boi or yassine, I know you want proof in the code base, so here it is. As of tinygrad commit 2f55005:

In tinygrad_repo/tinygrad/tensor.py:
Lines 316-318 (def _buffer): ensures the tensor is contiguous and realized before accessing the raw buffer.
Line 378 (def numpy): Wraps the buffer access and adds a reshape to match the tensor shape.
self._buffer() is what executes contiguous().realize() and returns the buffer object.
Calling numpy() on that buffer object returns a 1D array (defined in tinygrad/device.py:193 via np.frombuffer).
The reshape(self.shape) at the end of Tensor.numpy() then adds dimensions to that 1D array. The added .flatten() removes those dimensions, flattening it back to a 1D array. The result is effectively the same as what is currently done, but with less complexity.
---
 selfdrive/modeld/dmonitoringmodeld.py | 2 +-
 selfdrive/modeld/modeld.py            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/selfdrive/modeld/dmonitoringmodeld.py b/selfdrive/modeld/dmonitoringmodeld.py
index b5cf2f71e..7cac17fa8 100755
--- a/selfdrive/modeld/dmonitoringmodeld.py
+++ b/selfdrive/modeld/dmonitoringmodeld.py
@@ -60,7 +60,7 @@ class ModelState:
     self.warp_inputs_np['transform'][:] = transform[:]
     self.tensor_inputs['input_img'] = self.image_warp(self.warp_inputs['frame'], self.warp_inputs['transform']).realize()
 
-    output = self.model_run(**self.tensor_inputs).contiguous().realize().uop.base.buffer.numpy()
+    output = self.model_run(**self.tensor_inputs).numpy().flatten()
 
     t2 = time.perf_counter()
     return output, t2 - t1
diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py
index f76d5b081..01052840a 100755
--- a/selfdrive/modeld/modeld.py
+++ b/selfdrive/modeld/modeld.py
@@ -211,7 +211,7 @@ class ModelState:
     if prepare_only:
       return None
 
-    self.vision_output = self.vision_run(**vision_inputs).contiguous().realize().uop.base.buffer.numpy()
+    self.vision_output = self.vision_run(**vision_inputs).numpy().flatten()
     vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(self.vision_output, self.vision_output_slices))
 
     self.full_input_queues.enqueue({'features_buffer': vision_outputs_dict['hidden_state'], 'desire_pulse': new_desire})
@@ -219,7 +219,7 @@ class ModelState:
       self.numpy_inputs[k][:] = self.full_input_queues.get(k)[k]
     self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
 
-    self.policy_output = self.policy_run(**self.policy_inputs).contiguous().realize().uop.base.buffer.numpy()
+    self.policy_output = self.policy_run(**self.policy_inputs).numpy().flatten()
     policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(self.policy_output, self.policy_output_slices))
     combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict}
     if SEND_RAW_PRED: