carrotpilot/tinygrad_repo/extra/optimization/rl.py

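# Overview (descriptive comment, not in the original file): a policy-gradient
# training loop for kernel optimization. A PolicyNet scores optimization
# actions for randomly sampled kernel ASTs, episodes are rolled out by timing
# each optimized linearizer, and the policy is updated from the measured
# speedups.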
import os
import numpy as np
import math, random
from tinygrad.tensor import Tensor
from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict
from tinygrad.codegen.opt.search import actions, bufs_from_lin, get_kernel_actions
from tinygrad.nn.optim import Adam
from extra.optimization.extract_policynet import PolicyNet
from extra.optimization.helpers import load_worlds, ast_str_to_lin, lin_to_feats, time_linearizer

if __name__ == "__main__":
  net = PolicyNet()
  if os.path.isfile("/tmp/policynet.safetensors"): load_state_dict(net, safe_load("/tmp/policynet.safetensors"))
  optim = Adam(get_parameters(net))

  ast_strs = load_worlds()

  # select a world
  all_feats, all_acts, all_rews = [], [], []
  while 1:
    Tensor.training = False
    lin = ast_str_to_lin(random.choice(ast_strs))
    rawbufs = bufs_from_lin(lin)
    tm = last_tm = base_tm = time_linearizer(lin, rawbufs)

    # take actions
    feats, acts, rews = [], [], []
    while 1:
      feat = lin_to_feats(lin)
      feats.append(feat)
      probs = net(Tensor([feat])).exp()[0].numpy()

      # mask valid actions
      valid_action_mask = np.zeros((len(actions)+1), dtype=np.float32)
      for x in get_kernel_actions(lin): valid_action_mask[x] = 1
      probs *= valid_action_mask
      probs /= sum(probs)

      # sample an action; action 0 ends the episode
      act = np.random.choice(len(probs), p=probs)
      acts.append(act)
      if act == 0:
        rews.append(0)
        break

      # apply the chosen optimization and reward the timing improvement
      # over the previous step, normalized by the baseline time
      try:
        lin.apply_opt(actions[act-1])
        tm = time_linearizer(lin, rawbufs)
        if math.isinf(tm): raise Exception("failed")
        rews.append(((last_tm-tm)/base_tm))
        last_tm = tm
      except Exception:
        rews.append(-0.5)
        break
      #print(f"{tm*1e6:10.2f}", lin.colored_shape())

    assert len(feats) == len(acts) and len(acts) == len(rews)

    #print(rews)
    print(f"***** EPISODE {len(rews)} steps, {sum(rews):5.2f} reward, {base_tm*1e6:12.2f} -> {tm*1e6:12.2f} : {lin.colored_shape()}")
    all_feats += feats
    all_acts += acts
    # rewards to go
    for i in range(len(rews)-2, -1, -1): rews[i] += rews[i+1]
    all_rews += rews

    # policy-gradient (REINFORCE-style) update on a batch of BS transitions:
    # weight the log-probability of each taken action by its reward-to-go
    BS = 32
    if len(all_feats) >= BS:
      Tensor.training = True
      x = Tensor(all_feats[:BS])
      mask = np.zeros((BS, len(actions)+1), dtype=np.float32)
      mask[range(BS), all_acts[:BS]] = all_rews[:BS]
      loss = -(net(x) * Tensor(mask)).mean()
      optim.zero_grad()
      loss.backward()
      optim.step()
      all_feats = all_feats[BS:]
      all_acts = all_acts[BS:]
      all_rews = all_rews[BS:]