broken amfi patch

This commit is contained in:
George Hotz 2022-08-13 10:41:25 +02:00
parent 262efe5784
commit bdfdbc8f8d
3 changed files with 118 additions and 2 deletions

View File

@ -95,9 +95,9 @@ from tinygrad.tensor import Tensor
If all you want to do is ReLU, you are in luck! You can do very fast ReLU (at least 30 MEGAReLUs/sec confirmed)
Requires your Python to be signed with `ane/lib/sign_python.sh` to add the `com.apple.ane.iokit-user-access` entitlement, which also requires `amfi_get_out_of_my_way=0x1` in your `boot-args`. Build the library with `ane/lib/build.sh`
Requires your Python to be signed with `ane/lib/sign_python.sh` to add the `com.apple.ane.iokit-user-access` entitlement, which also requires `sudo nvram boot-args="amfi_get_out_of_my_way=1 ipc_control_port_options=0"`. Build the library with `ane/lib/build.sh`
In order to set arg and for the AMFI kext to respect that arg, csrutil must have `csrutil enable --without-kext --without-nvram` in recovery mode.
In order to set boot-args and for the AMFI kext to respect that arg, run `csrutil enable --without-kext --without-nvram` in recovery mode.
```python
from tinygrad.tensor import Tensor

View File

@ -74,6 +74,7 @@ brew install keith/formulae/dyld-shared-cache-extractor
dyld-shared-cache-extractor /System/Library/dyld/dyld_shared_cache_arm64e /tmp/libraries
cp /tmp/libraries/System/Library/PrivateFrameworks/ANECompiler.framework/Versions/A/ANECompiler .
cp /tmp/libraries/System/Library/PrivateFrameworks/ANEServices.framework/Versions/A/ANEServices .
cp /tmp/libraries/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/Versions/A/AppleNeuralEngine .
```
## Other work
@ -81,4 +82,17 @@ cp /tmp/libraries/System/Library/PrivateFrameworks/ANEServices.framework/Version
```
# sadly also relies on ZinIrRegBitPrintOutDebug
https://github.com/antgroup-arclab/ANETools.git
# sadly looks like we do actually need a direct connection to run hwx files, aned is at the espresso level
* frame #0: 0x00000001c250fecc AppleNeuralEngine`-[_ANEDaemonConnection loadModel:sandboxExtension:options:qos:withReply:]
(lldb) po $x2
_ANEModel: { modelURL=file:///var/folders/l8/38vj8bm52_gfgsqgdn__sh2w0000gn/T/test_F48D9B88-A68D-476F-ADC8-32BDAF9A2498.mlmodelc/ : key={"isegment":0,"inputs":{"image":{"shape":[1,1,1,64,1]},"image2":{"shape":[1,1,1,64,1]}},"outputs":{"probs":{"shape":[1,1,1,64,1]}}} : string_id=0x00000000 : program=(null) : state=1 : programHandle=0 : intermediateBufferHandle=0 : queueDepth=0 : attr={
} : perfStatsMask=0}
```
## Choices
* Disable amfid (breaks vscode)
* Patch amfid to allow restricted entitlements
* Sign with a "provisioning profile" to allow the entitlement
* Patch the ANE kext to not require a special entitlement (this is ideal, as we don't need to re-sign Python)

102
accel/ane/amfi/new_patch.py Normal file
View File

@ -0,0 +1,102 @@
import ctypes
from subprocess import check_output
from hexdump import hexdump
def get_pid(name):
    """Return the PID reported by ``pgrep name``, or None.

    None is returned when pgrep exits non-zero, when pgrep cannot be
    launched, or when its output is not a single integer (for example when
    several lines of output are printed), so callers never act on an
    ambiguous match.

    Args:
        name: pattern handed to ``pgrep`` as its only argument.

    Returns:
        The PID as an ``int``, or ``None``.
    """
    from subprocess import CalledProcessError

    # Keep the try body to the one call that can fail for environmental
    # reasons; a bare ``except`` here used to swallow KeyboardInterrupt too.
    try:
        output = check_output(["pgrep", name])
    except (CalledProcessError, OSError):
        return None

    try:
        return int(output)
    except ValueError:
        # Empty or multi-line output: not exactly one PID.
        return None
from ctypes.util import find_library
# Handle to the C library; every Mach call in this script is looked up on it.
libc = ctypes.CDLL(find_library('c'))

amfid_pid = get_pid("amfid")
if amfid_pid is None:
    # Without this check ctypes.c_int(None) below fails with an opaque TypeError.
    raise SystemExit("could not find a unique amfid process with pgrep")

# Out-parameter that receives the task port for amfid.
task = ctypes.c_uint32()
mytask = libc.mach_task_self()
ret = libc.task_for_pid(mytask, ctypes.c_int(amfid_pid), ctypes.pointer(task))
print(amfid_pid, ret, task, mytask)
if ret != 0:
    # A non-zero kern_return_t means `task` was never filled in, so every
    # later call on it would only fail as well.
    raise SystemExit("task_for_pid failed with kern_return_t %d" % ret)
class vm_region_submap_short_info_data_64(ctypes.Structure):
    """ctypes mirror of the info struct filled in by ``mach_vm_region_recurse``.

    Field widths follow ``struct vm_region_submap_short_info_64`` from
    ``<mach/vm_region.h>``, which is declared under ``#pragma pack(4)``:
    the two one-byte members are unsigned and ``user_wired_count`` is a
    16-bit value followed by two bytes of tail padding. The total size is
    48 bytes, i.e. 12 32-bit words.
    """
    # 4-byte packing keeps `offset` (8 bytes) at byte 12 and pads the tail
    # so the struct size stays a whole number of 32-bit words.
    _pack_ = 4
    _fields_ = [
        ("protection", ctypes.c_uint32),
        ("max_protection", ctypes.c_uint32),
        ("inheritance", ctypes.c_uint32),
        ("offset", ctypes.c_ulonglong),
        ("user_tag", ctypes.c_uint32),
        ("ref_count", ctypes.c_uint32),
        ("shadow_depth", ctypes.c_uint16),
        ("external_pager", ctypes.c_ubyte),
        ("share_mode", ctypes.c_ubyte),
        ("is_submap", ctypes.c_uint32),
        ("behavior", ctypes.c_uint32),
        ("object_id", ctypes.c_uint32),
        ("user_wired_count", ctypes.c_uint16),
    ]
# The info-struct size is handed to the kernel as a count of 32-bit words.
submap_info_size = ctypes.sizeof(vm_region_submap_short_info_data_64) // 4

# In/out parameters: the lookup starts at address 0 and the call overwrites
# `address` / `mapsize` with the region it found.
address = ctypes.c_ulong(0)
mapsize = ctypes.c_ulong(0)
count = ctypes.c_uint32(submap_info_size)
sub_info = vm_region_submap_short_info_data_64()
depth = 0
c_depth = ctypes.c_uint32(depth)

# A single query is enough; the former one-iteration loop added nothing.
ret = libc.mach_vm_region_recurse(
    task,
    ctypes.byref(address), ctypes.byref(mapsize),
    ctypes.byref(c_depth), ctypes.byref(sub_info),
    ctypes.byref(count))
# NOTE(review): the "aslr" label suggests this region is treated as the
# slid image base of the target — confirm.
print("aslr", hex(ret), hex(address.value), mapsize, count, sub_info.protection)
if ret != 0:
    # On failure `address` is still 0, and anything derived from it would
    # point at a meaningless location.
    raise SystemExit("mach_vm_region_recurse failed with kern_return_t %d" % ret)
# Protection bit values from <mach/vm_prot.h>.
VM_PROT_READ = 1
VM_PROT_WRITE = 2
VM_PROT_EXECUTE = 4

# Target of the patch: a fixed offset from the region base found earlier.
patch_address = address.value + 0x8e38
# Four replacement bytes (little-endian 0xd2800000).
# NOTE(review): presumably the arm64 encoding of `mov x0, #0` — confirm.
patch = b"\x00\x00\x80\xd2"

# Addresses and sizes are 64-bit in the mach_vm_* calls; wrap them explicitly
# because ctypes passes bare Python ints as C `int` when no argtypes are set.
c_patch_address = ctypes.c_uint64(patch_address)
c_patch_size = ctypes.c_uint64(len(patch))

# Dump the bytes currently at the patch site before touching them.
pdata = ctypes.c_void_p(0)
data_cnt = ctypes.c_uint32(0)
ret = libc.mach_vm_read(task, c_patch_address, c_patch_size,
                        ctypes.byref(pdata), ctypes.byref(data_cnt))
buf = ctypes.string_at(pdata.value, data_cnt.value)
hexdump(buf)

# Earlier experiments that used to sit here as dead code (wiring the page
# with mach_vm_wire, deallocating and reallocating the whole region, and
# writing the read-back buffer to `address`) were removed.

# Request read+write on the patch site (set_maximum=True, as before).
ret = libc.mach_vm_protect(task, c_patch_address, c_patch_size,
                           True, VM_PROT_READ | VM_PROT_WRITE)
print("protect", ret)

longptr = ctypes.POINTER(ctypes.c_ulong)
shellcodePtr = ctypes.cast(patch, longptr)
# The byte count must be the patch length; it used to be len(buf), which is
# 0 whenever the read above fails and is unrelated to the data being written.
ret = libc.mach_vm_write(task, c_patch_address, shellcodePtr,
                         ctypes.c_uint32(len(patch)))
print("write", ret)

# Put the patch site back to read+execute (set_maximum=False, as before).
ret = libc.mach_vm_protect(task, c_patch_address, c_patch_size,
                           False, VM_PROT_READ | VM_PROT_EXECUTE)
print("protect", ret)