From bad0ff60b72e995f11e4f0cc9aa4a230a57b858d Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Sat, 16 Dec 2023 23:10:50 -0800 Subject: [PATCH] start Qualcomm GPU driver (#2804) * hooking works * working * qcom work * parsing command buffers * proper parse --- disassemblers/adreno/__init__.py | 8 +- extra/qcom_gpu_driver/adreno_pm4.xml | 2265 +++++++++++++++++++++++++ extra/qcom_gpu_driver/gen.sh | 2 + extra/qcom_gpu_driver/msm_kgsl.h | 1451 ++++++++++++++++ extra/qcom_gpu_driver/msm_kgsl.py | 1034 +++++++++++ extra/qcom_gpu_driver/opencl_ioctl.py | 171 ++ 6 files changed, 4928 insertions(+), 3 deletions(-) create mode 100644 extra/qcom_gpu_driver/adreno_pm4.xml create mode 100755 extra/qcom_gpu_driver/gen.sh create mode 100644 extra/qcom_gpu_driver/msm_kgsl.h create mode 100644 extra/qcom_gpu_driver/msm_kgsl.py create mode 100644 extra/qcom_gpu_driver/opencl_ioctl.py diff --git a/disassemblers/adreno/__init__.py b/disassemblers/adreno/__init__.py index ac16b381..b0b3e422 100644 --- a/disassemblers/adreno/__init__.py +++ b/disassemblers/adreno/__init__.py @@ -4,15 +4,17 @@ import pathlib from hexdump import hexdump fxn = None -def disasm(buf): +def disasm_raw(buf): global fxn if fxn is None: shared = pathlib.Path(__file__).parent / "disasm.so" if not shared.is_file(): os.system(f'cd {pathlib.Path(__file__).parent} && gcc -shared disasm-a3xx.c -o disasm.so') fxn = ctypes.CDLL(shared.as_posix())['disasm'] - #hexdump(buf) + fxn(buf, len(buf)) + +def disasm(buf): END = b"\x00\x00\x00\x00\x00\x00\x00\x03" buf = buf[0x510:] # this right? buf = buf.split(END)[0] + END - fxn(buf, len(buf)) + disasm_raw(buf) diff --git a/extra/qcom_gpu_driver/adreno_pm4.xml b/extra/qcom_gpu_driver/adreno_pm4.xml new file mode 100644 index 00000000..1b687eed --- /dev/null +++ b/extra/qcom_gpu_driver/adreno_pm4.xml @@ -0,0 +1,2265 @@ + + + + + + + + + + + Flushes dirty data from UCHE, and also writes a GPU timestamp to + the address if one is provided. + + + + + + + + + + + + + + + + + + + If A6XX_RB_SAMPLE_COUNT_CONTROL.copy is true, writes OQ Z passed + sample counts to RB_SAMPLE_COUNT_ADDR. This writes to main + memory, skipping UCHE. + + + + + + Writes the GPU timestamp to the address that follows, once RB + access and flushes are complete. + + + + + + + + + + + + + + + + + + + + + Invalidates depth attachment data from the CCU. We assume this + happens in the last stage. + + + + + Invalidates color attachment data from the CCU. We assume this + happens in the last stage. + + + + + Flushes the small cache used by CP_EVENT_WRITE::BLIT (which, + along with its registers, would be better named RESOLVE). + + + + + Flushes depth attachment data from the CCU. We assume this + happens in the last stage. + + + + + Flushes color attachment data from the CCU. We assume this + happens in the last stage. + + + + + 2D blit to resolve GMEM to system memory (skipping CCU) at the + end of a render pass. Compare to CP_BLIT's BLIT_OP_SCALE for + more general blitting. + + + + + Clears based on GRAS_LRZ_CNTL configuration, could clear + fast-clear buffer or LRZ direction. + LRZ direction is stored at lrz_fc_offset + 0x200, has 1 byte which + could be expressed by enum: + CUR_DIR_DISABLED = 0x0 + CUR_DIR_GE = 0x1 + CUR_DIR_LE = 0x2 + CUR_DIR_UNSET = 0x3 + Clear of direction means setting the direction to CUR_DIR_UNSET. + + + + + + + + + + + + + + + Invalidates UCHE. 
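The disassembler change above splits header handling out of the raw entry point. A minimal usage sketch, assuming a captured kernel image on disk (the filename and the capture are hypothetical; the 0x510 header offset is the one the code itself questions):

```python
# Hypothetical: disassemble a dumped OpenCL kernel image with the new split API.
from disassemblers.adreno import disasm, disasm_raw

with open("kernel.bin", "rb") as f:  # hypothetical dump of a compiled kernel
    buf = f.read()
disasm(buf)              # header-aware: slices off 0x510 bytes, stops at the END marker
disasm_raw(buf[0x510:])  # or hand the raw instruction stream straight to disasm.so
```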
+ + + + + + + + Doesn't seem to do anything + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + initialize CP's micro-engine + + skip N 32-bit words to get to the next packet + + + indirect buffer dispatch. prefetch parser uses this packet + type to determine whether to pre-fetch the IB + + + + + + Takes the same arguments as CP_INDIRECT_BUFFER, but jumps to + another buffer at the same level. Must be at the end of IB, and + doesn't work with draw state IB's. + + + indirect buffer dispatch. same as IB, but init is pipelined + + + Waits for the IDLE state of the engine before further drawing. + This is pipelined, so the CP may continue. + + + wait until a register or memory location is a specific value + + wait until a register location is equal to a specific value + + wait until a register location is >= a specific value + + wait until a read completes + + wait until all base/size writes from an IB_PFD packet have completed + + + register read/modify/write + + Set binning configuration registers + + + reads register in chip and writes to memory + + write N 32-bit words to memory + + write CP_PROG_COUNTER value to memory + + conditional execution of a sequence of packets + + conditional write to memory or register + + + generate an event that creates a write to memory when completed + + + generate a VS|PS_done event + + generate a cache flush done event + + generate a z_pass done event + + + not sure the real name, but this seems to be what is used for + opencl, instead of CP_DRAW_INDX.. + + + initiate fetch of index buffer and draw + + draw using supplied indices in packet + + initiate fetch of index buffer and binIDs and draw + + initiate fetch of bin IDs and draw using supplied indices + + begin/end initiator for viz query extent processing + + fetch state sub-blocks and initiate shader code DMAs + + load constant into chip and to memory + + load sequencer instruction memory (pointer-based) + + load sequencer instruction memory (code embedded in packet) + + load constants from a location in memory + + selective invalidation of state pointers + + dynamically changes shader instruction memory partition + + sets the 64-bit BIN_MASK register in the PFP + + sets the 64-bit BIN_SELECT register in the PFP + + updates the current context, if needed + + generate interrupt from the command stream + + copy sequencer instruction memory to system memory + + + + + + + + sets draw initiator flags register in PFP, gets bitwise-ORed into + every draw initiator + + + sets the register protection mode + + + + + + load high level sequencer command + + + Conditionally load a IB based on a flag, prefetch enabled + + Conditionally load a IB based on a flag, prefetch disabled + + Load a buffer with pre-fetch enabled + + Set bin (?) + + + test 2 memory locations to dword values specified + + + Write register, ignoring context state for context sensitive registers + + + Record the real-time when this packet is processed by PFP + + + + + + PFP waits until the FIFO between the PFP and the ME is empty + + + + + Used a bit like CP_SET_CONSTANT on a2xx, but can write multiple + groups of registers. 
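The CP_* opcodes listed above travel in PM4 type-7 packets, with type-4 packets carrying register writes. A sketch of the header encoding, following the freedreno definitions this XML mirrors (the helper names are mine, not part of the patch):

```python
def odd_parity(val: int) -> int:
    # parity bit is chosen so the total number of set bits, including it, is odd
    return (bin(val).count("1") + 1) & 1

def pkt7_hdr(opcode: int, cnt: int) -> int:
    # type-7 packet: a CP_* opcode followed by cnt payload dwords
    return (0x70000000 | (cnt & 0x3fff) | (odd_parity(cnt) << 15)
            | ((opcode & 0x7f) << 16) | (odd_parity(opcode) << 23))

def pkt4_hdr(reg: int, cnt: int) -> int:
    # type-4 packet: write cnt dwords starting at register offset reg
    return (0x40000000 | (cnt & 0x7f) | (odd_parity(cnt) << 7)
            | ((reg & 0x3ffff) << 8) | (odd_parity(reg) << 27))
```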
Looks like it can be used to create state + objects in GPU memory, and on state change only emit pointer + (via CP_SET_DRAW_STATE), which should be nice for reducing CPU + overhead: + + (A4x) save PM4 stream pointers to execute upon a visible draw + + + + + + + + + + Enable or disable predication globally. Also resets the + predicate to "passing" and the local bit to enabled when + enabling global predication. + + + + + Enable or disable predication locally. Unlike globally enabling + predication, this packet doesn't touch any other state. + Predication only happens when enabled globally and locally and a + predicate has been set. This should be used for internal draws + which aren't supposed to use the predication state: + + CP_DRAW_PRED_ENABLE_LOCAL(0) + ... do draw... + CP_DRAW_PRED_ENABLE_LOCAL(1) + + + + + Latch a draw predicate into the internal register. + + + + + for A4xx + Write to register with address that does not fit into type-0 pkt + + + + copy from ME scratch RAM to a register + + + Copy from REG to ME scratch RAM + + + Wait for memory writes to complete + + + Conditional execution based on register comparison + + + Memory to REG copy + + + + + + + for a5xx + + + + + + Tells CP the current mode of GPU operation + + Instruct CP to set a few internal CP registers + + + + + + + Enables IB2 skipping. If both GLOBAL and LOCAL are 1 and + nothing is left in the visibility stream, then + CP_INDIRECT_BUFFER will be skipped, and draws will early return + from their IB. + + + + + + + + + + + + + + + + + + + + General purpose 2D blit engine for image transfers and mipmap + generation. Reads through UCHE, writes through the CCU cache in + the PS stage. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Write CP_CONTEXT_SWITCH_*_INFO from CP to the following dwords, + and forcibly switch to the indicated context. + + + + + + + + + + These first appear in a650_sqe.bin. They can in theory be used + to loop any sequence of IB1 commands, but in practice they are + used to loop over bins. There is a fixed-size per-iteration + prefix, used to set per-bin state, and then the following IB1 + commands are executed until CP_END_BIN which are always the same + for each iteration and usually contain a list of + CP_INDIRECT_BUFFER calls to IB2 commands which setup state and + execute restore/draw/save commands. This replaces the previous + technique of just repeating the CP_INDIRECT_BUFFER calls and + "unrolling" the loop. + + + + + Make next dword 1 to disable preemption, 0 to re-enable it. + + + + + + + + + Can clear BV/BR counters, or wait until one catches up to another + + Clears, adds to local, or adds to global timestamp + + + + + Write to a scratch memory that is read by CP_REG_TEST with + SOURCE_SCRATCH_MEM set. It's not the same scratch as scratch registers. + However it uses the same memory space. + + + + + Executes an array of fixed-size command buffers where each + buffer is assumed to have one draw call, skipping buffers with + non-visible draw calls. + + + + Reset various on-chip state used for synchronization + + + + + + Load state, a3xx (and later?) 
+ + + + + + + + + + + + + + + + + inline with the CP_LOAD_STATE packet + + + + + in buffer pointed to by EXT_SRC_ADDR + + + + + + + + + + + + + + + + + + Load state, a4xx+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Load state, a6xx+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + SS6_UBO used by the a6xx vulkan blob with tesselation constants + in this case, EXT_SRC_ADDR is (ubo_id shl 16 | offset) + to load constants from a UBO loaded with DST_OFF = 14 and offset 0, + EXT_SRC_ADDR = 0xe0000 + (offset is a guess, should be in bytes given that maxUniformBufferRange=64k) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DST_OFF same as in CP_LOAD_STATE6 - vec4 VS const at this offset will + be updated for each draw to {draw_id, first_vertex, first_instance, 0} + value of 0 disables it + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Read a 64-bit value at the given address and + test if it equals/doesn't equal 0. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value at offset 0 always seems to be 0x00000000.. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Like CP_SET_BIN_DATA5, but set the pointers as offsets from the + pointers stored in VSC_PIPE_{DATA,DATA2,SIZE}_ADDRESS. Useful + for Vulkan where these values aren't known when the command + stream is recorded. + + + + + + + + + + + + + + + + + + + + + + + + Modifies DST_REG using two sources that can either be registers + or immediates. If SRC1_ADD is set, then do the following: + + $dst = (($dst & $src0) rot $rotate) + $src1 + + Otherwise: + + $dst = (($dst & $src0) rot $rotate) | $src1 + + Here "rot" means rotate left. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Like CP_REG_TO_MEM, but the memory address to write to can be + offsetted using either one or two registers or scratch + registers. + + + + + + + + + + + + + + + + + + + + + + + + Like CP_REG_TO_MEM, but the memory address to write to can be + offsetted using a DWORD in memory. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Wait until a memory value is greater than or equal to the + reference, using signed comparison. + + + + + + + + + + + + + + + + + + + This uses the same internal comparison as CP_COND_WRITE, + but waits until the comparison is true instead. It busy-loops in + the CP for the given number of cycles before trying again. 
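To make the CP_REG_RMW arithmetic above concrete, a small model of the dword update ("rot" is a 32-bit rotate-left; a sketch, not driver code):

```python
def cp_reg_rmw(dst: int, src0: int, src1: int, rotate: int = 0, src1_add: bool = False) -> int:
    # $dst = (($dst & $src0) rot $rotate) + $src1   if SRC1_ADD is set
    # $dst = (($dst & $src0) rot $rotate) | $src1   otherwise
    v = dst & src0 & 0xffffffff
    v = ((v << rotate) | (v >> (32 - rotate))) & 0xffffffff
    return (v + src1) & 0xffffffff if src1_add else v | src1
```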
+ + + + + + + + + + + + + + + + + + + + + + + + + + + Waits for REG0 to not be 0 or REG1 to not equal REF + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tell CP the current operation mode, indicates save and restore procedure + + + + + + + + + + + + + + + + + + + + + + + + + + Set internal CP registers, used to indicate context save data addresses + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tests bit in specified register and sets predicate for CP_COND_REG_EXEC. + So: + + opcode: CP_REG_TEST (39) (2 dwords) + { REG = 0xc10 | BIT = 0 } + 0000: 70b90001 00000c10 + opcode: CP_COND_REG_EXEC (47) (3 dwords) + 0000: 70c70002 10000000 00000004 + opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) + + Will execute the CP_INDIRECT_BUFFER only if b0 in the register at + offset 0x0c10 is 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Executes the following DWORDs of commands if the dword at ADDR0 + is not equal to 0 and the dword at ADDR1 is less than REF + (signed comparison). + + + + + + + + + + + + + + + + + + + + + + + + Used by the userspace driver to set various IB's which are + executed during context save/restore for handling + state that isn't restored by the + context switch routine itself. + + + + Executed unconditionally when switching back to the context. + + + + Executed when switching back after switching + away during execution of + a CP_SET_MARKER packet with RM6_YIELD as the + payload *and* the normal save routine was + bypassed for a shorter one. I think this is + connected to the "skipsaverestore" bit set by + the kernel when preempting. + + + + + Executed when switching away from the context, + except for context switches initiated via + CP_YIELD. + + + + + This can only be set by the RB (i.e. the kernel) + and executes with protected mode off, but + is otherwise similar to SAVE_IB. + + Note, kgsl calls this CP_KMD_AMBLE_TYPE + + + + + + + + + + + + + + + + + + + Keep shadow copies of these registers and only set them + when drawing, avoiding redundant writes: + - VPC_CNTL_0 + - HLSQ_CONTROL_1_REG + - HLSQ_UNKNOWN_B980 + + + + Track RB_RENDER_CNTL, and insert a WFI in the following + situation: + - There is a write that disables binning + - There was a draw with binning left enabled, but in + BYPASS mode + Presumably this is a hang workaround? + + + + Do a mysterious CP_EVENT_WRITE 0x3f when the low bit of + the data to write is 0. Used by the Vulkan blob with + PC_MULTIVIEW_CNTL, but this isn't predicated on particular + register(s) like the others. 
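The CP_REG_TEST dump above doubles as a check on the type-7 header encoding sketched earlier: opcode 0x39 with one payload dword encodes to exactly the 70b90001 in the listing.

```python
assert pkt7_hdr(0x39, 1) == 0x70b90001  # CP_REG_TEST header from the example dump
# the payload dword 0x00000c10 selects register offset 0xc10, bit 0
```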
+ + + + Tracks GRAS_LRZ_CNTL::GREATER, GRAS_LRZ_CNTL::DIR, and + GRAS_LRZ_DEPTH_VIEW with previous values, and if one of + the following is true: + - GRAS_LRZ_CNTL::GREATER has changed + - GRAS_LRZ_CNTL::DIR has changed, the old value is not + CUR_DIR_GE, and the new value is not CUR_DIR_DISABLED + - GRAS_LRZ_DEPTH_VIEW has changed + then it does a LRZ_FLUSH with GRAS_LRZ_CNTL::ENABLE + forced to 1. + Only exists in a650_sqe.fw. + + + + + + + + + + + + + Note that the SMMU's definition of TTBRn can take different forms + depending on the pgtable format. But a5xx+ only uses aarch64 + format. + + + + + + + + + + Unused, does not apply to aarch64 pgtable format + + + + + + + + + + + + + Size of prefix for each bin. For each bin index i, the + prefix commands at PREFIX_ADDR + i * PREFIX_DWORDS are + executed in an IB2 before the IB1 commands following + this packet. + + + + Number of dwords after this packet until CP_END_BIN + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Best guess is that it is a faster way to fetch all the VSC_STATE registers + and keep them in a local scratch memory instead of fetching every time + when skipping IBs. + + + + + + Scratch memory size is 48 dwords` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/extra/qcom_gpu_driver/gen.sh b/extra/qcom_gpu_driver/gen.sh new file mode 100755 index 00000000..ad5d8b2f --- /dev/null +++ b/extra/qcom_gpu_driver/gen.sh @@ -0,0 +1,2 @@ +#!/usr/bin/sh +clang2py msm_kgsl.h -o msm_kgsl.py \ No newline at end of file diff --git a/extra/qcom_gpu_driver/msm_kgsl.h b/extra/qcom_gpu_driver/msm_kgsl.h new file mode 100644 index 00000000..cae035d3 --- /dev/null +++ b/extra/qcom_gpu_driver/msm_kgsl.h @@ -0,0 +1,1451 @@ +#ifndef _UAPI_MSM_KGSL_H +#define _UAPI_MSM_KGSL_H + +#include +#include +#define size_t unsigned long +#define uint64_t unsigned long + +/* + * The KGSL version has proven not to be very useful in userspace if features + * are cherry picked into other trees out of order so it is frozen as of 3.14. + * It is left here for backwards compatabilty and as a reminder that + * software releases are never linear. Also, I like pie. + */ + +#define KGSL_VERSION_MAJOR 3 +#define KGSL_VERSION_MINOR 14 + +/* + * We have traditionally mixed context and issueibcmds / command batch flags + * together into a big flag stew. This worked fine until we started adding a + * lot more command batch flags and we started running out of bits. Turns out + * we have a bit of room in the context type / priority mask that we could use + * for command batches, but that means we need to split out the flags into two + * coherent sets. + * + * If any future definitions are for both context and cmdbatch add both defines + * and link the cmdbatch to the context define as we do below. Otherwise feel + * free to add exclusive bits to either set. 
+ */ + +/* --- context flags --- */ +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 +#define KGSL_CONTEXT_USER_GENERATED_TS 0x00000080 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_END_OF_FRAME 0x00000100 +#define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SYNC 0x00000400 +#define KGSL_CONTEXT_PWR_CONSTRAINT 0x00000800 + +#define KGSL_CONTEXT_PRIORITY_MASK 0x0000F000 +#define KGSL_CONTEXT_PRIORITY_SHIFT 12 +#define KGSL_CONTEXT_PRIORITY_UNDEF 0 + +#define KGSL_CONTEXT_IFH_NOP 0x00010000 +#define KGSL_CONTEXT_SECURE 0x00020000 + +#define KGSL_CONTEXT_PREEMPT_STYLE_MASK 0x0E000000 +#define KGSL_CONTEXT_PREEMPT_STYLE_SHIFT 25 +#define KGSL_CONTEXT_PREEMPT_STYLE_DEFAULT 0x0 +#define KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER 0x1 +#define KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN 0x2 + +#define KGSL_CONTEXT_TYPE_MASK 0x01F00000 +#define KGSL_CONTEXT_TYPE_SHIFT 20 +#define KGSL_CONTEXT_TYPE_ANY 0 +#define KGSL_CONTEXT_TYPE_GL 1 +#define KGSL_CONTEXT_TYPE_CL 2 +#define KGSL_CONTEXT_TYPE_C2D 3 +#define KGSL_CONTEXT_TYPE_RS 4 +#define KGSL_CONTEXT_TYPE_UNKNOWN 0x1E + +#define KGSL_CONTEXT_INVALID 0xffffffff + +/* + * --- command batch flags --- + * The bits that are linked to a KGSL_CONTEXT equivalent are either legacy + * definitions or bits that are valid for both contexts and cmdbatches. To be + * safe the other 8 bits that are still available in the context field should be + * omitted here in case we need to share - the other bits are available for + * cmdbatch only flags as needed + */ +#define KGSL_CMDBATCH_MEMLIST 0x00000001 +#define KGSL_CMDBATCH_MARKER 0x00000002 +#define KGSL_CMDBATCH_SUBMIT_IB_LIST KGSL_CONTEXT_SUBMIT_IB_LIST /* 0x004 */ +#define KGSL_CMDBATCH_CTX_SWITCH KGSL_CONTEXT_CTX_SWITCH /* 0x008 */ +#define KGSL_CMDBATCH_PROFILING 0x00000010 +#define KGSL_CMDBATCH_PROFILING_KTIME 0x00000020 +#define KGSL_CMDBATCH_END_OF_FRAME KGSL_CONTEXT_END_OF_FRAME /* 0x100 */ +#define KGSL_CMDBATCH_SYNC KGSL_CONTEXT_SYNC /* 0x400 */ +#define KGSL_CMDBATCH_PWR_CONSTRAINT KGSL_CONTEXT_PWR_CONSTRAINT /* 0x800 */ + +/* + * Reserve bits [16:19] and bits [28:31] for possible bits shared between + * contexts and command batches. Update this comment as new flags are added. 
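As a sketch of how these masks compose (constants assumed to come from the clang2py-generated msm_kgsl.py; the specific combination is illustrative):

```python
from msm_kgsl import *  # generated by gen.sh (assumed import path)

# a CL-type context using per-context, user-generated timestamps
flags = (KGSL_CONTEXT_PER_CONTEXT_TS | KGSL_CONTEXT_USER_GENERATED_TS
         | (KGSL_CONTEXT_TYPE_CL << KGSL_CONTEXT_TYPE_SHIFT))
assert (flags & KGSL_CONTEXT_TYPE_MASK) >> KGSL_CONTEXT_TYPE_SHIFT == KGSL_CONTEXT_TYPE_CL
```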
+ */ + +/* + * gpu_command_object flags - these flags communicate the type of command or + * memory object being submitted for a GPU command + */ + +/* Flags for GPU command objects */ +#define KGSL_CMDLIST_IB 0x00000001U +#define KGSL_CMDLIST_CTXTSWITCH_PREAMBLE 0x00000002U +#define KGSL_CMDLIST_IB_PREAMBLE 0x00000004U + +/* Flags for GPU command memory objects */ +#define KGSL_OBJLIST_MEMOBJ 0x00000008U +#define KGSL_OBJLIST_PROFILE 0x00000010U + +/* Flags for GPU command sync points */ +#define KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP 0 +#define KGSL_CMD_SYNCPOINT_TYPE_FENCE 1 + +/* --- Memory allocation flags --- */ + +/* General allocation hints */ +#define KGSL_MEMFLAGS_SECURE 0x00000008ULL +#define KGSL_MEMFLAGS_GPUREADONLY 0x01000000U +#define KGSL_MEMFLAGS_GPUWRITEONLY 0x02000000U +#define KGSL_MEMFLAGS_FORCE_32BIT 0x100000000ULL + +/* Memory caching hints */ +#define KGSL_CACHEMODE_MASK 0x0C000000U +#define KGSL_CACHEMODE_SHIFT 26 + +#define KGSL_CACHEMODE_WRITECOMBINE 0 +#define KGSL_CACHEMODE_UNCACHED 1 +#define KGSL_CACHEMODE_WRITETHROUGH 2 +#define KGSL_CACHEMODE_WRITEBACK 3 + +#define KGSL_MEMFLAGS_USE_CPU_MAP 0x10000000ULL + +/* Memory types for which allocations are made */ +#define KGSL_MEMTYPE_MASK 0x0000FF00 +#define KGSL_MEMTYPE_SHIFT 8 + +#define KGSL_MEMTYPE_OBJECTANY 0 +#define KGSL_MEMTYPE_FRAMEBUFFER 1 +#define KGSL_MEMTYPE_RENDERBUFFER 2 +#define KGSL_MEMTYPE_ARRAYBUFFER 3 +#define KGSL_MEMTYPE_ELEMENTARRAYBUFFER 4 +#define KGSL_MEMTYPE_VERTEXARRAYBUFFER 5 +#define KGSL_MEMTYPE_TEXTURE 6 +#define KGSL_MEMTYPE_SURFACE 7 +#define KGSL_MEMTYPE_EGL_SURFACE 8 +#define KGSL_MEMTYPE_GL 9 +#define KGSL_MEMTYPE_CL 10 +#define KGSL_MEMTYPE_CL_BUFFER_MAP 11 +#define KGSL_MEMTYPE_CL_BUFFER_NOMAP 12 +#define KGSL_MEMTYPE_CL_IMAGE_MAP 13 +#define KGSL_MEMTYPE_CL_IMAGE_NOMAP 14 +#define KGSL_MEMTYPE_CL_KERNEL_STACK 15 +#define KGSL_MEMTYPE_COMMAND 16 +#define KGSL_MEMTYPE_2D 17 +#define KGSL_MEMTYPE_EGL_IMAGE 18 +#define KGSL_MEMTYPE_EGL_SHADOW 19 +#define KGSL_MEMTYPE_MULTISAMPLE 20 +#define KGSL_MEMTYPE_KERNEL 255 + +/* + * Alignment hint, passed as the power of 2 exponent. + * i.e 4k (2^12) would be 12, 64k (2^16)would be 16. + */ +#define KGSL_MEMALIGN_MASK 0x00FF0000 +#define KGSL_MEMALIGN_SHIFT 16 + +enum kgsl_user_mem_type { + KGSL_USER_MEM_TYPE_PMEM = 0x00000000, + KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001, + KGSL_USER_MEM_TYPE_ADDR = 0x00000002, + KGSL_USER_MEM_TYPE_ION = 0x00000003, + /* + * ION type is retained for backwards compatibilty but Ion buffers are + * dma-bufs so try to use that naming if we can + */ + KGSL_USER_MEM_TYPE_DMABUF = 0x00000003, + KGSL_USER_MEM_TYPE_MAX = 0x00000007, +}; +#define KGSL_MEMFLAGS_USERMEM_MASK 0x000000e0 +#define KGSL_MEMFLAGS_USERMEM_SHIFT 5 + +/* + * Unfortunately, enum kgsl_user_mem_type starts at 0 which does not + * leave a good value for allocated memory. In the flags we use + * 0 to indicate allocated memory and thus need to add 1 to the enum + * values. 
+ */ +#define KGSL_USERMEM_FLAG(x) (((x) + 1) << KGSL_MEMFLAGS_USERMEM_SHIFT) + +#define KGSL_MEMFLAGS_NOT_USERMEM 0 +#define KGSL_MEMFLAGS_USERMEM_PMEM KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_PMEM) +#define KGSL_MEMFLAGS_USERMEM_ASHMEM \ + KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ASHMEM) +#define KGSL_MEMFLAGS_USERMEM_ADDR KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ADDR) +#define KGSL_MEMFLAGS_USERMEM_ION KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ION) + +/* --- generic KGSL flag values --- */ + +#define KGSL_FLAGS_NORMALMODE 0x00000000 +#define KGSL_FLAGS_SAFEMODE 0x00000001 +#define KGSL_FLAGS_INITIALIZED0 0x00000002 +#define KGSL_FLAGS_INITIALIZED 0x00000004 +#define KGSL_FLAGS_STARTED 0x00000008 +#define KGSL_FLAGS_ACTIVE 0x00000010 +#define KGSL_FLAGS_RESERVED0 0x00000020 +#define KGSL_FLAGS_RESERVED1 0x00000040 +#define KGSL_FLAGS_RESERVED2 0x00000080 +#define KGSL_FLAGS_SOFT_RESET 0x00000100 +#define KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS 0x00000200 + +/* Server Side Sync Timeout in milliseconds */ +#define KGSL_SYNCOBJ_SERVER_TIMEOUT 2000 + +/* + * Reset status values for context + */ +enum kgsl_ctx_reset_stat { + KGSL_CTX_STAT_NO_ERROR = 0x00000000, + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 0x00000001, + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 0x00000002, + KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 0x00000003 +}; + +#define KGSL_CONVERT_TO_MBPS(val) \ + (val*1000*1000U) + +/* device id */ +enum kgsl_deviceid { + KGSL_DEVICE_3D0 = 0x00000000, + KGSL_DEVICE_MAX +}; + +struct kgsl_devinfo { + + unsigned int device_id; + /* chip revision id + * coreid:8 majorrev:8 minorrev:8 patch:8 + */ + unsigned int chip_id; + unsigned int mmu_enabled; + unsigned long gmem_gpubaseaddr; + /* + * This field contains the adreno revision + * number 200, 205, 220, etc... + */ + unsigned int gpu_id; + size_t gmem_sizebytes; +}; + +/* + * struct kgsl_devmemstore - this structure defines the region of memory + * that can be mmap()ed from this driver. The timestamp fields are volatile + * because they are written by the GPU + * @soptimestamp: Start of pipeline timestamp written by GPU before the + * commands in concern are processed + * @sbz: Unused, kept for 8 byte alignment + * @eoptimestamp: End of pipeline timestamp written by GPU after the + * commands in concern are processed + * @sbz2: Unused, kept for 8 byte alignment + * @preempted: Indicates if the context was preempted + * @sbz3: Unused, kept for 8 byte alignment + * @ref_wait_ts: Timestamp on which to generate interrupt, unused now. 
+ * @sbz4: Unused, kept for 8 byte alignment + * @current_context: The current context the GPU is working on + * @sbz5: Unused, kept for 8 byte alignment + */ +struct kgsl_devmemstore { + volatile unsigned int soptimestamp; + unsigned int sbz; + volatile unsigned int eoptimestamp; + unsigned int sbz2; + volatile unsigned int preempted; + unsigned int sbz3; + volatile unsigned int ref_wait_ts; + unsigned int sbz4; + unsigned int current_context; + unsigned int sbz5; +}; + +#define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \ + ((ctxt_id)*sizeof(struct kgsl_devmemstore) + \ + offsetof(struct kgsl_devmemstore, field)) + +/* timestamp id*/ +enum kgsl_timestamp_type { + KGSL_TIMESTAMP_CONSUMED = 0x00000001, /* start-of-pipeline timestamp */ + KGSL_TIMESTAMP_RETIRED = 0x00000002, /* end-of-pipeline timestamp*/ + KGSL_TIMESTAMP_QUEUED = 0x00000003, +}; + +/* property types - used with kgsl_device_getproperty */ +#define KGSL_PROP_DEVICE_INFO 0x1 +#define KGSL_PROP_DEVICE_SHADOW 0x2 +#define KGSL_PROP_DEVICE_POWER 0x3 +#define KGSL_PROP_SHMEM 0x4 +#define KGSL_PROP_SHMEM_APERTURES 0x5 +#define KGSL_PROP_MMU_ENABLE 0x6 +#define KGSL_PROP_INTERRUPT_WAITS 0x7 +#define KGSL_PROP_VERSION 0x8 +#define KGSL_PROP_GPU_RESET_STAT 0x9 +#define KGSL_PROP_PWRCTRL 0xE +#define KGSL_PROP_PWR_CONSTRAINT 0x12 +#define KGSL_PROP_UCHE_GMEM_VADDR 0x13 +#define KGSL_PROP_SP_GENERIC_MEM 0x14 +#define KGSL_PROP_UCODE_VERSION 0x15 +#define KGSL_PROP_GPMU_VERSION 0x16 +#define KGSL_PROP_DEVICE_BITNESS 0x18 + +struct kgsl_shadowprop { + unsigned long gpuaddr; + size_t size; + unsigned int flags; /* contains KGSL_FLAGS_ values */ +}; + +struct kgsl_version { + unsigned int drv_major; + unsigned int drv_minor; + unsigned int dev_major; + unsigned int dev_minor; +}; + +struct kgsl_sp_generic_mem { + uint64_t local; + uint64_t pvt; +}; + +struct kgsl_ucode_version { + unsigned int pfp; + unsigned int pm4; +}; + +struct kgsl_gpmu_version { + unsigned int major; + unsigned int minor; + unsigned int features; +}; + +/* Performance counter groups */ + +#define KGSL_PERFCOUNTER_GROUP_CP 0x0 +#define KGSL_PERFCOUNTER_GROUP_RBBM 0x1 +#define KGSL_PERFCOUNTER_GROUP_PC 0x2 +#define KGSL_PERFCOUNTER_GROUP_VFD 0x3 +#define KGSL_PERFCOUNTER_GROUP_HLSQ 0x4 +#define KGSL_PERFCOUNTER_GROUP_VPC 0x5 +#define KGSL_PERFCOUNTER_GROUP_TSE 0x6 +#define KGSL_PERFCOUNTER_GROUP_RAS 0x7 +#define KGSL_PERFCOUNTER_GROUP_UCHE 0x8 +#define KGSL_PERFCOUNTER_GROUP_TP 0x9 +#define KGSL_PERFCOUNTER_GROUP_SP 0xA +#define KGSL_PERFCOUNTER_GROUP_RB 0xB +#define KGSL_PERFCOUNTER_GROUP_PWR 0xC +#define KGSL_PERFCOUNTER_GROUP_VBIF 0xD +#define KGSL_PERFCOUNTER_GROUP_VBIF_PWR 0xE +#define KGSL_PERFCOUNTER_GROUP_MH 0xF +#define KGSL_PERFCOUNTER_GROUP_PA_SU 0x10 +#define KGSL_PERFCOUNTER_GROUP_SQ 0x11 +#define KGSL_PERFCOUNTER_GROUP_SX 0x12 +#define KGSL_PERFCOUNTER_GROUP_TCF 0x13 +#define KGSL_PERFCOUNTER_GROUP_TCM 0x14 +#define KGSL_PERFCOUNTER_GROUP_TCR 0x15 +#define KGSL_PERFCOUNTER_GROUP_L2 0x16 +#define KGSL_PERFCOUNTER_GROUP_VSC 0x17 +#define KGSL_PERFCOUNTER_GROUP_CCU 0x18 +#define KGSL_PERFCOUNTER_GROUP_LRZ 0x19 +#define KGSL_PERFCOUNTER_GROUP_CMP 0x1A +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON 0x1B +#define KGSL_PERFCOUNTER_GROUP_SP_PWR 0x1C +#define KGSL_PERFCOUNTER_GROUP_TP_PWR 0x1D +#define KGSL_PERFCOUNTER_GROUP_RB_PWR 0x1E +#define KGSL_PERFCOUNTER_GROUP_CCU_PWR 0x1F +#define KGSL_PERFCOUNTER_GROUP_UCHE_PWR 0x20 +#define KGSL_PERFCOUNTER_GROUP_CP_PWR 0x21 +#define KGSL_PERFCOUNTER_GROUP_GPMU_PWR 0x22 +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR 0x23 +#define 
KGSL_PERFCOUNTER_GROUP_MAX 0x24 + +#define KGSL_PERFCOUNTER_NOT_USED 0xFFFFFFFF +#define KGSL_PERFCOUNTER_BROKEN 0xFFFFFFFE + +/* structure holds list of ibs */ +struct kgsl_ibdesc { + unsigned long gpuaddr; + unsigned long __pad; + size_t sizedwords; + unsigned int ctrl; +}; + +/** + * struct kgsl_cmdbatch_profiling_buffer + * @wall_clock_s: Ringbuffer submission time (seconds). + * If KGSL_CMDBATCH_PROFILING_KTIME is set, time is provided + * in kernel clocks, otherwise wall clock time is used. + * @wall_clock_ns: Ringbuffer submission time (nanoseconds). + * If KGSL_CMDBATCH_PROFILING_KTIME is set time is provided + * in kernel clocks, otherwise wall clock time is used. + * @gpu_ticks_queued: GPU ticks at ringbuffer submission + * @gpu_ticks_submitted: GPU ticks when starting cmdbatch execution + * @gpu_ticks_retired: GPU ticks when finishing cmdbatch execution + * + * This structure defines the profiling buffer used to measure cmdbatch + * execution time + */ +struct kgsl_cmdbatch_profiling_buffer { + uint64_t wall_clock_s; + uint64_t wall_clock_ns; + uint64_t gpu_ticks_queued; + uint64_t gpu_ticks_submitted; + uint64_t gpu_ticks_retired; +}; + +/* ioctls */ +#define KGSL_IOC_TYPE 0x09 + +/* get misc info about the GPU + type should be a value from enum kgsl_property_type + value points to a structure that varies based on type + sizebytes is sizeof() that structure + for KGSL_PROP_DEVICE_INFO, use struct kgsl_devinfo + this structure contaings hardware versioning info. + for KGSL_PROP_DEVICE_SHADOW, use struct kgsl_shadowprop + this is used to find mmap() offset and sizes for mapping + struct kgsl_memstore into userspace. +*/ +struct kgsl_device_getproperty { + unsigned int type; + void __user *value; + size_t sizebytes; +}; + +#define IOCTL_KGSL_DEVICE_GETPROPERTY \ + _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty) + +/* IOCTL_KGSL_DEVICE_READ (0x3) - removed 03/2012 + */ + +/* block until the GPU has executed past a given timestamp + * timeout is in milliseconds. + */ +struct kgsl_device_waittimestamp { + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \ + _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp) + +struct kgsl_device_waittimestamp_ctxtid { + unsigned int context_id; + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid) + +/* DEPRECATED: issue indirect commands to the GPU. + * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE + * ibaddr and sizedwords must specify a subset of a buffer created + * with IOCTL_KGSL_SHAREDMEM_FROM_PMEM + * flags may be a mask of KGSL_CONTEXT_ values + * timestamp is a returned counter value which can be passed to + * other ioctls to determine when the commands have been executed by + * the GPU. 
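A sketch of driving IOCTL_KGSL_DEVICE_GETPROPERTY from Python with the generated bindings (the struct_* class names follow clang2py's convention; the ioctl number is computed by hand because function-like macros such as _IOWR don't survive clang2py):

```python
import ctypes, fcntl, os
from msm_kgsl import struct_kgsl_devinfo, struct_kgsl_device_getproperty, KGSL_PROP_DEVICE_INFO

def _IOWR(typ, nr, size):
    # standard Linux ioctl encoding: dir(2 bits) | size(14) | type(8) | nr(8)
    return (3 << 30) | (size << 16) | (typ << 8) | nr

fd = os.open("/dev/kgsl-3d0", os.O_RDWR)  # assumed device node
info = struct_kgsl_devinfo()
arg = struct_kgsl_device_getproperty(type=KGSL_PROP_DEVICE_INFO,
    value=ctypes.cast(ctypes.pointer(info), ctypes.c_void_p),
    sizebytes=ctypes.sizeof(info))
fcntl.ioctl(fd, _IOWR(0x09, 0x2, ctypes.sizeof(arg)), arg)
print(f"chip_id={info.chip_id:#x} gpu_id={info.gpu_id}")
```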
+ * + * This function is deprecated - consider using IOCTL_KGSL_SUBMIT_COMMANDS + * instead + */ +struct kgsl_ringbuffer_issueibcmds { + unsigned int drawctxt_id; + unsigned long ibdesc_addr; + unsigned int numibs; + unsigned int timestamp; /*output param */ + unsigned int flags; +}; + +#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS \ + _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds) + +/* read the most recently executed timestamp value + * type should be a value from enum kgsl_timestamp_type + */ +struct kgsl_cmdstream_readtimestamp { + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_OLD \ + _IOR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp) + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP \ + _IOWR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp) + +/* free memory when the GPU reaches a given timestamp. + * gpuaddr specifies a memory region created by a + * IOCTL_KGSL_SHAREDMEM_FROM_PMEM call + * type should be a value from enum kgsl_timestamp_type + */ +struct kgsl_cmdstream_freememontimestamp { + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP \ + _IOW(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* Previous versions of this header had incorrectly defined + IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP as a read-only ioctl instead + of a write only ioctl. To ensure binary compatibility, the following + #define will be used to intercept the incorrect ioctl +*/ + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_OLD \ + _IOR(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* create a draw context, which is used to preserve GPU state. + * The flags field may contain a mask of KGSL_CONTEXT_* values + */ +struct kgsl_drawctxt_create { + unsigned int flags; + unsigned int drawctxt_id; /*output param */ +}; + +#define IOCTL_KGSL_DRAWCTXT_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x13, struct kgsl_drawctxt_create) + +/* destroy a draw context */ +struct kgsl_drawctxt_destroy { + unsigned int drawctxt_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_DESTROY \ + _IOW(KGSL_IOC_TYPE, 0x14, struct kgsl_drawctxt_destroy) + +/* add a block of pmem, fb, ashmem or user allocated address + * into the GPU address space */ +struct kgsl_map_user_mem { + int fd; + unsigned long gpuaddr; /*output param */ + size_t len; + size_t offset; + unsigned long hostptr; /*input param */ + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem) + +struct kgsl_cmdstream_readtimestamp_ctxtid { + unsigned int context_id; + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID \ + _IOWR(KGSL_IOC_TYPE, 0x16, struct kgsl_cmdstream_readtimestamp_ctxtid) + +struct kgsl_cmdstream_freememontimestamp_ctxtid { + unsigned int context_id; + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid) + +/* add a block of pmem or fb into the GPU address space */ +struct kgsl_sharedmem_from_pmem { + int pmem_fd; + unsigned long gpuaddr; /*output param */ + unsigned int len; + unsigned int offset; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_PMEM \ + _IOWR(KGSL_IOC_TYPE, 0x20, struct kgsl_sharedmem_from_pmem) + +/* remove memory from the GPU's address space
*/ +struct kgsl_sharedmem_free { + unsigned long gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FREE \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free) + +struct kgsl_cff_user_event { + unsigned char cff_opcode; + unsigned int op1; + unsigned int op2; + unsigned int op3; + unsigned int op4; + unsigned int op5; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_CFF_USER_EVENT \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_cff_user_event) + +struct kgsl_gmem_desc { + unsigned int x; + unsigned int y; + unsigned int width; + unsigned int height; + unsigned int pitch; +}; + +struct kgsl_buffer_desc { + void *hostptr; + unsigned long gpuaddr; + int size; + unsigned int format; + unsigned int pitch; + unsigned int enabled; +}; + +struct kgsl_bind_gmem_shadow { + unsigned int drawctxt_id; + struct kgsl_gmem_desc gmem_desc; + unsigned int shadow_x; + unsigned int shadow_y; + struct kgsl_buffer_desc shadow_buffer; + unsigned int buffer_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_BIND_GMEM_SHADOW \ + _IOW(KGSL_IOC_TYPE, 0x22, struct kgsl_bind_gmem_shadow) + +/* add a block of memory into the GPU address space */ + +/* + * IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC deprecated 09/2012 + * use IOCTL_KGSL_GPUMEM_ALLOC instead + */ + +struct kgsl_sharedmem_from_vmalloc { + unsigned long gpuaddr; /*output param */ + unsigned int hostptr; + unsigned int flags; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x23, struct kgsl_sharedmem_from_vmalloc) + +/* + * This is being deprecated in favor of IOCTL_KGSL_GPUMEM_CACHE_SYNC which + * supports both directions (flush and invalidate). This code will still + * work, but by definition it will do a flush of the cache which might not be + * what you want to have happen on a buffer following a GPU operation. It is + * safer to go with IOCTL_KGSL_GPUMEM_CACHE_SYNC + */ + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free) + +struct kgsl_drawctxt_set_bin_base_offset { + unsigned int drawctxt_id; + unsigned int offset; +}; + +#define IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET \ + _IOW(KGSL_IOC_TYPE, 0x25, struct kgsl_drawctxt_set_bin_base_offset) + +enum kgsl_cmdwindow_type { + KGSL_CMDWINDOW_MIN = 0x00000000, + KGSL_CMDWINDOW_2D = 0x00000000, + KGSL_CMDWINDOW_3D = 0x00000001, /* legacy */ + KGSL_CMDWINDOW_MMU = 0x00000002, + KGSL_CMDWINDOW_ARBITER = 0x000000FF, + KGSL_CMDWINDOW_MAX = 0x000000FF, +}; + +/* write to the command window */ +struct kgsl_cmdwindow_write { + enum kgsl_cmdwindow_type target; + unsigned int addr; + unsigned int data; +}; + +#define IOCTL_KGSL_CMDWINDOW_WRITE \ + _IOW(KGSL_IOC_TYPE, 0x2e, struct kgsl_cmdwindow_write) + +struct kgsl_gpumem_alloc { + unsigned long gpuaddr; /* output param */ + size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc) + +struct kgsl_cff_syncmem { + unsigned long gpuaddr; + size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM \ + _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem) + +/* + * A timestamp event allows the user space to register an action following an + * expired timestamp. Note IOCTL_KGSL_TIMESTAMP_EVENT has been redefined to + * _IOWR to support fences which need to return a fd for the priv parameter. 
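Continuing that sketch, IOCTL_KGSL_GPUMEM_ALLOC (0x2f above) hands back a GPU address for a fresh allocation (fd and the _IOWR helper are reused from the earlier snippet):

```python
from msm_kgsl import struct_kgsl_gpumem_alloc  # clang2py class name assumed

alloc = struct_kgsl_gpumem_alloc(size=0x1000, flags=0)  # one page, no special flags
fcntl.ioctl(fd, _IOWR(0x09, 0x2f, ctypes.sizeof(alloc)), alloc)
print(f"gpuaddr={alloc.gpuaddr:#x}")  # filled in by the kernel
```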
+ */ + +struct kgsl_timestamp_event { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + void __user *priv; /* Pointer to the event specific blob */ + size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_OLD \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_timestamp_event) + +/* A genlock timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_GENLOCK 1 + +struct kgsl_timestamp_event_genlock { + int handle; /* Handle of the genlock lock to release */ +}; + +/* A fence timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_FENCE 2 + +struct kgsl_timestamp_event_fence { + int fence_fd; /* Fence to signal */ +}; + +/* + * Set a property within the kernel. Uses the same structure as + * IOCTL_KGSL_GETPROPERTY + */ + +#define IOCTL_KGSL_SETPROPERTY \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty) + +#define IOCTL_KGSL_TIMESTAMP_EVENT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event) + +/** + * struct kgsl_gpumem_alloc_id - argument to IOCTL_KGSL_GPUMEM_ALLOC_ID + * @id: returned id value for this allocation. + * @flags: mask of KGSL_MEM* values requested and actual flags on return. + * @size: requested size of the allocation and actual size on return. + * @mmapsize: returned size to pass to mmap() which may be larger than 'size' + * @gpuaddr: returned GPU address for the allocation + * + * Allocate memory for access by the GPU. The flags and size fields are echoed + * back by the kernel, so that the caller can know if the request was + * adjusted. + * + * Supported flags: + * KGSL_MEMFLAGS_GPUREADONLY: the GPU will be unable to write to the buffer + * KGSL_MEMTYPE*: usage hint for debugging aid + * KGSL_MEMALIGN*: alignment hint, may be ignored or adjusted by the kernel. + * KGSL_MEMFLAGS_USE_CPU_MAP: If set on call and return, the returned GPU + * address will be 0. Calling mmap() will set the GPU address. + */ +struct kgsl_gpumem_alloc_id { + unsigned int id; + unsigned int flags; + size_t size; + size_t mmapsize; + unsigned long gpuaddr; +/* private: reserved for future use*/ + unsigned long __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID \ + _IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id) + +/** + * struct kgsl_gpumem_free_id - argument to IOCTL_KGSL_GPUMEM_FREE_ID + * @id: GPU allocation id to free + * + * Free an allocation by id, in case a GPU address has not been assigned or + * is unknown. Freeing an allocation by id with this ioctl or by GPU address + * with IOCTL_KGSL_SHAREDMEM_FREE are equivalent. + */ +struct kgsl_gpumem_free_id { + unsigned int id; +/* private: reserved for future use*/ + unsigned int __pad; +}; + +#define IOCTL_KGSL_GPUMEM_FREE_ID \ + _IOWR(KGSL_IOC_TYPE, 0x35, struct kgsl_gpumem_free_id) + +/** + * struct kgsl_gpumem_get_info - argument to IOCTL_KGSL_GPUMEM_GET_INFO + * @gpuaddr: GPU address to query. Also set on return. + * @id: GPU allocation id to query. Also set on return. + * @flags: returned mask of KGSL_MEM* values. + * @size: returned size of the allocation. 
+ * @mmapsize: returned size to pass mmap(), which may be larger than 'size' + * @useraddr: returned address of the userspace mapping for this buffer + * + * This ioctl allows querying of all user visible attributes of an existing + * allocation, by either the GPU address or the id returned by a previous + * call to IOCTL_KGSL_GPUMEM_ALLOC_ID. Legacy allocation ioctls may not + * return all attributes so this ioctl can be used to look them up if needed. + * + */ +struct kgsl_gpumem_get_info { + unsigned long gpuaddr; + unsigned int id; + unsigned int flags; + size_t size; + size_t mmapsize; + unsigned long useraddr; +/* private: reserved for future use*/ + unsigned long __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO\ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info) + +/** + * struct kgsl_gpumem_sync_cache - argument to IOCTL_KGSL_GPUMEM_SYNC_CACHE + * @gpuaddr: GPU address of the buffer to sync. + * @id: id of the buffer to sync. Either gpuaddr or id is sufficient. + * @op: a mask of KGSL_GPUMEM_CACHE_* values + * @offset: offset into the buffer + * @length: number of bytes starting from offset to perform + * the cache operation on + * + * Sync the L2 cache for memory headed to and from the GPU - this replaces + * KGSL_SHAREDMEM_FLUSH_CACHE since it can handle cache management for both + * directions + * + */ +struct kgsl_gpumem_sync_cache { + unsigned long gpuaddr; + unsigned int id; + unsigned int op; + size_t offset; + size_t length; +}; + +#define KGSL_GPUMEM_CACHE_CLEAN (1 << 0) +#define KGSL_GPUMEM_CACHE_TO_GPU KGSL_GPUMEM_CACHE_CLEAN + +#define KGSL_GPUMEM_CACHE_INV (1 << 1) +#define KGSL_GPUMEM_CACHE_FROM_GPU KGSL_GPUMEM_CACHE_INV + +#define KGSL_GPUMEM_CACHE_FLUSH \ + (KGSL_GPUMEM_CACHE_CLEAN | KGSL_GPUMEM_CACHE_INV) + +/* Flag to ensure backwards compatibility of kgsl_gpumem_sync_cache struct */ +#define KGSL_GPUMEM_CACHE_RANGE (1 << 31U) + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache) + +/** + * struct kgsl_perfcounter_get - argument to IOCTL_KGSL_PERFCOUNTER_GET + * @groupid: Performance counter group ID + * @countable: Countable to select within the group + * @offset: Return offset of the reserved LO counter + * @offset_hi: Return offset of the reserved HI counter + * + * Get an available performance counter from a specified groupid. The offset + * of the performance counter will be returned after successfully assigning + * the countable to the counter for the specified group. An error will be + * returned and an offset of 0 if the groupid is invalid or there are no + * more counters left. After successfully getting a perfcounter, the user + * must call kgsl_perfcounter_put(groupid, contable) when finished with + * the perfcounter to clear up perfcounter resources. + * + */ +struct kgsl_perfcounter_get { + unsigned int groupid; + unsigned int countable; + unsigned int offset; + unsigned int offset_hi; +/* private: reserved for future use */ + unsigned int __pad; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_GET \ + _IOWR(KGSL_IOC_TYPE, 0x38, struct kgsl_perfcounter_get) + +/** + * struct kgsl_perfcounter_put - argument to IOCTL_KGSL_PERFCOUNTER_PUT + * @groupid: Performance counter group ID + * @countable: Countable to release within the group + * + * Put an allocated performance counter to allow others to have access to the + * resource that was previously taken. This is only to be called after + * successfully getting a performance counter from kgsl_perfcounter_get(). 
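The get/put pairing this comment insists on, as a sketch (group 0x0 is KGSL_PERFCOUNTER_GROUP_CP from the list earlier; _IOW differs from _IOWR only in the direction bits; fd as before):

```python
from msm_kgsl import struct_kgsl_perfcounter_get, struct_kgsl_perfcounter_put

def _IOW(typ, nr, size):
    return (1 << 30) | (size << 16) | (typ << 8) | nr

get = struct_kgsl_perfcounter_get(groupid=0x0, countable=0)
fcntl.ioctl(fd, _IOWR(0x09, 0x38, ctypes.sizeof(get)), get)
print(f"CP counter reserved at LO={get.offset:#x} HI={get.offset_hi:#x}")
# ... sample the counter registers ... then release it:
put = struct_kgsl_perfcounter_put(groupid=0x0, countable=0)
fcntl.ioctl(fd, _IOW(0x09, 0x39, ctypes.sizeof(put)), put)
```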
+ * + */ +struct kgsl_perfcounter_put { + unsigned int groupid; + unsigned int countable; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_PUT \ + _IOW(KGSL_IOC_TYPE, 0x39, struct kgsl_perfcounter_put) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group ID + * @countable: Return active countables array + * @size: Size of active countables array + * @max_counters: Return total number counters for the group ID + * + * Query the available performance counters given a groupid. The array + * *countables is used to return the current active countables in counters. + * The size of the array is passed in so the kernel will only write at most + * size or counter->size for the group id. The total number of available + * counters for the group ID is returned in max_counters. + * If the array or size passed in are invalid, then only the maximum number + * of counters will be returned, no data will be written to *countables. + * If the groupid is invalid an error code will be returned. + * + */ +struct kgsl_perfcounter_query { + unsigned int groupid; + /* Array to return the current countable for up to size counters */ + unsigned int __user *countables; + unsigned int count; + unsigned int max_counters; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group IDs + * @countable: Performance counter countable IDs + * @value: Return performance counter reads + * @size: Size of all arrays (groupid/countable pair and return value) + * + * Read in the current value of a performance counter given by the groupid + * and countable. + * + */ + +struct kgsl_perfcounter_read_group { + unsigned int groupid; + unsigned int countable; + unsigned long long value; +}; + +struct kgsl_perfcounter_read { + struct kgsl_perfcounter_read_group __user *reads; + unsigned int count; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read) +/* + * struct kgsl_gpumem_sync_cache_bulk - argument to + * IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK + * @id_list: list of GPU buffer ids of the buffers to sync + * @count: number of GPU buffer ids in id_list + * @op: a mask of KGSL_GPUMEM_CACHE_* values + * + * Sync the cache for memory headed to and from the GPU. Certain + * optimizations can be made on the cache operation based on the total + * size of the working set of memory to be managed. + */ +struct kgsl_gpumem_sync_cache_bulk { + unsigned int __user *id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk) + +/* + * struct kgsl_cmd_syncpoint_timestamp + * @context_id: ID of a KGSL context + * @timestamp: GPU timestamp + * + * This structure defines a syncpoint comprising a context/timestamp pair. 
A + * list of these may be passed by IOCTL_KGSL_SUBMIT_COMMANDS to define + * dependencies that must be met before the command can be submitted to the + * hardware + */ +struct kgsl_cmd_syncpoint_timestamp { + unsigned int context_id; + unsigned int timestamp; +}; + +struct kgsl_cmd_syncpoint_fence { + int fd; +}; + +/** + * struct kgsl_cmd_syncpoint - Define a sync point for a command batch + * @type: type of sync point defined here + * @priv: Pointer to the type specific buffer + * @size: Size of the type specific buffer + * + * This structure contains pointers defining a specific command sync point. + * The pointer and size should point to a type appropriate structure. + */ +struct kgsl_cmd_syncpoint { + int type; + void __user *priv; + size_t size; +}; + +/* Flag to indicate that the cmdlist may contain memlists */ +#define KGSL_IBDESC_MEMLIST 0x1 + +/* Flag to point out the cmdbatch profiling buffer in the memlist */ +#define KGSL_IBDESC_PROFILING_BUFFER 0x2 + +/** + * struct kgsl_submit_commands - Argument to IOCTL_KGSL_SUBMIT_COMMANDS + * @context_id: KGSL context ID that owns the commands + * @flags: + * @cmdlist: User pointer to a list of kgsl_ibdesc structures + * @numcmds: Number of commands listed in cmdlist + * @synclist: User pointer to a list of kgsl_cmd_syncpoint structures + * @numsyncs: Number of sync points listed in synclist + * @timestamp: On entry, a user defined timestamp; on exit, the timestamp + * assigned to the command batch + * + * This structure specifies a command to send to the GPU hardware. This is + * similar to kgsl_issueibcmds except that it doesn't support the legacy way to + * submit IB lists and it adds sync points to block the IB until the + * dependencies are satisfied. This entry point is the new and preferred way + * to submit commands to the GPU. The memory list can be used to specify all + * memory that is referenced in the current set of commands. + */ + +struct kgsl_submit_commands { + unsigned int context_id; + unsigned int flags; + struct kgsl_ibdesc __user *cmdlist; + unsigned int numcmds; + struct kgsl_cmd_syncpoint __user *synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands) + +/** + * struct kgsl_device_constraint - device constraint argument + * @context_id: KGSL context ID + * @type: type of constraint, i.e. pwrlevel/none + * @data: constraint data + * @size: size of the constraint data + */ +struct kgsl_device_constraint { + unsigned int type; + unsigned int context_id; + void __user *data; + size_t size; +}; + +/* Constraint Type*/ +#define KGSL_CONSTRAINT_NONE 0 +#define KGSL_CONSTRAINT_PWRLEVEL 1 + +/* PWRLEVEL constraint level*/ +/* set to min frequency */ +#define KGSL_CONSTRAINT_PWR_MIN 0 +/* set to max frequency */ +#define KGSL_CONSTRAINT_PWR_MAX 1 + +struct kgsl_device_constraint_pwrlevel { + unsigned int level; +}; + +/** + * struct kgsl_syncsource_create - Argument to IOCTL_KGSL_SYNCSOURCE_CREATE + * @id: returned id for the syncsource that was created. + * + * This ioctl creates a userspace sync timeline.
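A sketch of the preferred submission path just described, for one IB assumed to already sit in GPU-visible memory (ib_gpuaddr, ib_dwords and ctx_id are placeholders; fd and _IOWR as above):

```python
from msm_kgsl import struct_kgsl_ibdesc, struct_kgsl_submit_commands

ib = struct_kgsl_ibdesc(gpuaddr=ib_gpuaddr, sizedwords=ib_dwords, ctrl=0)
sub = struct_kgsl_submit_commands(context_id=ctx_id, flags=0,
    cmdlist=ctypes.pointer(ib), numcmds=1, numsyncs=0, timestamp=0)
fcntl.ioctl(fd, _IOWR(0x09, 0x3D, ctypes.sizeof(sub)), sub)
print(f"queued as timestamp {sub.timestamp}")  # assigned by the kernel
```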
+ */ + +struct kgsl_syncsource_create { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x40, struct kgsl_syncsource_create) + +/** + * struct kgsl_syncsource_destroy - Argument to IOCTL_KGSL_SYNCSOURCE_DESTROY + * @id: syncsource id to destroy + * + * This ioctl destroys a userspace sync timeline. + */ + +struct kgsl_syncsource_destroy { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_DESTROY \ + _IOWR(KGSL_IOC_TYPE, 0x41, struct kgsl_syncsource_destroy) + +/** + * struct kgsl_syncsource_create_fence - Argument to + * IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE + * @id: syncsource id + * @fence_fd: returned sync_fence fd + * + * Create a fence that may be signaled by userspace by calling + * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE. There are no order dependencies between + * these fences. + */ +struct kgsl_syncsource_create_fence { + unsigned int id; + int fence_fd; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +/** + * struct kgsl_syncsource_signal_fence - Argument to + * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE + * @id: syncsource id + * @fence_fd: sync_fence fd to signal + * + * Signal a fence that was created by an IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE + * call using the same syncsource id. This allows a fence to be shared + * with other processes but only signaled by the process owning the fd + * used to create the fence. + */ +#define IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE \ + _IOWR(KGSL_IOC_TYPE, 0x42, struct kgsl_syncsource_create_fence) + +struct kgsl_syncsource_signal_fence { + unsigned int id; + int fence_fd; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE \ + _IOWR(KGSL_IOC_TYPE, 0x43, struct kgsl_syncsource_signal_fence) + +/** + * struct kgsl_cff_sync_gpuobj - Argument to IOCTL_KGSL_CFF_SYNC_GPUOBJ + * @offset: Offset into the GPU object to sync + * @length: Number of bytes to sync + * @id: ID of the GPU object to sync + */ +struct kgsl_cff_sync_gpuobj { + uint64_t offset; + uint64_t length; + unsigned int id; +}; + +#define IOCTL_KGSL_CFF_SYNC_GPUOBJ \ + _IOW(KGSL_IOC_TYPE, 0x44, struct kgsl_cff_sync_gpuobj) + +/** + * struct kgsl_gpuobj_alloc - Argument to IOCTL_KGSL_GPUOBJ_ALLOC + * @size: Size in bytes of the object to allocate + * @flags: mask of KGSL_MEMFLAG_* bits + * @va_len: Size in bytes of the virtual region to allocate + * @mmapsize: Returns the mmap() size of the object + * @id: Returns the GPU object ID of the new object + * @metadata_len: Length of the metadata to copy from the user + * @metadata: Pointer to the user specified metadata to store for the object + */ +struct kgsl_gpuobj_alloc { + uint64_t size; + uint64_t flags; + uint64_t va_len; + uint64_t mmapsize; + unsigned int id; + unsigned int metadata_len; + uint64_t metadata; +}; + +/* Let the user know that this header supports the gpuobj metadata */ +#define KGSL_GPUOBJ_ALLOC_METADATA_MAX 64 + +#define IOCTL_KGSL_GPUOBJ_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x45, struct kgsl_gpuobj_alloc) + +/** + * struct kgsl_gpuobj_free - Argument to IOCTL_KGSL_GPUOBJ_FREE + * @flags: Mask of: KGSL_GPUOBJ_FREE_ON_EVENT + * @priv: Pointer to the private object if KGSL_GPUOBJ_FREE_ON_EVENT is + * specified + * @id: ID of the GPU object to free + * @type: If KGSL_GPUOBJ_FREE_ON_EVENT is specified, the type of asynchronous + * event to free on + * @len: Length of the data passed in priv + */
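The 64-bit object API above supersedes the older gpumem calls; allocating through it looks like this (a sketch; fd and _IOWR as in the earlier snippets):

```python
from msm_kgsl import struct_kgsl_gpuobj_alloc

obj = struct_kgsl_gpuobj_alloc(size=0x1000, flags=0)
fcntl.ioctl(fd, _IOWR(0x09, 0x45, ctypes.sizeof(obj)), obj)
print(f"object id={obj.id}, mmapsize={obj.mmapsize:#x}")  # id-based: no gpuaddr here
```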
+struct kgsl_gpuobj_free {
+	uint64_t flags;
+	uint64_t __user priv;
+	unsigned int id;
+	unsigned int type;
+	unsigned int len;
+};
+
+#define KGSL_GPUOBJ_FREE_ON_EVENT 1
+
+#define KGSL_GPU_EVENT_TIMESTAMP 1
+#define KGSL_GPU_EVENT_FENCE 2
+
+/**
+ * struct kgsl_gpu_event_timestamp - Specifies a timestamp event to free a GPU
+ * object on
+ * @context_id: ID of the timestamp event to wait for
+ * @timestamp: Timestamp of the timestamp event to wait for
+ */
+struct kgsl_gpu_event_timestamp {
+	unsigned int context_id;
+	unsigned int timestamp;
+};
+
+/**
+ * struct kgsl_gpu_event_fence - Specifies a fence ID to free a GPU object on
+ * @fd: File descriptor for the fence
+ */
+struct kgsl_gpu_event_fence {
+	int fd;
+};
+
+#define IOCTL_KGSL_GPUOBJ_FREE \
+	_IOW(KGSL_IOC_TYPE, 0x46, struct kgsl_gpuobj_free)
+
+/**
+ * struct kgsl_gpuobj_info - argument to IOCTL_KGSL_GPUOBJ_INFO
+ * @gpuaddr: GPU address of the object
+ * @flags: Current flags for the object
+ * @size: Size of the object
+ * @va_len: VA size of the object
+ * @va_addr: Virtual address of the object (if it is mapped)
+ * @id: GPU object ID of the object to query
+ */
+struct kgsl_gpuobj_info {
+	uint64_t gpuaddr;
+	uint64_t flags;
+	uint64_t size;
+	uint64_t va_len;
+	uint64_t va_addr;
+	unsigned int id;
+};
+
+#define IOCTL_KGSL_GPUOBJ_INFO \
+	_IOWR(KGSL_IOC_TYPE, 0x47, struct kgsl_gpuobj_info)
+
+/**
+ * struct kgsl_gpuobj_import - argument to IOCTL_KGSL_GPUOBJ_IMPORT
+ * @priv: Pointer to the private data for the import type
+ * @priv_len: Length of the private data
+ * @flags: Mask of KGSL_MEMFLAG_ flags
+ * @type: Type of the import (KGSL_USER_MEM_TYPE_*)
+ * @id: Returns the ID of the new GPU object
+ */
+struct kgsl_gpuobj_import {
+	uint64_t __user priv;
+	uint64_t priv_len;
+	uint64_t flags;
+	unsigned int type;
+	unsigned int id;
+};
+
+/**
+ * struct kgsl_gpuobj_import_dma_buf - import a dmabuf object
+ * @fd: File descriptor for the dma-buf object
+ */
+struct kgsl_gpuobj_import_dma_buf {
+	int fd;
+};
+
+/**
+ * struct kgsl_gpuobj_import_useraddr - import an object based on a useraddr
+ * @virtaddr: Virtual address of the object to import
+ */
+struct kgsl_gpuobj_import_useraddr {
+	uint64_t virtaddr;
+};
+
+#define IOCTL_KGSL_GPUOBJ_IMPORT \
+	_IOWR(KGSL_IOC_TYPE, 0x48, struct kgsl_gpuobj_import)
+
+/**
+ * struct kgsl_gpuobj_sync_obj - Individual GPU object to sync
+ * @offset: Offset within the GPU object to sync
+ * @length: Number of bytes to sync
+ * @id: ID of the GPU object to sync
+ * @op: Cache operation to execute
+ */
+
+struct kgsl_gpuobj_sync_obj {
+	uint64_t offset;
+	uint64_t length;
+	unsigned int id;
+	unsigned int op;
+};
+
+/**
+ * struct kgsl_gpuobj_sync - Argument for IOCTL_KGSL_GPUOBJ_SYNC
+ * @objs: Pointer to an array of kgsl_gpuobj_sync_obj structs
+ * @obj_len: Size of each item in the array
+ * @count: Number of items in the array
+ */
+
+struct kgsl_gpuobj_sync {
+	uint64_t __user objs;
+	unsigned int obj_len;
+	unsigned int count;
+};
+
+#define IOCTL_KGSL_GPUOBJ_SYNC \
+	_IOW(KGSL_IOC_TYPE, 0x49, struct kgsl_gpuobj_sync)
+
+/**
+ * struct kgsl_command_object - GPU command object
+ * @offset: GPU address offset of the object
+ * @gpuaddr: GPU address of the object
+ * @size: Size of the object
+ * @flags: Current flags for the object
+ * @id: GPU command object ID
+ */
+struct kgsl_command_object {
+	uint64_t offset;
+	uint64_t gpuaddr;
+	uint64_t size;
+	unsigned int flags;
+	unsigned int id;
+};
+
+/**
+ * struct kgsl_command_syncpoint - GPU syncpoint object
+ * @priv: Pointer to the type specific buffer
+ * @size: Size of the type specific buffer
+ * @type: type of sync point defined here
+ */
+struct kgsl_command_syncpoint {
+	uint64_t __user priv;
+	uint64_t size;
+	unsigned int type;
+};
+
+/**
+ * struct kgsl_gpu_command - Argument for IOCTL_KGSL_GPU_COMMAND
+ * @flags: Current flags for the object
+ * @cmdlist: List of kgsl_command_objects for submission
+ * @cmdsize: Size of kgsl_command_objects structure
+ * @numcmds: Number of kgsl_command_objects in command list
+ * @objlist: List of kgsl_command_objects for tracking
+ * @objsize: Size of kgsl_command_objects structure
+ * @numobjs: Number of kgsl_command_objects in object list
+ * @synclist: List of kgsl_command_syncpoints
+ * @syncsize: Size of kgsl_command_syncpoint structure
+ * @numsyncs: Number of kgsl_command_syncpoints in syncpoint list
+ * @context_id: Context ID submitting the kgsl_gpu_command
+ * @timestamp: Timestamp for the submitted commands
+ */
+struct kgsl_gpu_command {
+	uint64_t flags;
+	uint64_t __user cmdlist;
+	unsigned int cmdsize;
+	unsigned int numcmds;
+	uint64_t __user objlist;
+	unsigned int objsize;
+	unsigned int numobjs;
+	uint64_t __user synclist;
+	unsigned int syncsize;
+	unsigned int numsyncs;
+	unsigned int context_id;
+	unsigned int timestamp;
+};
+
+#define IOCTL_KGSL_GPU_COMMAND \
+	_IOWR(KGSL_IOC_TYPE, 0x4A, struct kgsl_gpu_command)
+
+/**
+ * struct kgsl_preemption_counters_query - argument to
+ * IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY
+ * @counters: Return preemption counters array
+ * @size_user: Size allocated by userspace
+ * @size_priority_level: Size of preemption counters for each
+ * priority level
+ * @max_priority_level: Return max number of priority levels
+ *
+ * Query the available preemption counters. The array counters
+ * is used to return preemption counters. The size of the array
+ * is passed in so the kernel will only write at most size_user
+ * or max available preemption counters. The total number of
+ * preemption counters is returned in max_priority_level. If the
+ * array or size passed in are invalid, then an error is
+ * returned.
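+ *
+ * A rough userspace sketch (assuming `fd` is an open /dev/kgsl-3d0 handle;
+ * error handling omitted):
+ *
+ *	unsigned int buf[256];
+ *	struct kgsl_preemption_counters_query q = {
+ *		.counters = (uint64_t)(uintptr_t)buf,
+ *		.size_user = sizeof(buf),
+ *	};
+ *	ioctl(fd, IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY, &q);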
+ */
+struct kgsl_preemption_counters_query {
+	uint64_t __user counters;
+	unsigned int size_user;
+	unsigned int size_priority_level;
+	unsigned int max_priority_level;
+};
+
+#define IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY \
+	_IOWR(KGSL_IOC_TYPE, 0x4B, struct kgsl_preemption_counters_query)
+
+/**
+ * struct kgsl_gpuobj_set_info - argument for IOCTL_KGSL_GPUOBJ_SET_INFO
+ * @flags: Flags to indicate which parameters to change
+ * @metadata: If KGSL_GPUOBJ_SET_INFO_METADATA is set, a pointer to the new
+ * metadata
+ * @id: GPU memory object ID to change
+ * @metadata_len: If KGSL_GPUOBJ_SET_INFO_METADATA is set, the length of the
+ * new metadata string
+ * @type: If KGSL_GPUOBJ_SET_INFO_TYPE is set, the new type of the memory object
+ */
+
+#define KGSL_GPUOBJ_SET_INFO_METADATA (1 << 0)
+#define KGSL_GPUOBJ_SET_INFO_TYPE (1 << 1)
+
+struct kgsl_gpuobj_set_info {
+	uint64_t flags;
+	uint64_t metadata;
+	unsigned int id;
+	unsigned int metadata_len;
+	unsigned int type;
+};
+
+#define IOCTL_KGSL_GPUOBJ_SET_INFO \
+	_IOW(KGSL_IOC_TYPE, 0x4C, struct kgsl_gpuobj_set_info)
+
+#endif /* _UAPI_MSM_KGSL_H */
diff --git a/extra/qcom_gpu_driver/msm_kgsl.py b/extra/qcom_gpu_driver/msm_kgsl.py
new file mode 100644
index 00000000..1c5bca85
--- /dev/null
+++ b/extra/qcom_gpu_driver/msm_kgsl.py
@@ -0,0 +1,1034 @@
+# -*- coding: utf-8 -*-
+#
+# TARGET arch is: []
+# WORD_SIZE is: 8
+# POINTER_SIZE is: 8
+# LONGDOUBLE_SIZE is: 16
+#
+import ctypes
+
+
+class AsDictMixin:
+    @classmethod
+    def as_dict(cls, self):
+        result = {}
+        if not isinstance(self, AsDictMixin):
+            # not a structure, assume it's already a python object
+            return self
+        if not hasattr(cls, "_fields_"):
+            return result
+        # sys.version_info >= (3, 5)
+        # for (field, *_) in cls._fields_: # noqa
+        for field_tuple in cls._fields_: # noqa
+            field = field_tuple[0]
+            if field.startswith('PADDING_'):
+                continue
+            value = getattr(self, field)
+            type_ = type(value)
+            if hasattr(value, "_length_") and hasattr(value, "_type_"):
+                # array
+                if not hasattr(type_, "as_dict"):
+                    value = [v for v in value]
+                else:
+                    type_ = type_._type_
+                    value = [type_.as_dict(v) for v in value]
+            elif hasattr(value, "contents") and hasattr(value, "_type_"):
+                # pointer
+                try:
+                    if not hasattr(type_, "as_dict"):
+                        value = value.contents
+                    else:
+                        type_ = type_._type_
+                        value = type_.as_dict(value.contents)
+                except ValueError:
+                    # nullptr
+                    value = None
+            elif isinstance(value, AsDictMixin):
+                # other structure
+                value = type_.as_dict(value)
+            result[field] = value
+        return result
+
+
+class Structure(ctypes.Structure, AsDictMixin):
+
+    def __init__(self, *args, **kwds):
+        # We don't want to use positional arguments to fill PADDING_* fields
+
+        args = dict(zip(self.__class__._field_names_(), args))
+        args.update(kwds)
+        super(Structure, self).__init__(**args)
+
+    @classmethod
+    def _field_names_(cls):
+        if hasattr(cls, '_fields_'):
+            return (f[0] for f in cls._fields_ if not f[0].startswith('PADDING'))
+        else:
+            return ()
+
+    @classmethod
+    def get_type(cls, field):
+        for f in cls._fields_:
+            if f[0] == field:
+                return f[1]
+        return None
+
+    @classmethod
+    def bind(cls, bound_fields):
+        fields = {}
+        for name, type_ in cls._fields_:
+            if hasattr(type_, "restype"):
+                if name in bound_fields:
+                    if bound_fields[name] is None:
+                        fields[name] = type_()
+                    else:
+                        # use a closure to capture the callback from the loop scope
+                        fields[name] = (
+                            type_((lambda callback: lambda *args: callback(*args))(
+                                bound_fields[name]))
+                        )
+                    del bound_fields[name]
+                else:
+                    # default callback
implementation (does nothing) + try: + default_ = type_(0).restype().value + except TypeError: + default_ = None + fields[name] = type_(( + lambda default_: lambda *args: default_)(default_)) + else: + # not a callback function, use default initialization + if name in bound_fields: + fields[name] = bound_fields[name] + del bound_fields[name] + else: + fields[name] = type_() + if len(bound_fields) != 0: + raise ValueError( + "Cannot bind the following unknown callback(s) {}.{}".format( + cls.__name__, bound_fields.keys() + )) + return cls(**fields) + + +class Union(ctypes.Union, AsDictMixin): + pass + + + +c_int128 = ctypes.c_ubyte*16 +c_uint128 = c_int128 +void = None +if ctypes.sizeof(ctypes.c_longdouble) == 16: + c_long_double_t = ctypes.c_longdouble +else: + c_long_double_t = ctypes.c_ubyte*16 + + + + +# values for enumeration 'kgsl_user_mem_type' +kgsl_user_mem_type__enumvalues = { + 0: 'KGSL_USER_MEM_TYPE_PMEM', + 1: 'KGSL_USER_MEM_TYPE_ASHMEM', + 2: 'KGSL_USER_MEM_TYPE_ADDR', + 3: 'KGSL_USER_MEM_TYPE_ION', + 3: 'KGSL_USER_MEM_TYPE_DMABUF', + 7: 'KGSL_USER_MEM_TYPE_MAX', +} +KGSL_USER_MEM_TYPE_PMEM = 0 +KGSL_USER_MEM_TYPE_ASHMEM = 1 +KGSL_USER_MEM_TYPE_ADDR = 2 +KGSL_USER_MEM_TYPE_ION = 3 +KGSL_USER_MEM_TYPE_DMABUF = 3 +KGSL_USER_MEM_TYPE_MAX = 7 +kgsl_user_mem_type = ctypes.c_uint32 # enum + +# values for enumeration 'kgsl_ctx_reset_stat' +kgsl_ctx_reset_stat__enumvalues = { + 0: 'KGSL_CTX_STAT_NO_ERROR', + 1: 'KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT', + 2: 'KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT', + 3: 'KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT', +} +KGSL_CTX_STAT_NO_ERROR = 0 +KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 1 +KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 2 +KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 3 +kgsl_ctx_reset_stat = ctypes.c_uint32 # enum + +# values for enumeration 'kgsl_deviceid' +kgsl_deviceid__enumvalues = { + 0: 'KGSL_DEVICE_3D0', + 1: 'KGSL_DEVICE_MAX', +} +KGSL_DEVICE_3D0 = 0 +KGSL_DEVICE_MAX = 1 +kgsl_deviceid = ctypes.c_uint32 # enum +class struct_kgsl_devinfo(Structure): + pass + +struct_kgsl_devinfo._pack_ = 1 # source:False +struct_kgsl_devinfo._fields_ = [ + ('device_id', ctypes.c_uint32), + ('chip_id', ctypes.c_uint32), + ('mmu_enabled', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('gmem_gpubaseaddr', ctypes.c_uint64), + ('gpu_id', ctypes.c_uint32), + ('PADDING_1', ctypes.c_ubyte * 4), + ('gmem_sizebytes', ctypes.c_uint64), +] + +class struct_kgsl_devmemstore(Structure): + pass + +struct_kgsl_devmemstore._pack_ = 1 # source:False +struct_kgsl_devmemstore._fields_ = [ + ('soptimestamp', ctypes.c_uint32), + ('sbz', ctypes.c_uint32), + ('eoptimestamp', ctypes.c_uint32), + ('sbz2', ctypes.c_uint32), + ('preempted', ctypes.c_uint32), + ('sbz3', ctypes.c_uint32), + ('ref_wait_ts', ctypes.c_uint32), + ('sbz4', ctypes.c_uint32), + ('current_context', ctypes.c_uint32), + ('sbz5', ctypes.c_uint32), +] + + +# values for enumeration 'kgsl_timestamp_type' +kgsl_timestamp_type__enumvalues = { + 1: 'KGSL_TIMESTAMP_CONSUMED', + 2: 'KGSL_TIMESTAMP_RETIRED', + 3: 'KGSL_TIMESTAMP_QUEUED', +} +KGSL_TIMESTAMP_CONSUMED = 1 +KGSL_TIMESTAMP_RETIRED = 2 +KGSL_TIMESTAMP_QUEUED = 3 +kgsl_timestamp_type = ctypes.c_uint32 # enum +class struct_kgsl_shadowprop(Structure): + pass + +struct_kgsl_shadowprop._pack_ = 1 # source:False +struct_kgsl_shadowprop._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('size', ctypes.c_uint64), + ('flags', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_version(Structure): + pass + +struct_kgsl_version._pack_ 
= 1 # source:False +struct_kgsl_version._fields_ = [ + ('drv_major', ctypes.c_uint32), + ('drv_minor', ctypes.c_uint32), + ('dev_major', ctypes.c_uint32), + ('dev_minor', ctypes.c_uint32), +] + +class struct_kgsl_sp_generic_mem(Structure): + pass + +struct_kgsl_sp_generic_mem._pack_ = 1 # source:False +struct_kgsl_sp_generic_mem._fields_ = [ + ('local', ctypes.c_uint64), + ('pvt', ctypes.c_uint64), +] + +class struct_kgsl_ucode_version(Structure): + pass + +struct_kgsl_ucode_version._pack_ = 1 # source:False +struct_kgsl_ucode_version._fields_ = [ + ('pfp', ctypes.c_uint32), + ('pm4', ctypes.c_uint32), +] + +class struct_kgsl_gpmu_version(Structure): + pass + +struct_kgsl_gpmu_version._pack_ = 1 # source:False +struct_kgsl_gpmu_version._fields_ = [ + ('major', ctypes.c_uint32), + ('minor', ctypes.c_uint32), + ('features', ctypes.c_uint32), +] + +class struct_kgsl_ibdesc(Structure): + pass + +struct_kgsl_ibdesc._pack_ = 1 # source:False +struct_kgsl_ibdesc._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('__pad', ctypes.c_uint64), + ('sizedwords', ctypes.c_uint64), + ('ctrl', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_cmdbatch_profiling_buffer(Structure): + pass + +struct_kgsl_cmdbatch_profiling_buffer._pack_ = 1 # source:False +struct_kgsl_cmdbatch_profiling_buffer._fields_ = [ + ('wall_clock_s', ctypes.c_uint64), + ('wall_clock_ns', ctypes.c_uint64), + ('gpu_ticks_queued', ctypes.c_uint64), + ('gpu_ticks_submitted', ctypes.c_uint64), + ('gpu_ticks_retired', ctypes.c_uint64), +] + +class struct_kgsl_device_getproperty(Structure): + pass + +struct_kgsl_device_getproperty._pack_ = 1 # source:False +struct_kgsl_device_getproperty._fields_ = [ + ('type', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('value', ctypes.POINTER(None)), + ('sizebytes', ctypes.c_uint64), +] + +class struct_kgsl_device_waittimestamp(Structure): + pass + +struct_kgsl_device_waittimestamp._pack_ = 1 # source:False +struct_kgsl_device_waittimestamp._fields_ = [ + ('timestamp', ctypes.c_uint32), + ('timeout', ctypes.c_uint32), +] + +class struct_kgsl_device_waittimestamp_ctxtid(Structure): + pass + +struct_kgsl_device_waittimestamp_ctxtid._pack_ = 1 # source:False +struct_kgsl_device_waittimestamp_ctxtid._fields_ = [ + ('context_id', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), + ('timeout', ctypes.c_uint32), +] + +class struct_kgsl_ringbuffer_issueibcmds(Structure): + pass + +struct_kgsl_ringbuffer_issueibcmds._pack_ = 1 # source:False +struct_kgsl_ringbuffer_issueibcmds._fields_ = [ + ('drawctxt_id', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('ibdesc_addr', ctypes.c_uint64), + ('numibs', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), + ('flags', ctypes.c_uint32), + ('PADDING_1', ctypes.c_ubyte * 4), +] + +class struct_kgsl_cmdstream_readtimestamp(Structure): + pass + +struct_kgsl_cmdstream_readtimestamp._pack_ = 1 # source:False +struct_kgsl_cmdstream_readtimestamp._fields_ = [ + ('type', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), +] + +class struct_kgsl_cmdstream_freememontimestamp(Structure): + pass + +struct_kgsl_cmdstream_freememontimestamp._pack_ = 1 # source:False +struct_kgsl_cmdstream_freememontimestamp._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('type', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), +] + +class struct_kgsl_drawctxt_create(Structure): + pass + +struct_kgsl_drawctxt_create._pack_ = 1 # source:False +struct_kgsl_drawctxt_create._fields_ = [ + ('flags', ctypes.c_uint32), + ('drawctxt_id', ctypes.c_uint32), 
+] + +class struct_kgsl_drawctxt_destroy(Structure): + pass + +struct_kgsl_drawctxt_destroy._pack_ = 1 # source:False +struct_kgsl_drawctxt_destroy._fields_ = [ + ('drawctxt_id', ctypes.c_uint32), +] + +class struct_kgsl_map_user_mem(Structure): + pass + +struct_kgsl_map_user_mem._pack_ = 1 # source:False +struct_kgsl_map_user_mem._fields_ = [ + ('fd', ctypes.c_int32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('gpuaddr', ctypes.c_uint64), + ('len', ctypes.c_uint64), + ('offset', ctypes.c_uint64), + ('hostptr', ctypes.c_uint64), + ('memtype', kgsl_user_mem_type), + ('flags', ctypes.c_uint32), +] + +class struct_kgsl_cmdstream_readtimestamp_ctxtid(Structure): + pass + +struct_kgsl_cmdstream_readtimestamp_ctxtid._pack_ = 1 # source:False +struct_kgsl_cmdstream_readtimestamp_ctxtid._fields_ = [ + ('context_id', ctypes.c_uint32), + ('type', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), +] + +class struct_kgsl_cmdstream_freememontimestamp_ctxtid(Structure): + pass + +struct_kgsl_cmdstream_freememontimestamp_ctxtid._pack_ = 1 # source:False +struct_kgsl_cmdstream_freememontimestamp_ctxtid._fields_ = [ + ('context_id', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('gpuaddr', ctypes.c_uint64), + ('type', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), +] + +class struct_kgsl_sharedmem_from_pmem(Structure): + pass + +struct_kgsl_sharedmem_from_pmem._pack_ = 1 # source:False +struct_kgsl_sharedmem_from_pmem._fields_ = [ + ('pmem_fd', ctypes.c_int32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('gpuaddr', ctypes.c_uint64), + ('len', ctypes.c_uint32), + ('offset', ctypes.c_uint32), +] + +class struct_kgsl_sharedmem_free(Structure): + pass + +struct_kgsl_sharedmem_free._pack_ = 1 # source:False +struct_kgsl_sharedmem_free._fields_ = [ + ('gpuaddr', ctypes.c_uint64), +] + +class struct_kgsl_cff_user_event(Structure): + pass + +struct_kgsl_cff_user_event._pack_ = 1 # source:False +struct_kgsl_cff_user_event._fields_ = [ + ('cff_opcode', ctypes.c_ubyte), + ('PADDING_0', ctypes.c_ubyte * 3), + ('op1', ctypes.c_uint32), + ('op2', ctypes.c_uint32), + ('op3', ctypes.c_uint32), + ('op4', ctypes.c_uint32), + ('op5', ctypes.c_uint32), + ('__pad', ctypes.c_uint32 * 2), +] + +class struct_kgsl_gmem_desc(Structure): + pass + +struct_kgsl_gmem_desc._pack_ = 1 # source:False +struct_kgsl_gmem_desc._fields_ = [ + ('x', ctypes.c_uint32), + ('y', ctypes.c_uint32), + ('width', ctypes.c_uint32), + ('height', ctypes.c_uint32), + ('pitch', ctypes.c_uint32), +] + +class struct_kgsl_buffer_desc(Structure): + pass + +struct_kgsl_buffer_desc._pack_ = 1 # source:False +struct_kgsl_buffer_desc._fields_ = [ + ('hostptr', ctypes.POINTER(None)), + ('gpuaddr', ctypes.c_uint64), + ('size', ctypes.c_int32), + ('format', ctypes.c_uint32), + ('pitch', ctypes.c_uint32), + ('enabled', ctypes.c_uint32), +] + +class struct_kgsl_bind_gmem_shadow(Structure): + pass + +struct_kgsl_bind_gmem_shadow._pack_ = 1 # source:False +struct_kgsl_bind_gmem_shadow._fields_ = [ + ('drawctxt_id', ctypes.c_uint32), + ('gmem_desc', struct_kgsl_gmem_desc), + ('shadow_x', ctypes.c_uint32), + ('shadow_y', ctypes.c_uint32), + ('shadow_buffer', struct_kgsl_buffer_desc), + ('buffer_id', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_sharedmem_from_vmalloc(Structure): + pass + +struct_kgsl_sharedmem_from_vmalloc._pack_ = 1 # source:False +struct_kgsl_sharedmem_from_vmalloc._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('hostptr', ctypes.c_uint32), + ('flags', ctypes.c_uint32), +] + +class 
struct_kgsl_drawctxt_set_bin_base_offset(Structure): + pass + +struct_kgsl_drawctxt_set_bin_base_offset._pack_ = 1 # source:False +struct_kgsl_drawctxt_set_bin_base_offset._fields_ = [ + ('drawctxt_id', ctypes.c_uint32), + ('offset', ctypes.c_uint32), +] + + +# values for enumeration 'kgsl_cmdwindow_type' +kgsl_cmdwindow_type__enumvalues = { + 0: 'KGSL_CMDWINDOW_MIN', + 0: 'KGSL_CMDWINDOW_2D', + 1: 'KGSL_CMDWINDOW_3D', + 2: 'KGSL_CMDWINDOW_MMU', + 255: 'KGSL_CMDWINDOW_ARBITER', + 255: 'KGSL_CMDWINDOW_MAX', +} +KGSL_CMDWINDOW_MIN = 0 +KGSL_CMDWINDOW_2D = 0 +KGSL_CMDWINDOW_3D = 1 +KGSL_CMDWINDOW_MMU = 2 +KGSL_CMDWINDOW_ARBITER = 255 +KGSL_CMDWINDOW_MAX = 255 +kgsl_cmdwindow_type = ctypes.c_uint32 # enum +class struct_kgsl_cmdwindow_write(Structure): + pass + +struct_kgsl_cmdwindow_write._pack_ = 1 # source:False +struct_kgsl_cmdwindow_write._fields_ = [ + ('target', kgsl_cmdwindow_type), + ('addr', ctypes.c_uint32), + ('data', ctypes.c_uint32), +] + +class struct_kgsl_gpumem_alloc(Structure): + pass + +struct_kgsl_gpumem_alloc._pack_ = 1 # source:False +struct_kgsl_gpumem_alloc._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('size', ctypes.c_uint64), + ('flags', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_cff_syncmem(Structure): + pass + +struct_kgsl_cff_syncmem._pack_ = 1 # source:False +struct_kgsl_cff_syncmem._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('len', ctypes.c_uint64), + ('__pad', ctypes.c_uint32 * 2), +] + +class struct_kgsl_timestamp_event(Structure): + pass + +struct_kgsl_timestamp_event._pack_ = 1 # source:False +struct_kgsl_timestamp_event._fields_ = [ + ('type', ctypes.c_int32), + ('timestamp', ctypes.c_uint32), + ('context_id', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('priv', ctypes.POINTER(None)), + ('len', ctypes.c_uint64), +] + +class struct_kgsl_timestamp_event_genlock(Structure): + pass + +struct_kgsl_timestamp_event_genlock._pack_ = 1 # source:False +struct_kgsl_timestamp_event_genlock._fields_ = [ + ('handle', ctypes.c_int32), +] + +class struct_kgsl_timestamp_event_fence(Structure): + pass + +struct_kgsl_timestamp_event_fence._pack_ = 1 # source:False +struct_kgsl_timestamp_event_fence._fields_ = [ + ('fence_fd', ctypes.c_int32), +] + +class struct_kgsl_gpumem_alloc_id(Structure): + pass + +struct_kgsl_gpumem_alloc_id._pack_ = 1 # source:False +struct_kgsl_gpumem_alloc_id._fields_ = [ + ('id', ctypes.c_uint32), + ('flags', ctypes.c_uint32), + ('size', ctypes.c_uint64), + ('mmapsize', ctypes.c_uint64), + ('gpuaddr', ctypes.c_uint64), + ('__pad', ctypes.c_uint64 * 2), +] + +class struct_kgsl_gpumem_free_id(Structure): + pass + +struct_kgsl_gpumem_free_id._pack_ = 1 # source:False +struct_kgsl_gpumem_free_id._fields_ = [ + ('id', ctypes.c_uint32), + ('__pad', ctypes.c_uint32), +] + +class struct_kgsl_gpumem_get_info(Structure): + pass + +struct_kgsl_gpumem_get_info._pack_ = 1 # source:False +struct_kgsl_gpumem_get_info._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('id', ctypes.c_uint32), + ('flags', ctypes.c_uint32), + ('size', ctypes.c_uint64), + ('mmapsize', ctypes.c_uint64), + ('useraddr', ctypes.c_uint64), + ('__pad', ctypes.c_uint64 * 4), +] + +class struct_kgsl_gpumem_sync_cache(Structure): + pass + +struct_kgsl_gpumem_sync_cache._pack_ = 1 # source:False +struct_kgsl_gpumem_sync_cache._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('id', ctypes.c_uint32), + ('op', ctypes.c_uint32), + ('offset', ctypes.c_uint64), + ('length', ctypes.c_uint64), +] + +class struct_kgsl_perfcounter_get(Structure): + pass + 
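+# These generated Structure subclasses match the kernel's binary layout, so
+# they can be passed directly to fcntl.ioctl against /dev/kgsl-3d0. A rough
+# sketch (assumptions: `fd` is an open device file object, and the request
+# number IOCTL_KGSL_PERFCOUNTER_GET comes from msm_kgsl.h, since this module
+# does not define the ioctl numbers):
+#
+#   import fcntl
+#   get = struct_kgsl_perfcounter_get(groupid=1, countable=0)
+#   fcntl.ioctl(fd, IOCTL_KGSL_PERFCOUNTER_GET, get)  # kernel fills offset
+#   print(hex(get.offset))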
+struct_kgsl_perfcounter_get._pack_ = 1 # source:False +struct_kgsl_perfcounter_get._fields_ = [ + ('groupid', ctypes.c_uint32), + ('countable', ctypes.c_uint32), + ('offset', ctypes.c_uint32), + ('offset_hi', ctypes.c_uint32), + ('__pad', ctypes.c_uint32), +] + +class struct_kgsl_perfcounter_put(Structure): + pass + +struct_kgsl_perfcounter_put._pack_ = 1 # source:False +struct_kgsl_perfcounter_put._fields_ = [ + ('groupid', ctypes.c_uint32), + ('countable', ctypes.c_uint32), + ('__pad', ctypes.c_uint32 * 2), +] + +class struct_kgsl_perfcounter_query(Structure): + pass + +struct_kgsl_perfcounter_query._pack_ = 1 # source:False +struct_kgsl_perfcounter_query._fields_ = [ + ('groupid', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('countables', ctypes.POINTER(ctypes.c_uint32)), + ('count', ctypes.c_uint32), + ('max_counters', ctypes.c_uint32), + ('__pad', ctypes.c_uint32 * 2), +] + +class struct_kgsl_perfcounter_read_group(Structure): + pass + +struct_kgsl_perfcounter_read_group._pack_ = 1 # source:False +struct_kgsl_perfcounter_read_group._fields_ = [ + ('groupid', ctypes.c_uint32), + ('countable', ctypes.c_uint32), + ('value', ctypes.c_uint64), +] + +class struct_kgsl_perfcounter_read(Structure): + pass + +struct_kgsl_perfcounter_read._pack_ = 1 # source:False +struct_kgsl_perfcounter_read._fields_ = [ + ('reads', ctypes.POINTER(struct_kgsl_perfcounter_read_group)), + ('count', ctypes.c_uint32), + ('__pad', ctypes.c_uint32 * 2), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_gpumem_sync_cache_bulk(Structure): + pass + +struct_kgsl_gpumem_sync_cache_bulk._pack_ = 1 # source:False +struct_kgsl_gpumem_sync_cache_bulk._fields_ = [ + ('id_list', ctypes.POINTER(ctypes.c_uint32)), + ('count', ctypes.c_uint32), + ('op', ctypes.c_uint32), + ('__pad', ctypes.c_uint32 * 2), +] + +class struct_kgsl_cmd_syncpoint_timestamp(Structure): + pass + +struct_kgsl_cmd_syncpoint_timestamp._pack_ = 1 # source:False +struct_kgsl_cmd_syncpoint_timestamp._fields_ = [ + ('context_id', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), +] + +class struct_kgsl_cmd_syncpoint_fence(Structure): + pass + +struct_kgsl_cmd_syncpoint_fence._pack_ = 1 # source:False +struct_kgsl_cmd_syncpoint_fence._fields_ = [ + ('fd', ctypes.c_int32), +] + +class struct_kgsl_cmd_syncpoint(Structure): + pass + +struct_kgsl_cmd_syncpoint._pack_ = 1 # source:False +struct_kgsl_cmd_syncpoint._fields_ = [ + ('type', ctypes.c_int32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('priv', ctypes.POINTER(None)), + ('size', ctypes.c_uint64), +] + +class struct_kgsl_submit_commands(Structure): + pass + +struct_kgsl_submit_commands._pack_ = 1 # source:False +struct_kgsl_submit_commands._fields_ = [ + ('context_id', ctypes.c_uint32), + ('flags', ctypes.c_uint32), + ('cmdlist', ctypes.POINTER(struct_kgsl_ibdesc)), + ('numcmds', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), + ('synclist', ctypes.POINTER(struct_kgsl_cmd_syncpoint)), + ('numsyncs', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), + ('__pad', ctypes.c_uint32 * 4), +] + +class struct_kgsl_device_constraint(Structure): + pass + +struct_kgsl_device_constraint._pack_ = 1 # source:False +struct_kgsl_device_constraint._fields_ = [ + ('type', ctypes.c_uint32), + ('context_id', ctypes.c_uint32), + ('data', ctypes.POINTER(None)), + ('size', ctypes.c_uint64), +] + +class struct_kgsl_device_constraint_pwrlevel(Structure): + pass + +struct_kgsl_device_constraint_pwrlevel._pack_ = 1 # source:False +struct_kgsl_device_constraint_pwrlevel._fields_ = [ + ('level', 
ctypes.c_uint32), +] + +class struct_kgsl_syncsource_create(Structure): + pass + +struct_kgsl_syncsource_create._pack_ = 1 # source:False +struct_kgsl_syncsource_create._fields_ = [ + ('id', ctypes.c_uint32), + ('__pad', ctypes.c_uint32 * 3), +] + +class struct_kgsl_syncsource_destroy(Structure): + pass + +struct_kgsl_syncsource_destroy._pack_ = 1 # source:False +struct_kgsl_syncsource_destroy._fields_ = [ + ('id', ctypes.c_uint32), + ('__pad', ctypes.c_uint32 * 3), +] + +class struct_kgsl_syncsource_create_fence(Structure): + pass + +struct_kgsl_syncsource_create_fence._pack_ = 1 # source:False +struct_kgsl_syncsource_create_fence._fields_ = [ + ('id', ctypes.c_uint32), + ('fence_fd', ctypes.c_int32), + ('__pad', ctypes.c_uint32 * 4), +] + +class struct_kgsl_syncsource_signal_fence(Structure): + pass + +struct_kgsl_syncsource_signal_fence._pack_ = 1 # source:False +struct_kgsl_syncsource_signal_fence._fields_ = [ + ('id', ctypes.c_uint32), + ('fence_fd', ctypes.c_int32), + ('__pad', ctypes.c_uint32 * 4), +] + +class struct_kgsl_cff_sync_gpuobj(Structure): + pass + +struct_kgsl_cff_sync_gpuobj._pack_ = 1 # source:False +struct_kgsl_cff_sync_gpuobj._fields_ = [ + ('offset', ctypes.c_uint64), + ('length', ctypes.c_uint64), + ('id', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_gpuobj_alloc(Structure): + pass + +struct_kgsl_gpuobj_alloc._pack_ = 1 # source:False +struct_kgsl_gpuobj_alloc._fields_ = [ + ('size', ctypes.c_uint64), + ('flags', ctypes.c_uint64), + ('va_len', ctypes.c_uint64), + ('mmapsize', ctypes.c_uint64), + ('id', ctypes.c_uint32), + ('metadata_len', ctypes.c_uint32), + ('metadata', ctypes.c_uint64), +] + +class struct_kgsl_gpuobj_free(Structure): + pass + +struct_kgsl_gpuobj_free._pack_ = 1 # source:False +struct_kgsl_gpuobj_free._fields_ = [ + ('flags', ctypes.c_uint64), + ('priv', ctypes.c_uint64), + ('id', ctypes.c_uint32), + ('type', ctypes.c_uint32), + ('len', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_gpu_event_timestamp(Structure): + pass + +struct_kgsl_gpu_event_timestamp._pack_ = 1 # source:False +struct_kgsl_gpu_event_timestamp._fields_ = [ + ('context_id', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), +] + +class struct_kgsl_gpu_event_fence(Structure): + pass + +struct_kgsl_gpu_event_fence._pack_ = 1 # source:False +struct_kgsl_gpu_event_fence._fields_ = [ + ('fd', ctypes.c_int32), +] + +class struct_kgsl_gpuobj_info(Structure): + pass + +struct_kgsl_gpuobj_info._pack_ = 1 # source:False +struct_kgsl_gpuobj_info._fields_ = [ + ('gpuaddr', ctypes.c_uint64), + ('flags', ctypes.c_uint64), + ('size', ctypes.c_uint64), + ('va_len', ctypes.c_uint64), + ('va_addr', ctypes.c_uint64), + ('id', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_gpuobj_import(Structure): + pass + +struct_kgsl_gpuobj_import._pack_ = 1 # source:False +struct_kgsl_gpuobj_import._fields_ = [ + ('priv', ctypes.c_uint64), + ('priv_len', ctypes.c_uint64), + ('flags', ctypes.c_uint64), + ('type', ctypes.c_uint32), + ('id', ctypes.c_uint32), +] + +class struct_kgsl_gpuobj_import_dma_buf(Structure): + pass + +struct_kgsl_gpuobj_import_dma_buf._pack_ = 1 # source:False +struct_kgsl_gpuobj_import_dma_buf._fields_ = [ + ('fd', ctypes.c_int32), +] + +class struct_kgsl_gpuobj_import_useraddr(Structure): + pass + +struct_kgsl_gpuobj_import_useraddr._pack_ = 1 # source:False +struct_kgsl_gpuobj_import_useraddr._fields_ = [ + ('virtaddr', ctypes.c_uint64), +] + +class struct_kgsl_gpuobj_sync_obj(Structure): + 
pass + +struct_kgsl_gpuobj_sync_obj._pack_ = 1 # source:False +struct_kgsl_gpuobj_sync_obj._fields_ = [ + ('offset', ctypes.c_uint64), + ('length', ctypes.c_uint64), + ('id', ctypes.c_uint32), + ('op', ctypes.c_uint32), +] + +class struct_kgsl_gpuobj_sync(Structure): + pass + +struct_kgsl_gpuobj_sync._pack_ = 1 # source:False +struct_kgsl_gpuobj_sync._fields_ = [ + ('objs', ctypes.c_uint64), + ('obj_len', ctypes.c_uint32), + ('count', ctypes.c_uint32), +] + +class struct_kgsl_command_object(Structure): + pass + +struct_kgsl_command_object._pack_ = 1 # source:False +struct_kgsl_command_object._fields_ = [ + ('offset', ctypes.c_uint64), + ('gpuaddr', ctypes.c_uint64), + ('size', ctypes.c_uint64), + ('flags', ctypes.c_uint32), + ('id', ctypes.c_uint32), +] + +class struct_kgsl_command_syncpoint(Structure): + pass + +struct_kgsl_command_syncpoint._pack_ = 1 # source:False +struct_kgsl_command_syncpoint._fields_ = [ + ('priv', ctypes.c_uint64), + ('size', ctypes.c_uint64), + ('type', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_gpu_command(Structure): + pass + +struct_kgsl_gpu_command._pack_ = 1 # source:False +struct_kgsl_gpu_command._fields_ = [ + ('flags', ctypes.c_uint64), + ('cmdlist', ctypes.c_uint64), + ('cmdsize', ctypes.c_uint32), + ('numcmds', ctypes.c_uint32), + ('objlist', ctypes.c_uint64), + ('objsize', ctypes.c_uint32), + ('numobjs', ctypes.c_uint32), + ('synclist', ctypes.c_uint64), + ('syncsize', ctypes.c_uint32), + ('numsyncs', ctypes.c_uint32), + ('context_id', ctypes.c_uint32), + ('timestamp', ctypes.c_uint32), +] + +class struct_kgsl_preemption_counters_query(Structure): + pass + +struct_kgsl_preemption_counters_query._pack_ = 1 # source:False +struct_kgsl_preemption_counters_query._fields_ = [ + ('counters', ctypes.c_uint64), + ('size_user', ctypes.c_uint32), + ('size_priority_level', ctypes.c_uint32), + ('max_priority_level', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +class struct_kgsl_gpuobj_set_info(Structure): + pass + +struct_kgsl_gpuobj_set_info._pack_ = 1 # source:False +struct_kgsl_gpuobj_set_info._fields_ = [ + ('flags', ctypes.c_uint64), + ('metadata', ctypes.c_uint64), + ('id', ctypes.c_uint32), + ('metadata_len', ctypes.c_uint32), + ('type', ctypes.c_uint32), + ('PADDING_0', ctypes.c_ubyte * 4), +] + +__all__ = \ + ['KGSL_CMDWINDOW_2D', 'KGSL_CMDWINDOW_3D', + 'KGSL_CMDWINDOW_ARBITER', 'KGSL_CMDWINDOW_MAX', + 'KGSL_CMDWINDOW_MIN', 'KGSL_CMDWINDOW_MMU', + 'KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT', + 'KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT', + 'KGSL_CTX_STAT_NO_ERROR', + 'KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT', 'KGSL_DEVICE_3D0', + 'KGSL_DEVICE_MAX', 'KGSL_TIMESTAMP_CONSUMED', + 'KGSL_TIMESTAMP_QUEUED', 'KGSL_TIMESTAMP_RETIRED', + 'KGSL_USER_MEM_TYPE_ADDR', 'KGSL_USER_MEM_TYPE_ASHMEM', + 'KGSL_USER_MEM_TYPE_DMABUF', 'KGSL_USER_MEM_TYPE_ION', + 'KGSL_USER_MEM_TYPE_MAX', 'KGSL_USER_MEM_TYPE_PMEM', + 'kgsl_cmdwindow_type', 'kgsl_ctx_reset_stat', 'kgsl_deviceid', + 'kgsl_timestamp_type', 'kgsl_user_mem_type', + 'struct_kgsl_bind_gmem_shadow', 'struct_kgsl_buffer_desc', + 'struct_kgsl_cff_sync_gpuobj', 'struct_kgsl_cff_syncmem', + 'struct_kgsl_cff_user_event', 'struct_kgsl_cmd_syncpoint', + 'struct_kgsl_cmd_syncpoint_fence', + 'struct_kgsl_cmd_syncpoint_timestamp', + 'struct_kgsl_cmdbatch_profiling_buffer', + 'struct_kgsl_cmdstream_freememontimestamp', + 'struct_kgsl_cmdstream_freememontimestamp_ctxtid', + 'struct_kgsl_cmdstream_readtimestamp', + 'struct_kgsl_cmdstream_readtimestamp_ctxtid', + 
'struct_kgsl_cmdwindow_write', 'struct_kgsl_command_object', + 'struct_kgsl_command_syncpoint', 'struct_kgsl_device_constraint', + 'struct_kgsl_device_constraint_pwrlevel', + 'struct_kgsl_device_getproperty', + 'struct_kgsl_device_waittimestamp', + 'struct_kgsl_device_waittimestamp_ctxtid', 'struct_kgsl_devinfo', + 'struct_kgsl_devmemstore', 'struct_kgsl_drawctxt_create', + 'struct_kgsl_drawctxt_destroy', + 'struct_kgsl_drawctxt_set_bin_base_offset', + 'struct_kgsl_gmem_desc', 'struct_kgsl_gpmu_version', + 'struct_kgsl_gpu_command', 'struct_kgsl_gpu_event_fence', + 'struct_kgsl_gpu_event_timestamp', 'struct_kgsl_gpumem_alloc', + 'struct_kgsl_gpumem_alloc_id', 'struct_kgsl_gpumem_free_id', + 'struct_kgsl_gpumem_get_info', 'struct_kgsl_gpumem_sync_cache', + 'struct_kgsl_gpumem_sync_cache_bulk', 'struct_kgsl_gpuobj_alloc', + 'struct_kgsl_gpuobj_free', 'struct_kgsl_gpuobj_import', + 'struct_kgsl_gpuobj_import_dma_buf', + 'struct_kgsl_gpuobj_import_useraddr', 'struct_kgsl_gpuobj_info', + 'struct_kgsl_gpuobj_set_info', 'struct_kgsl_gpuobj_sync', + 'struct_kgsl_gpuobj_sync_obj', 'struct_kgsl_ibdesc', + 'struct_kgsl_map_user_mem', 'struct_kgsl_perfcounter_get', + 'struct_kgsl_perfcounter_put', 'struct_kgsl_perfcounter_query', + 'struct_kgsl_perfcounter_read', + 'struct_kgsl_perfcounter_read_group', + 'struct_kgsl_preemption_counters_query', + 'struct_kgsl_ringbuffer_issueibcmds', 'struct_kgsl_shadowprop', + 'struct_kgsl_sharedmem_free', 'struct_kgsl_sharedmem_from_pmem', + 'struct_kgsl_sharedmem_from_vmalloc', + 'struct_kgsl_sp_generic_mem', 'struct_kgsl_submit_commands', + 'struct_kgsl_syncsource_create', + 'struct_kgsl_syncsource_create_fence', + 'struct_kgsl_syncsource_destroy', + 'struct_kgsl_syncsource_signal_fence', + 'struct_kgsl_timestamp_event', + 'struct_kgsl_timestamp_event_fence', + 'struct_kgsl_timestamp_event_genlock', + 'struct_kgsl_ucode_version', 'struct_kgsl_version'] diff --git a/extra/qcom_gpu_driver/opencl_ioctl.py b/extra/qcom_gpu_driver/opencl_ioctl.py new file mode 100644 index 00000000..32255f22 --- /dev/null +++ b/extra/qcom_gpu_driver/opencl_ioctl.py @@ -0,0 +1,171 @@ +import ctypes, ctypes.util, struct, fcntl, re +from hexdump import hexdump +from tinygrad.runtime.ops_gpu import CLDevice, CLAllocator +import pathlib, sys +sys.path.append(pathlib.Path(__file__).parent.parent.parent.as_posix()) + +ops = {} +import xml.etree.ElementTree as ET +xml = ET.parse(pathlib.Path(__file__).parent / "adreno_pm4.xml") +for child in xml.getroot(): + if 'name' in child.attrib and child.attrib['name'] == "adreno_pm4_type3_packets": + for sc in child: + if 'name' in sc.attrib and ('variants' not in sc.attrib or sc.attrib['variants'] != "A2XX"): + ops[int(sc.attrib['value'], 0x10)] = sc.attrib['name'] +#print(ops) +#exit(0) + +from extra.qcom_gpu_driver import msm_kgsl +def ioctls_from_header(): + hdr = (pathlib.Path(__file__).parent.parent.parent / "extra/qcom_gpu_driver/msm_kgsl.h").read_text().replace("\\\n", "") + pattern = r'#define\s+(IOCTL_KGSL_[A-Z0-9_]+)\s+_IOWR?\(KGSL_IOC_TYPE,\s+(0x[0-9a-fA-F]+),\s+struct\s([A-Za-z0-9_]+)\)' + matches = re.findall(pattern, hdr, re.MULTILINE) + return {int(nr, 0x10):(name, getattr(msm_kgsl, "struct_"+sname)) for name, nr, sname in matches} + +nrs = ioctls_from_header() + +# https://github.com/ensc/dietlibc/blob/master/include/sys/aarch64-ioctl.h + +def get_struct(argp, stype): + return ctypes.cast(ctypes.c_void_p(argp), ctypes.POINTER(stype)).contents + +def format_struct(s): + sdats = [] + for field_name, field_type in s._fields_: + if 
field_name in {"__pad", "PADDING_0"}: continue
+    dat = getattr(s, field_name)
+    if isinstance(dat, int): sdats.append(f"{field_name}:0x{dat:X}")
+    else: sdats.append(f"{field_name}:{dat}")
+  return sdats
+
+import mmap
+mmaped = {}
+def get_mem(addr, vlen):
+  # find the mapping that contains addr and slice vlen bytes out of it
+  for k,v in mmaped.items():
+    if k <= addr and addr < k+len(v):
+      return v[addr-k:addr-k+vlen]
+
+def hprint(vals):
+  ret = []
+  for v in vals:
+    if v > 31: ret.append(f"{v:#x}")
+    else: ret.append(f"{v}")
+  return f"({','.join(ret)})"
+
+# state_type values for CP_LOAD_STATE6
+ST6_SHADER = 0
+ST6_CONSTANTS = 1
+
+def parse_cmd_buf(dat):
+  ptr = 0
+  while ptr < len(dat):
+    cmd = struct.unpack("I", dat[ptr:ptr+4])[0]
+    if (cmd>>24) == 0x70:
+      # type 7 packet: opcode with opcode specific payload (replaces pkt3)
+      opcode, size = ((cmd>>16)&0x7F), cmd&0x3FFF
+      vals = struct.unpack("I"*size, dat[ptr+4:ptr+4+4*size])
+      print(f"{ptr:3X} -- typ 7: {size=:3d}, {opcode=:#x} {ops[opcode]}", hprint(vals))
+      if ops[opcode] == "CP_LOAD_STATE6_FRAG":
+        # first dword packs destination offset, state type/source/block and unit count
+        dst_off = vals[0] & 0x3FFF
+        state_type = (vals[0]>>14) & 0x3
+        state_src = (vals[0]>>16) & 0x3
+        state_block = (vals[0]>>18) & 0xF # 13 = SB4_CS_SHADER
+        num_unit = vals[0]>>22
+        print(f"{num_unit=} {state_block=} {state_src=} {state_type=} {dst_off=}")
+
+        # vals[1]/vals[2] are the low/high halves of the source GPU address
+        from disassemblers.adreno import disasm_raw
+        if state_type == ST6_SHADER: disasm_raw(get_mem(((vals[2] << 32) | vals[1]), 0x180))
+        if state_type == ST6_CONSTANTS: hexdump(get_mem(((vals[2] << 32) | vals[1]), min(0x180, num_unit*4)))
+      ptr += 4*size
+    elif (cmd>>28) == 0x4:
+      # type 4 packet: write one or more registers (replaces pkt0)
+      offset, size = ((cmd>>8)&0x7FFFF), cmd&0x7F
+      vals = struct.unpack("I"*size, dat[ptr+4:ptr+4+4*size])
+      print(f"{ptr:3X} -- typ 4: {size=:3d}, {offset=:#x}", hprint(vals))
+      ptr += 4*size
+    else:
+      print("unk", hex(cmd))
+    # consume the packet header dword
+    ptr += 4
+
+@ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_ulong, ctypes.c_void_p)
+def ioctl(fd, request, argp):
+  # forward to the real syscall first (0x1d is __NR_ioctl on arm64)
+  ret = libc.syscall(0x1d, ctypes.c_int(fd), ctypes.c_ulong(request), ctypes.c_void_p(argp))
+
+  idir, size, itype, nr = (request>>30), (request>>16)&0x3FFF, (request>>8)&0xFF, request&0xFF
+  if nr in nrs and itype == 9: # KGSL_IOC_TYPE is 0x09
+    name, stype = nrs[nr]
+    s = get_struct(argp, stype)
+    print(f"{ret:2d} = {name:40s}", ' '.join(format_struct(s)))
+    if name == "IOCTL_KGSL_GPUOBJ_INFO":
+      # map the GPU object into our address space so its command buffers can be read later
+      mmaped[s.gpuaddr] = mmap.mmap(fd, s.size, offset=s.id*0x1000)
+    if name == "IOCTL_KGSL_GPU_COMMAND":
+      for i in range(s.numcmds):
+        cmd = get_struct(s.cmdlist+s.cmdsize*i, msm_kgsl.struct_kgsl_command_object)
+        print(f"cmd {i}:", format_struct(cmd))
+        #hexdump(get_mem(cmd.gpuaddr, cmd.size))
+        parse_cmd_buf(get_mem(cmd.gpuaddr, cmd.size))
+      for i in range(s.numobjs):
+        obj = get_struct(s.objlist+s.objsize*i, msm_kgsl.struct_kgsl_command_object)
+        print(f"obj {i}:", format_struct(obj))
+        print(format_struct(msm_kgsl.struct_kgsl_cmdbatch_profiling_buffer.from_buffer_copy(get_mem(obj.gpuaddr, obj.size))))
+        #hexdump(get_mem(obj.gpuaddr, obj.size))
+  else:
+    #print(f"ioctl({fd=}, (dir:{idir}, size:0x{size:3X}, type:{itype:d}, nr:0x{nr:2X}), {argp=:X}) = {ret=}")
+    pass
+
+  return ret
+
+def install_hook(c_function, python_function):
+  # AARCH64 trampoline to ioctl: adr x16, #12; ldr x16, [x16]; br x16
+  tramp = b"\x70\x00\x00\x10\x10\x02\x40\xf9\x00\x02\x1f\xd6"
+  # followed by the absolute address of the python callback to jump through
+  tramp += struct.pack("Q", ctypes.cast(ctypes.byref(python_function), ctypes.POINTER(ctypes.c_ulong)).contents.value)
+
+  # get real ioctl address
+  ioctl_address = ctypes.cast(ctypes.byref(c_function), ctypes.POINTER(ctypes.c_ulong))
+
+  # hook ioctl: make the pages RWX, then overwrite the function prologue with the trampoline
+  libc = ctypes.CDLL(ctypes.util.find_library("libc"))
+  ret = libc.mprotect(ctypes.c_ulong((ioctl_address.contents.value//0x1000)*0x1000), 0x2000, 7)
+  assert ret == 0
+  libc.memcpy(ioctl_address.contents, ctypes.create_string_buffer(tramp), len(tramp))
+
+libc = ctypes.CDLL(ctypes.util.find_library("libc"))
+install_hook(libc.ioctl, ioctl)
+
+"""
+print("***** init device")
+dev = CLDevice()
+print("***** alloc")
+alloc = CLAllocator(dev)
+a = alloc._alloc(16)
+#alloc._alloc(0x2000)
+ba = bytearray(b"hello")
+print(f"***** copyin {ctypes.addressof((ctypes.c_char * len(ba)).from_buffer(ba)):#x}")
+alloc.copyin(a, memoryview(ba))
+dev.synchronize()
+print("***** copyout")
+mv2 = memoryview(bytearray(b"nopeo"))
+alloc.copyout(mv2, a)
+dev.synchronize()
+print("***** done", bytes(mv2))
+exit(0)
+"""
+
+print("***** import tinygrad")
+from tinygrad import Tensor, Device, TinyJit
+print("***** access GPU")
+dev = Device["GPU"]
+print("***** create tensor a")
+a = Tensor([1.,2.]*200).realize()
+print("***** create tensor b")
+b = Tensor([3.,4.]*200).realize()
+@TinyJit
+def add(a, b): return (a+b).realize()
+for i in range(4):
+  print(f"***** add tensors {i}")
+  c = add(a, b)
+  #dev.synchronize()
+  c = add(b, a)
+  dev.synchronize()
+#print("***** copy out")
+#print(c.numpy())
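+
+# Rough usage sketch: run this file directly on a Qualcomm Adreno device
+# (aarch64 Linux/Android with /dev/kgsl-3d0). install_hook() patches libc's
+# ioctl in-process, so the tinygrad OpenCL workload above exercises the
+# userspace driver while every KGSL ioctl, and each submitted PM4 command
+# buffer, is decoded to stdout. Any other OpenCL workload started after the
+# hook should be traceable the same way (run_my_cl_program is a hypothetical
+# placeholder):
+#
+#   import extra.qcom_gpu_driver.opencl_ioctl  # install the hook first
+#   run_my_cl_program()                        # then run the workload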