mirror of https://github.com/commaai/tinygrad.git
start uop docs (#6291)
* start uop docs * only need show_labels * sink comes first * hotfix: invalid * touchups * 2 space indent works * limit some buffer uops * better BARRIER doc, Op -> UOp when it makes sense. * make KernelInfo optional * more work relative links don't work * this can be local in multi reduce+pads * add UOps.SHAPETRACKER details * UOps.CONST both types * nit: local buffer isn't device Buffer, habit * nit2: dtype -> DType
This commit is contained in:
parent
dd4e5f1c8d
commit
8c50ef8b7c
|
@ -0,0 +1,11 @@
|
|||
::: tinygrad.ops.UOp
|
||||
options:
|
||||
members: false
|
||||
members_order: source
|
||||
show_labels: false
|
||||
|
||||
::: tinygrad.ops.UOps
|
||||
options:
|
||||
members: true
|
||||
members_order: source
|
||||
show_labels: false
|
|
@ -23,6 +23,7 @@ nav:
|
|||
- Developer:
|
||||
- Intro: developer/developer.md
|
||||
- Function (autodiff): developer/function.md
|
||||
- UOp: developer/uop.md
|
||||
- Runtime:
|
||||
- developer/runtime.md
|
||||
- HCQ: developer/hcq.md
|
||||
|
|
167
tinygrad/ops.py
167
tinygrad/ops.py
|
@ -40,20 +40,165 @@ def identity_element(op:BinaryOps, dt:DType): return dtypes.as_const({BinaryOps.
|
|||
|
||||
# the order of these UOps controls the order of the toposort
|
||||
class UOps(Enum):
|
||||
# ops that aren't rendered
|
||||
SINK = auto(); EXT = auto(); EXPAND = auto(); CONTRACT = auto(); SHAPETRACKER = auto(); SWIZZLE = auto() # noqa: E702
|
||||
DEFINE_GLOBAL = auto(); DEFINE_VAR = auto(); DEFINE_LOCAL = auto(); DEFINE_ACC = auto() # noqa: E702
|
||||
CONST = auto(); SPECIAL = auto() # noqa: E702
|
||||
NOOP = auto(); GEP = auto() # noqa: E702
|
||||
# uops that aren't rendered
|
||||
SINK = auto()
|
||||
"""
|
||||
Holds `UOps.STORE`. SINK defines the AST for a Kernel.
|
||||
|
||||
- **`dtype`**: `None`
|
||||
- **`src`**: `Tuple[UOp, ...]`, Only local STOREs are allowed.
|
||||
- **`arg`**: `Optional[KernelInfo]`
|
||||
|
||||
NOTE: `ScheduleItem` ASTs do not have the `KernelInfo` arg, `Kernel` inserts this to the SINK later.
|
||||
"""
|
||||
EXT = auto()
|
||||
"""
|
||||
Holds a single MetaOp. EXT UOps do not need a Kernel.
|
||||
|
||||
- **`dtype`**: Output DType
|
||||
- **`src`**: `Tuple[]`
|
||||
- **`arg`**: (`MetaOps.CUSTOM | MetaOps.COPY | MetaOps.EMPTY | MetaOps.VIEW`, LazyBuffer arg)
|
||||
"""
|
||||
EXPAND = auto()
|
||||
CONTRACT = auto()
|
||||
SHAPETRACKER = auto()
|
||||
"""
|
||||
Defines the ShapeTracker for a buffer UOp `UOps.LOAD`, `UOps.STORE` or `UOps.CONST`.
|
||||
|
||||
- **`dtype`**: `None`
|
||||
- **`src`**: `Tuple[]`
|
||||
- **`arg`**: `ShapeTracker`
|
||||
"""
|
||||
SWIZZLE = auto()
|
||||
DEFINE_GLOBAL = auto()
|
||||
DEFINE_VAR = auto()
|
||||
DEFINE_LOCAL = auto()
|
||||
DEFINE_ACC = auto()
|
||||
CONST = auto()
|
||||
"""
|
||||
Defines a single scalar constant value.
|
||||
|
||||
- **`dtype`**: The scalar DType of the value.
|
||||
|
||||
- **`src`**:
|
||||
The scheduler creates a CONST with a single SHAPETRACKER UOp src: `Tuple[UOp]`.
|
||||
|
||||
The Lowerer replaces the SHAPETRACKER with an empty src.
|
||||
It uses the ShapeTracker valid to create a `WHERE` UOp mask with sources: `(The actual CONST UOp, CONST 0, 0.0 or False)`
|
||||
|
||||
- **`arg`**: The value.
|
||||
"""
|
||||
SPECIAL = auto()
|
||||
NOOP = auto()
|
||||
GEP = auto()
|
||||
# math ops
|
||||
CAST = auto(); BITCAST = auto(); VECTORIZE = auto() # noqa: E702
|
||||
ALU = auto(); REDUCE = auto(); REDUCE_AXIS = auto(); WMMA = auto() # noqa: E702
|
||||
CAST = auto()
|
||||
"""
|
||||
- **`dtype`**: The casted scalar DType
|
||||
- **`src`**: `Tuple[UOp]`
|
||||
- **`arg`**: `None`
|
||||
"""
|
||||
BITCAST = auto()
|
||||
"""
|
||||
- **`dtype`**: The bitcasted scalar DType
|
||||
- **`src`**: `Tuple[UOp]`
|
||||
- **`arg`**: `None`
|
||||
"""
|
||||
VECTORIZE = auto()
|
||||
"""
|
||||
- **`dtype`**: The upcasted vector DType
|
||||
- **`src`**: `Tuple[UOp, ...]`
|
||||
- **`arg`**: `None`
|
||||
|
||||
NOTE: Length of sources must match `dtype.count`
|
||||
"""
|
||||
ALU = auto()
|
||||
"""
|
||||
- **`dtype`**: Output DType
|
||||
- **`src`**: `Tuple[UOp] | Tuple[UOp, UOp] | Tuple[UOp, UOp, UOp]`
|
||||
- **`arg`**: `UnaryOps | BinaryOps | TernaryOps`
|
||||
"""
|
||||
REDUCE = auto()
|
||||
REDUCE_AXIS = auto()
|
||||
"""
|
||||
- **`dtype`**: Output DType
|
||||
- **`src`**: Input to reduce `Tuple[UOp]`
|
||||
- **`arg`**: `(BinaryOps.ADD | BinaryOps.MUL | BinaryOps.MAX, Tuple[int, ...])`
|
||||
"""
|
||||
WMMA = auto()
|
||||
# memory/assignment ops
|
||||
LOAD = auto(); STORE = auto(); PHI = auto() # noqa: E702
|
||||
LOAD = auto()
|
||||
"""
|
||||
- **`dtype`**: Output DType
|
||||
- **`src`**:
|
||||
|
||||
The scheduler and Kernel create LOADs with a SHAPETRACKER uop in src.
|
||||
|
||||
- Normal LOAD: `Tuple[UOp, UOp]`
|
||||
- Buffer UOp `UOps.DEFINE_GLOBAL`.
|
||||
- SHAPETRACKER UOp.
|
||||
|
||||
- Local LOAD: `Tuple[UOp, UOp, UOp]`
|
||||
- Buffer UOp `UOps.DEFINE_LOCAL`.
|
||||
- SHAPETRACKER UOp.
|
||||
- Local UOps.STORE to the same local buffer. We will barrier this later.
|
||||
|
||||
The Lowerer replaces the SHAPETRACKER with an indexing uop and gates the LOAD if needed.
|
||||
|
||||
- Normal LOAD: `Tuple[UOp, UOp]`
|
||||
- Buffer UOp `UOps.DEFINE_GLOBAL`.
|
||||
- Indexing UOp, can only return `dtypes.int32`.
|
||||
- Gated LOAD: `Tuple[UOp, UOp, UOp, UOp]`
|
||||
- Buffer UOp `UOps.DEFINE_GLOBAL`.
|
||||
- Indexing UOp, can only return `dtypes.int32`.
|
||||
- Gate UOp, can only return `dtypes.bool`.
|
||||
- Value if gate is `False`, can only be a `UOps.CONST` with arg 0, 0.0 or `False`.
|
||||
- Barriered LOAD: `Tuple[UOp, UOp, UOp, UOp]`
|
||||
- Buffer UOp `UOps.DEFINE_LOCAL`.
|
||||
- Indexing UOp, can only return `dtypes.int32`.
|
||||
- Gate UOp, can only return `dtypes.bool`.
|
||||
- Barrier UOp `UOps.BARRIER`.
|
||||
- **`arg`**: `None`
|
||||
"""
|
||||
STORE = auto()
|
||||
"""
|
||||
- **`dtype`**: `None`
|
||||
- **`src`**:
|
||||
|
||||
Similar to LOAD, the scheduler and Kernel create STOREs with a SHAPETRACKER uop in src:
|
||||
|
||||
- Buffer UOp `UOps.DEFINE_GLOBAL` or `UOps.DEFINE_LOCAL`.
|
||||
- SHAPETRACKER UOp.
|
||||
- Value to store.
|
||||
|
||||
The Lowerer replaces the SHAPETRACKER with an indexing uop and gates the STORE if needed.
|
||||
|
||||
- Normal STORE: `Tuple[UOp, UOp, UOp]`
|
||||
- Buffer UOp `UOps.DEFINE_GLOBAL` or `UOps.DEFINE_LOCAL`.
|
||||
- Indexing Op, can only return `dtypes.int32`.
|
||||
- Value to store.
|
||||
- Gated STORE: `Tuple[UOp, UOp, UOp, UOp]`
|
||||
- Buffer UOp `UOps.DEFINE_GLOBAL` or `UOps.DEFINE_LOCAL`.
|
||||
- Indexing UOp, can only return `dtypes.int32`.
|
||||
- Value to store.
|
||||
- Gate UOp, can only return `dtypes.bool`.
|
||||
- **`arg`**: `None`
|
||||
"""
|
||||
PHI = auto()
|
||||
# control flow ops
|
||||
BARRIER = auto(); IF = auto(); RANGE = auto() # noqa: E702
|
||||
# these two are not graph nodes
|
||||
ENDRANGE = auto(); ENDIF = auto() # noqa: E702
|
||||
BARRIER = auto()
|
||||
"""
|
||||
Inserts a warp sync between local stores and local loads.
|
||||
|
||||
- **`dtype`**: `None`
|
||||
- **`src`**: `Tuple[UOp, ...]`, Only local STOREs are allowed.
|
||||
- **`arg`**: `None`
|
||||
"""
|
||||
IF = auto()
|
||||
RANGE = auto()
|
||||
# ops that are not graph nodes
|
||||
ENDRANGE = auto()
|
||||
ENDIF = auto()
|
||||
|
||||
BUFFER_UOPS = {UOps.LOAD, UOps.STORE, UOps.CONST}
|
||||
|
||||
|
|
Loading…
Reference in New Issue