54 namespace threadblock {
    61   typename WarpMmaOperator_,                
    63   typename AccumulatorFragmentIterator_,    
    64   typename WarpTileIterator_,               
    89     Shape::kM / WarpMmaOperator::Shape::kM,
    90     Shape::kN / WarpMmaOperator::Shape::kN,
   104     using Element = 
typename WarpTileIterator::Element;
   110     using Layout = 
typename WarpTileIterator::Layout;
   137       return storage.
data();
   150       if (threadIdx.x == 0) {
   188     shared_storage_(shared_storage),
   189     warp_tile_iterator_(shared_storage.
reference(), lane_idx) {
   204     warp_tile_iterator_.add_tile_offset(warp_offset);
 static int const kM
Definition: include/cutlass/gemm/gemm.h:58
Describes the size of a matrix tile. 
Definition: matrix_shape.h:42
Definition: aligned_buffer.h:35
CUTLASS_DEVICE void debug_print()
Definition: epilogue_base.h:149
static int const kColumn
columns of a matrix 
Definition: matrix_shape.h:44
WarpTileIterator warp_tile_iterator_
Stores a warp's fragment of accumulators to SMEM. 
Definition: epilogue_base.h:176
SharedStorage & shared_storage_
Definition: epilogue_base.h:173
Templates implementing how threads are mapped to a given tile. 
WarpMmaOperator_ WarpMmaOperator
Definition: epilogue_base.h:71
Shared storage allocation needed by the epilogue. 
Definition: epilogue_base.h:97
CUTLASS_DEVICE Element * data()
Returns a pointer to the shared memory buffer. 
Definition: epilogue_base.h:136
Defines common types used for all GEMM-like operators. 
typename AccumulatorTile::Element ElementAccumulator
Accumulator element. 
Definition: epilogue_base.h:84
typename WarpTileIterator::TensorRef TensorRef
Tensor reference to shared memory allocation. 
Definition: epilogue_base.h:107
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
static int const kK
Definition: include/cutlass/gemm/gemm.h:60
Defines layout functions used by TensorRef and derived classes for common 4-D and 5-D tensor formats...
Defines a Shape template for matrix tiles. 
static int const kPartitionsK
Definition: epilogue_base.h:72
typename WarpTileIterator::Element Element
Element type of shared memory. 
Definition: epilogue_base.h:104
Defines a canonical coordinate for rank=4 tensors offering named indices. 
AlignedBuffer< Element, StorageShape::kCount > storage
Definition: epilogue_base.h:128
static int const kRow
rows of a matrix 
Definition: matrix_shape.h:43
AlignedBuffer is a container for trivially copyable elements suitable for use in unions and shared me...
Top-level include for all CUTLASS numeric types. 
Modifies semantics of cutlass::Array<> to provide guaranteed alignment. 
Definition: aligned_buffer.h:45
Shape of a matrix multiply-add operation. 
Definition: include/cutlass/gemm/gemm.h:57
CUTLASS_HOST_DEVICE pointer data()
Definition: aligned_buffer.h:84
typename WarpTileIterator::Layout Layout
Layout of shared memory allocation. 
Definition: epilogue_base.h:110
AccumulatorFragmentIterator_ AccumulatorFragmentIterator
Definition: epilogue_base.h:73
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
Defines layout functions used for rank=1 vectors. 
Shape_ Shape
Definition: epilogue_base.h:70
Base class for epilogues defining warp-level. 
Definition: epilogue_base.h:67
static CUTLASS_HOST_DEVICE RowMajor packed(MatrixCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: layout/matrix.h:93
Padding_ Padding
Definition: epilogue_base.h:75
CUTLASS_DEVICE EpilogueBase(SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx)
Constructor. 
Definition: epilogue_base.h:182
WarpTileIterator_ WarpTileIterator
Definition: epilogue_base.h:74
typename AccumulatorFragmentIterator::AccumulatorTile AccumulatorTile
The complete warp-level accumulator tile. 
Definition: epilogue_base.h:81
Basic include for CUTLASS. 
Definition: matrix_coord.h:39
CUTLASS_DEVICE TensorRef reference()
Returns a tensor reference to the shared memory buffer. 
Definition: epilogue_base.h:142
static int const kN
Definition: include/cutlass/gemm/gemm.h:59