48 namespace threadblock {
    64   using Shape = 
typename ThreadMap::Shape;
    82   static int const kThreads = ThreadMap::kThreads;
    87     ThreadMap::Iterations::kColumn * 
    88     ThreadMap::Iterations::kRow * 
    89     ThreadMap::Iterations::kGroup * 
    90     ThreadMap::Iterations::kCluster * 
    91     ThreadMap::kElementsPerAccess>;
    96     ThreadMap::kElementsPerAccess, 
   106   uint8_t *byte_pointer_;
   123     byte_pointer_(reinterpret_cast<uint8_t *>(ref.data())),
   124     stride_((ref.stride(0) * 
sizeof_bits<Element>::value) / 8) {
   126     TensorCoord thread_offset = ThreadMap::initial_offset(thread_idx);
   130       thread_offset.
row() * stride_ + 
   133     int byte_offset = thread_offset.
row() * stride_ + 
   155     for (
int cluster = 0; cluster < ThreadMap::Iterations::kCluster; ++cluster) {
   158       for (
int group = 0; group < ThreadMap::Iterations::kGroup; ++group) {
   161         for (
int row = 0; row < ThreadMap::Iterations::kRow; ++row) {
   163           uint8_t 
const *byte_pointer = byte_pointer_ + 
   164             row * ThreadMap::Delta::kRow * stride_ + 
   165             group * ThreadMap::Delta::kGroup* stride_ + 
   166             cluster * ThreadMap::Delta::kCluster * stride_ +
   170             (row + ThreadMap::Iterations::kRow * (group + ThreadMap::Iterations::kGroup * cluster));
   175           for (
int column = 0; column < ThreadMap::Iterations::kColumn; ++column) {
   177             int frag_idx = frag_row_idx * ThreadMap::Iterations::kColumn + column;
 int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:62
CUTLASS_HOST_DEVICE Index const & column() const 
Returns the column of the coordinate. 
Definition: matrix_coord.h:85
Array< Element, ThreadMap::Iterations::kColumn *ThreadMap::Iterations::kRow *ThreadMap::Iterations::kGroup *ThreadMap::Iterations::kCluster *ThreadMap::kElementsPerAccess > Fragment
Fragment object. 
Definition: shared_load_iterator.h:91
Definition: aligned_buffer.h:35
static int const value
Definition: numeric_types.h:43
Defines a structure containing strides, bounds, and a pointer to tensor data. 
CUTLASS_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)
Loads a fragment from memory. 
Definition: shared_load_iterator.h:150
static int const kThreads
Definition: shared_load_iterator.h:82
Aligned array type. 
Definition: array.h:511
CUTLASS_DEVICE SharedLoadIterator(TensorRef ref, int thread_idx)
Constructor. 
Definition: shared_load_iterator.h:119
CUTLASS_HOST_DEVICE Index const & row() const 
Returns the row of the coordinate. 
Definition: matrix_coord.h:77
static int const kMinAlignment
Definition: shared_load_iterator.h:78
typename TensorRef::ConstTensorRef ConstTensorRef
Definition: shared_load_iterator.h:70
TensorRef< typename platform::remove_const< Element >::type const, Layout > ConstTensorRef
TensorRef to constant data. 
Definition: tensor_ref.h:179
ThreadMap_ ThreadMap
Definition: shared_load_iterator.h:63
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:59
static int const kAlignment
Definition: shared_load_iterator.h:80
Defines a Shape template for matrix tiles. 
Defines the size of an element in bits. 
Definition: numeric_types.h:42
AlignedArray< Element, ThreadMap::kElementsPerAccess, kAlignment > AccessType
Memory access size. 
Definition: shared_load_iterator.h:97
typename Layout::Index Index
Definition: shared_load_iterator.h:72
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Top-level include for all CUTLASS numeric types. 
Metaprogram for determining the mapping of output elements to threads for epilogue tiles.
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
CUTLASS_DEVICE void load(Fragment &frag)
Loads a fragment. 
Definition: shared_load_iterator.h:189
Element_ Element
Definition: shared_load_iterator.h:66
Defines layout functions used by TensorRef and derived classes. 
typename ThreadMap::Shape Shape
Definition: shared_load_iterator.h:64
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element. 
Definition: shared_load_iterator.h:139
static int const kElementsPerAccess
Definition: shared_load_iterator.h:76
Definition: shared_load_iterator.h:61
typename Layout::LongIndex LongIndex
Definition: shared_load_iterator.h:73
Basic include for CUTLASS. 
Definition: matrix_coord.h:39
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &offset)
Definition: shared_load_iterator.h:144