46 namespace threadblock {
    57 template <
typename Shape_, 
typename Element_, 
int AdvanceRank,
    58           typename ThreadMap_, 
int Alignment>
    62     AdvanceRank, ThreadMap_, Alignment> {
    65       AdvanceRank == 0 || AdvanceRank == 1,
    66       "Specialization for pitch-linear iterator may along advance along the "    67       "contiguous(rank=0) or strided(rank=1) dimension.");
    72   static int const kAdvanceRank = AdvanceRank;
    73   static int const kAlignment = Alignment;
    84   using AccessType = Array<Element, ThreadMap::kElementsPerAccess>;
   101   int iteration_contiguous_;
   104   int iteration_strided_;
   112       : stride_(ref.stride(0) / 
ThreadMap::kElementsPerAccess),
   120     set_iteration_index(0);
   126     iteration_contiguous_ = index % ThreadMap::Iterations::kContiguous;
   127     iteration_strided_ = index / ThreadMap::Iterations::kContiguous;
   133     byte_offset_ += pointer_offset * 
sizeof(
Element);
   142     int access_offset = iteration_strided_ * ThreadMap::Delta::kStrided * stride_ +
   143                         iteration_contiguous_ * ThreadMap::Delta::kContiguous /
   144                             ThreadMap::kElementsPerAccess;
   146     char *access_byte_ptr =
   147         reinterpret_cast<char *
>(access_ptr + access_offset);
   149     return reinterpret_cast<AccessType *
>(access_byte_ptr + byte_offset_);
   155     ++iteration_contiguous_;
   157     if (iteration_contiguous_ < ThreadMap::Iterations::kContiguous)
   162     iteration_contiguous_ = 0;
   163     ++iteration_strided_;
   165     if (iteration_strided_ < ThreadMap::Iterations::kStrided) {
   171     iteration_strided_ = 0;
   188     add_pointer_offset(coord.contiguous() * Shape::kContiguous +
   189                        coord.strided() * Shape::kStrided * stride_ *
   190                            ThreadMap::kElementsPerAccess);
   203 template <
typename Shape_, 
typename Element_, 
int AdvanceRank,
   204           typename ThreadMap_, 
int Alignment>
   208     AdvanceRank, ThreadMap_, Alignment> {
   211       AdvanceRank == 0 || AdvanceRank == 1,
   212       "Specialization for pitch-linear iterator may along advance along the "   213       "contiguous(rank=0) or strided(rank=1) dimension.");
   218   static int const kAdvanceRank = AdvanceRank;
   219   static int const kAlignment = Alignment;
   233       (kAdvanceRank == 0 ? 0 : 1), 
   249       : iterator_({ref.
data(), ref.
stride()}, thread_id) {}
   258     iterator_.add_pointer_offset(pointer_offset);
   264     return reinterpret_cast<AccessType *
>(iterator_.get());
   270     iterator_.add_tile_offset({coord.row(), coord.column()});
   300 template <
typename Shape_, 
typename Element_, 
int AdvanceRank,
   301           typename ThreadMap_, 
int Alignment>
   305     AdvanceRank, ThreadMap_, Alignment> {
   308       AdvanceRank == 0 || AdvanceRank == 1,
   309       "Specialization for pitch-linear iterator may along advance along the "   310       "contiguous(rank=0) or strided(rank=1) dimension.");
   315   static int const kAdvanceRank = AdvanceRank;
   316   static int const kAlignment = Alignment;
   330       (kAdvanceRank == 0 ? 1 : 0), 
   346       : iterator_({ref.
data(), ref.
stride()}, thread_id) {}
   355     iterator_.add_pointer_offset(pointer_offset);
   361     return reinterpret_cast<AccessType *
>(iterator_.get());
   367     iterator_.add_tile_offset({coord.column(), coord.row()});
 
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:62
Definition: aligned_buffer.h:35
Coordinate in pitch-linear space. 
Definition: pitch_linear.h:52
Defines a structure containing strides, bounds, and a pointer to tensor data. 
CUTLASS_HOST_DEVICE Element * data() const 
Returns the pointer to referenced data. 
Definition: tensor_ref.h:254
Mapping function for pitch-linear memory. 
Definition: pitch_linear.h:163
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:154
Mapping function for column-major matrices. 
Definition: layout/matrix.h:142
Template defining a shape used by pitch-linear operators. 
Definition: pitch_linear.h:43
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:59
CUTLASS_HOST_DEVICE half_t & operator++(half_t &lhs)
Definition: half.h:694
int64_t LongIndex
Long index type used for offsets. 
Definition: pitch_linear.h:175
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the layout object's stride vector. 
Definition: tensor_ref.h:277
Defines a Shape template for matrix tiles. 
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE LongIndex offset(TensorCoord const &coord) const 
Computes the offset of an index from the origin of the tensor. 
Definition: tensor_ref.h:301
int32_t Index
Index type used for coordinates. 
Definition: pitch_linear.h:172
Templates implementing the address computation for storing tiles from pitch-linear rank=2 tensors.
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
Defines a canonical coordinate for rank=2 matrices offering named indices. 
Defines layout functions used by TensorRef and derived classes. 
Defines layout functions used by TensorRef and derived classes for pitch-linear memory. 
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:151
Basic include for CUTLASS. 
Definition: matrix_coord.h:39