48 namespace threadblock {
    60 class RegularTileIterator<Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment> {
    66   static int const kAdvanceRank = AdvanceRank;
    68   static int const kAlignment = Alignment;
    76   using Fragment = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;
    79     "Advance rank may only be along the contiguous or strided dimensions.");
   100   Index increment_strided_;
   103   Index increment_advance_;
   115     pointer_(reinterpret_cast<uint8_t *>(ref.data()) + (ref.offset(
ThreadMap::initial_offset(thread_idx)) * 
sizeof_bits<
Element>::value / 8)) {
   117     stride_ = ref.
stride()[0];
   134     for (
int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {
   139       for (
int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {
   141         int idx = c + s * ThreadMap::Iterations::kContiguous;
   142         frag_ptr[idx] = access_ptr[c * ThreadMap::Delta::kContiguous];
   145       if (s + 1 < ThreadMap::Iterations::kStrided) {
   146         byte_pointer += increment_strided_;
   154     load_with_pointer_offset(
   156       tile_offset.contiguous() * Shape::kContiguous / ThreadMap::kElementsPerAccess + 
   157         tile_offset.strided() * Shape::kStrided * stride_
   164     load_with_pointer_offset(frag, 0);
   175     for (
int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {
   180       for (
int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {
   182         int idx = c + s * ThreadMap::Iterations::kContiguous;
   183         access_ptr[c * ThreadMap::Delta::kContiguous] = frag_ptr[idx];
   186       if (s + 1 < ThreadMap::Iterations::kStrided) {
   187         byte_pointer += increment_strided_;
   195     store_with_pointer_offset(
   197       tile_offset.contiguous() * Shape::kContiguous + tile_offset.strided() * Shape::kStrided * stride_
   204     store_with_pointer_offset(frag, 0);
   210     pointer_ += increment_advance_;
   217     pointer_ -= increment_advance_;
   224     pointer_ += pointer_offset;
   231         (coord.contiguous() * Shape::kContiguous + coord.strided() * Shape::kStrided * stride_) / 8;
   232     add_pointer_offset(offset);
   253   static int const kAdvanceRank = AdvanceRank;
   255   static int const kAlignment = Alignment;
   263   using Fragment = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;
   269     (kAdvanceRank == 0 ? 1 : 0),
   275     "Advance rank may only be along the row or column dimensions.");
   291     iterator_({ref.
data(), ref.
stride()}, thread_idx) {
   298     iterator_.load_with_pointer_offset(frag, pointer_offset);
   304     iterator_.load_with_pointer_offset(frag, {tile_offset.column(), tile_offset.row()});
   310     iterator_.load_with_pointer_offset(frag, 0);
   316     iterator_.store_with_pointer_offset(frag, pointer_offset);
   322     iterator_.store_with_pointer_offset(frag, {tile_offset.column(), tile_offset.row()});
   328     iterator_.store_with_pointer_offset(frag, 0);
   348     iterator_.add_pointer_offset(pointer_offset);
   354     iterator_.add_tile_offset({coord.column(), coord.row()});
   375   static int const kAdvanceRank = AdvanceRank;
   377   static int const kAlignment = Alignment;
   385   using Fragment = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;
   391     (kAdvanceRank == 0 ? 0 : 1),
   396     "Advance rank may only be along the row or column dimensions.");
   412     iterator_({ref.
data(), ref.
stride()}, thread_idx) {
   419     iterator_.load_with_pointer_offset(frag, pointer_offset);
   425     iterator_.load_with_pointer_offset(frag, {tile_offset.row(), tile_offset.column()});
   431     iterator_.load_with_pointer_offset(frag, 0);
   437     iterator_.store_with_pointer_offset(frag, pointer_offset);
   443     iterator_.store_with_pointer_offset(frag, {tile_offset.row(), tile_offset.column()});
   449     iterator_.store_with_pointer_offset(frag, 0);
   469     iterator_.add_pointer_offset(pointer_offset);
   475     iterator_.add_tile_offset({coord.row(), coord.column()});
 
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:62
Definition: aligned_buffer.h:35
Coordinate in pitch-linear space. 
Definition: pitch_linear.h:52
Defines a structure containing strides, bounds, and a pointer to tensor data. 
CUTLASS_HOST_DEVICE Element * data() const 
Returns the pointer to referenced data. 
Definition: tensor_ref.h:254
Mapping function for pitch-linear memory. 
Definition: pitch_linear.h:163
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:154
Aligned array type. 
Definition: array.h:511
Mapping function for column-major matrices. 
Definition: layout/matrix.h:142
Template defining a shape used by pitch-linear operators. 
Definition: pitch_linear.h:43
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:59
int64_t LongIndex
Long index type used for offsets. 
Definition: pitch_linear.h:175
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the layout object's stride vector. 
Definition: tensor_ref.h:277
Defines the size of an element in bits. 
Definition: numeric_types.h:42
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
int32_t Index
Index type used for coordinates. 
Definition: pitch_linear.h:172
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
Templates implementing storing of tiles from pitch-linear rank=2 tensors. 
Defines layout functions used by TensorRef and derived classes. 
Defines layout functions used by TensorRef and derived classes for pitch-linear memory. 
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:151
Basic include for CUTLASS. 
Definition: matrix_coord.h:39