58   typename MmaSimtPolicy          
    68   typename MmaSimtPolicy_  
    74   using Operator = Operator_;
    82     typename Operator::ElementC, 
    83     Policy::kElementsPerIteration>;
    87     typename Operator::ElementC, 
    88     Policy::kAccumulatorElementCount>;
    93   static int const kIterations = Policy::kIterations;
    98   using AccessType = Array<typename Operator::ElementC, Policy::kElementsPerAccess>;
   107   AccessType 
const *accumulators_;
   117     accumulators_(reinterpret_cast<AccessType const *>(&accum)), 
   140     AccessType *frag_ptr = 
reinterpret_cast<AccessType *
>(&frag);
   143     for (
int n = 0; n < Policy::kAccessesPerIteration; ++n) {
   145       int accumulator_access_offset = index_ * Policy::kAccessesPerIteration + n;
   147       frag_ptr[n] = accumulators_[accumulator_access_offset];
 Definition: aligned_buffer.h:35
AccumulatorTile OutputAccumulatorTile
Definition: fragment_iterator_simt.h:90
Definition: simt_policy.h:50
WarpShape_ WarpShape
Definition: fragment_iterator_simt.h:73
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
Fragment iterator for SIMT accumulator arrangements. 
Definition: fragment_iterator_simt.h:60
CUTLASS_HOST_DEVICE void load(Fragment &frag, int index_offset=0) const 
Loads a fragment from the referenced part of the accumulator tile. 
Definition: fragment_iterator_simt.h:138
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Array< typename Operator::ElementC, Policy::kAccumulatorElementCount > AccumulatorTile
This is the complete warp-level accumulator tile. 
Definition: fragment_iterator_simt.h:88
Array< typename Operator::ElementC, Policy::kElementsPerIteration > Fragment
This is the fragment size produced by one access of the iterator. 
Definition: fragment_iterator_simt.h:83
CUTLASS_HOST_DEVICE FragmentIteratorSimt & operator--()
Decrements. 
Definition: fragment_iterator_simt.h:131
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE FragmentIteratorSimt(AccumulatorTile const &accum)
Constructs an iterator. 
Definition: fragment_iterator_simt.h:116
Defines layout functions used by TensorRef and derived classes. 
Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of SimtOp instructions, of which a row-oriented slice is visible per iteration. 
CUTLASS_HOST_DEVICE FragmentIteratorSimt & operator++()
Increments. 
Definition: fragment_iterator_simt.h:124