|  | 
| using | Base = EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ > | 
|  | 
| using | Shape = Shape_ | 
|  | 
| using | WarpMmaOperator = WarpMmaOperator_ | 
|  | 
| using | OutputTileIterator = OutputTileIterator_ | 
|  | 
| using | AccumulatorFragmentIterator = AccumulatorFragmentIterator_ | 
|  | 
| using | WarpTileIterator = WarpTileIterator_ | 
|  | 
| using | SharedLoadIterator = SharedLoadIterator_ | 
|  | 
| using | OutputOp = OutputOp_ | 
|  | 
| using | Padding = Padding_ | 
|  | 
| using | Layout = layout::RowMajor | 
|  | Output layout is always row-major.  More... 
 | 
|  | 
| using | LongIndex = typename Layout::LongIndex | 
|  | 
| using | AccumulatorTile = typename Base::AccumulatorTile | 
|  | The complete warp-level accumulator tile.  More... 
 | 
|  | 
| using | ElementAccumulator = typename WarpTileIterator::Element | 
|  | Accumulator element.  More... 
 | 
|  | 
| using | ElementOutput = typename OutputTileIterator::Element | 
|  | Output element.  More... 
 | 
|  | 
| using | TensorRef = typename OutputTileIterator::TensorRef | 
|  | Tensor reference to destination tensor.  More... 
 | 
|  | 
| using | SyncTensorRef = typename cutlass::TensorRef< int, cutlass::layout::PackedVectorLayout > | 
|  | Tensor reference to sync tensor.  More... 
 | 
|  | 
| using | ConstTensorRef = typename OutputTileIterator::ConstTensorRef | 
|  | Const tensor reference to source tensor.  More... 
 | 
|  | 
| using | OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess > | 
|  | Array type used to output.  More... 
 | 
|  | 
| using | AccumulatorAccessType = Array< typename WarpTileIterator::Element, OutputTileIterator::kElementsPerAccess > | 
|  | Array type used by output functor.  More... 
 | 
|  | 
| using | WarpCount = typename Base::WarpCount | 
|  | Number of warps.  More... 
 | 
|  | 
| using | Shape = Shape_ | 
|  | 
| using | WarpMmaOperator = WarpMmaOperator_ | 
|  | 
| using | AccumulatorFragmentIterator = AccumulatorFragmentIterator_ | 
|  | 
| using | WarpTileIterator = WarpTileIterator_ | 
|  | 
| using | Padding = Padding_ | 
|  | 
| using | Layout = layout::RowMajor | 
|  | Output layout is always row-major.  More... 
 | 
|  | 
| using | AccumulatorTile = typename AccumulatorFragmentIterator::AccumulatorTile | 
|  | The complete warp-level accumulator tile.  More... 
 | 
|  | 
| using | ElementAccumulator = typename AccumulatorTile::Element | 
|  | Accumulator element.  More... 
 | 
|  | 
| using | WarpCount = gemm::GemmShape< Shape::kM/WarpMmaOperator::Shape::kM, Shape::kN/WarpMmaOperator::Shape::kN, kPartitionsK > | 
|  | Number of warps.  More... 
 | 
|  | 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
      
        
          | using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Base =  EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_> | 
      
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
      
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
      
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
      
        
          | using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::OutputAccessType =  Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess> | 
      
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
      
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
      
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
      
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
      
        
          | using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::TensorRef =  typename OutputTileIterator::TensorRef | 
      
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
  
  | 
        
          | CUTLASS_DEVICE cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Epilogue | ( | typename Base::SharedStorage & | shared_storage, |  
          |  |  | int | thread_idx, |  
          |  |  | int | warp_idx, |  
          |  |  | int | lane_idx |  
          |  | ) |  |  |  | inline | 
 
- Parameters
- 
  
    | shared_storage | Shared storage object |  | thread_idx | ID of a thread within the threadblock |  | warp_idx | ID of warp within threadblock |  | lane_idx | ID of thread within warp |  
 
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
  
  | 
        
          | CUTLASS_DEVICE void cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::operator() | ( | OutputOp const & | output_op, |  
          |  |  | OutputTileIterator | destination_iterator, |  
          |  |  | AccumulatorTile const & | accumulators, |  
          |  |  | OutputTileIterator | source_iterator |  
          |  | ) |  |  |  | inline | 
 
Threadblock tile coordinate in GEMM (in units of threadblock tiles) 
- Parameters
- 
  
    | output_op | Output operator |  | destination_iterator | Tile iterator for destination |  | accumulators | Complete warp-level accumulator tile |  | source_iterator | Tile iterator for source |  
 
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
  
  | 
        
          | int const cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::kElementsPerAccess = OutputTileIterator::kElementsPerAccess |  | static | 
 
 
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ > 
  
  | 
        
          | int const cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::kPartitionsK = PartitionsK |  | static |