|  | CUTLASS
    CUDA Templates for Linear Algebra Subroutines and Solvers | 
Partial specialization for row-major.
#include <volta_tensor_op_policy.h>
| Public Types | |
| using | WarpShape = WarpShape_ | 
| using | InterleavedTileShape = gemm::GemmShape< 32, 32, 4 > | 
| using | ElementC = float | 
| using | Layout = layout::RowMajor | 
| using | InstructionShape = gemm::GemmShape< 16, 16, 4 > | 
| Shape of one warp-level instruction.  More... | |
| using | MmaIterations = MatrixShape< InterleavedTileShape::kM/InstructionShape::kM, InterleavedTileShape::kN/InstructionShape::kN > | 
| Number of mma operations performed for one 32x32x4 interleaved tile.  More... | |
| using | TileIterations = MatrixShape< WarpShape::kM/InterleavedTileShape::kM, WarpShape::kN/InterleavedTileShape::kN > | 
| Number of 32x32x4 interleaved tiles needed to cover the warp-level GEMM shape.  More... | |
| using | AccessType = AlignedArray< ElementC, kElementsPerAccess > | 
| Array type for aligned memory accesses.  More... | |
| using | Fragment = Array< ElementC, kElementsPerAccess * kAccessesPerInterleavedTile * TileIterations::kColumn > | 
| This is the fragment size produced by one access of the iterator.  More... | |
| using | AccumulatorTile = Array< ElementC, TileIterations::kCount * MmaIterations::kCount * kElementsPerMma > | 
| This is the complete warp-level accumulator tile.  More... | |
| Static Public Attributes | |
| static int const | kElementsPerMma = 8 | 
| Number of accumulator elements owned by each thread per Mma.  More... | |
| static int const | kRowsPerIteration = 16 | 
| static int const | kElementsPerAccess = 2 | 
| Number of accumulator elements stored per memory instruction to shared memory.  More... | |
| static int const | kAccessesPerInterleavedTile = 8 | 
| Number of accesses performed per interleaved tile.  More... | |
| static int const | kRowsPerMmaTile = 2 | 
| Number of rows per interleaved tile.  More... | |
| static int const | kIterations = TileIterations::kRow * MmaIterations::kRow | 
| Total number of iterations needed to cover the entire tile.  More... | |
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::AccessType = AlignedArray<ElementC, kElementsPerAccess> | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::AccumulatorTile = Array< ElementC, TileIterations::kCount * MmaIterations::kCount * kElementsPerMma> | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::ElementC = float | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Fragment = Array< ElementC, kElementsPerAccess * kAccessesPerInterleavedTile * TileIterations::kColumn> | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::InstructionShape = gemm::GemmShape<16, 16, 4> | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::InterleavedTileShape = gemm::GemmShape<32, 32, 4> | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Layout = layout::RowMajor | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::MmaIterations = MatrixShape< InterleavedTileShape::kM / InstructionShape::kM, InterleavedTileShape::kN / InstructionShape::kN > | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::TileIterations = MatrixShape< WarpShape::kM / InterleavedTileShape::kM, WarpShape::kN / InterleavedTileShape::kN > | 
| using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::WarpShape = WarpShape_ | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
 1.8.11
 1.8.11