|  | CUTLASS
    CUDA Templates for Linear Algebra Subroutines and Solvers | 
#include <default_mma_core_sm75.h>
| Static Public Attributes | |
| static int const | kInterleavedK = InterleavedK | 
| static int const | kWarpSize = warp::WarpSize<arch::OpClassTensorOp>::value | 
| Number of threads per warp. | |
| static int const | kThreads = WarpCount::kCount * kWarpSize | 
| Number of threads total. | |
| static int const | kAccessSizeInBits = 128 | 
| Size of a threadblock-scoped access. | |
| static int const | kElementsPerAccess | 
| static int const | kWarpThreadArrangementContiguous | 
| static int const | kWarpThreadArrangementStrided | 
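Partial specialization of DefaultMmaCore for:
A: column-major interleaved layout (layout::ColumnMajorInterleaved<InterleavedK>, e.g. interleave 32); B: row-major interleaved layout (layout::RowMajorInterleaved<InterleavedK>, e.g. interleave 32); Operator: tensor op class (arch::OpClassTensorOp).
This specialization uses the default warp-level operator for the given tile sizes.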
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::ElementA = ElementA_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::ElementB = ElementB_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::ElementC = ElementC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::InstructionShape = InstructionShape_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::IteratorThreadMapA = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape<Shape::kM * kInterleavedK, Shape::kK / kInterleavedK>, kThreads, layout::PitchLinearShape<32, 1>, kElementsPerAccess> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::IteratorThreadMapB = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape<Shape::kN * kInterleavedK, Shape::kK / kInterleavedK>, kThreads, layout::PitchLinearShape<32, 1>, kElementsPerAccess> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::LayoutA = layout::ColumnMajorInterleaved<InterleavedK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::LayoutB = layout::RowMajorInterleaved<InterleavedK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::LayoutC = LayoutC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::MmaPolicy = MmaPolicy<MmaTensorOp, MatrixShape<0, 0>, MatrixShape<0, 0>, WarpCount::kK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::MmaTensorOp = typename cutlass::gemm::warp::DefaultMmaTensorOp< WarpShape, InstructionShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, Operator, WarpCount::kK, AccumulatorsInRowMajor>::Type | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::Operator = Operator_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::OperatorClass = arch::OpClassTensorOp | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::Shape = Shape_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemIteratorA = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kM, Shape::kK>, ElementA, SmemLayoutA, 0, SmemThreadMapA> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemIteratorB = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kK, Shape::kN>, ElementB, SmemLayoutB, 1, SmemThreadMapB> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemLayoutA = layout::RowMajorTensorOpMultiplicandCrosswise< sizeof_bits<ElementA>::value, kInterleavedK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemLayoutB = layout::ColumnMajorTensorOpMultiplicandCrosswise< sizeof_bits<ElementB>::value, kInterleavedK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemThreadMapA = transform::TransposePitchLinearThreadMap< IteratorThreadMapA, layout::PitchLinearShape<kWarpThreadArrangementContiguous, kWarpThreadArrangementStrided>> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemThreadMapB = transform::TransposePitchLinearThreadMap< IteratorThreadMapB, layout::PitchLinearShape<kWarpThreadArrangementContiguous, kWarpThreadArrangementStrided>> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::WarpCount = GemmShape<Shape::kM / WarpShape::kM, Shape::kN / WarpShape::kN, Shape::kK / WarpShape::kK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::WarpShape = WarpShape_ | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
 1.8.11
 1.8.11