|  | CUTLASS
    CUDA Templates for Linear Algebra Subroutines and Solvers | 
Template that handles conventional layouts for IDP4A.
#include <mma_sm61.h>
| Public Types | |
| using | Shape = Shape_ | 
| Size of the GEMM problem (concept: gemm::GemmShape<>)  More... | |
| using | ElementA = int8_t | 
| Data type of operand A.  More... | |
| using | LayoutA = layout::RowMajor | 
| Layout of A matrix (concept: layout::MapFunc)  More... | |
| using | ElementB = int8_t | 
| Data type of operand B.  More... | |
| using | LayoutB = layout::ColumnMajor | 
| Layout of B matrix (concept: layout::MapFunc)  More... | |
| using | ElementC = int32_t | 
| Element type of operand C.  More... | |
| using | LayoutC = LayoutC_ | 
| Layout of C matrix (concept: layout::MapFunc)  More... | |
| using | Operator = arch::OpMultiplyAdd | 
| Underlying mathematical operator.  More... | |
| using | FragmentA = Array< ElementA, Shape::kMK > | 
| A operand storage.  More... | |
| using | FragmentB = Array< ElementB, Shape::kKN > | 
| B operand storage.  More... | |
| using | FragmentC = Array< ElementC, Shape::kMN > | 
| C operand storage.  More... | |
| Public Member Functions | |
| CUTLASS_HOST_DEVICE void | operator() (FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C) | 
| Computes a matrix product D = A * B + C.  More... | |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementA = int8_t | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementB = int8_t | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementC = int32_t | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentA = Array<ElementA, Shape::kMK> | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentB = Array<ElementB, Shape::kKN> | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentC = Array<ElementC, Shape::kMN> | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutA = layout::RowMajor | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutB = layout::ColumnMajor | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutC = LayoutC_ | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Operator = arch::OpMultiplyAdd | 
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Shape = Shape_ | 
| 
 | inline | 
Use a 1x1x4 IDP4A sequence for the bulk of the computation.
 1.8.11
 1.8.11