41 template <
typename LayoutA, 
typename LayoutB, 
typename LayoutC>
    43   gemm::GemmShape<1,1,4>,
    58     Array<int8_t, 4> 
const &a,
    59     Array<int8_t, 4> 
const &b,
    60     Array<int, 1> 
const &c
    63 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 610))
    65     unsigned const &A = reinterpret_cast<unsigned const &>(a);
    66     unsigned const &B = reinterpret_cast<unsigned const &>(b);
    68     asm volatile("dp4a.s32.s32 %0, %1, %2, %3;"
    70                  : "r"(A), "r"(B), "r"(c[0]));
    77     for (
int k = 0; k < 4; ++k) {
    88 template <
typename LayoutC>
    90   gemm::GemmShape<1, 1, 2>,
   105     Array<int16_t, 2> 
const &a,
   106     Array<int16_t, 2> 
const &b,
   107     Array<int, 1> 
const &c
   110 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 610))
   112     unsigned const &A = reinterpret_cast<unsigned const &>(a);
   113     unsigned const &B = reinterpret_cast<unsigned const &>(b);
   115     asm volatile("dp2a.s32.s32 %0, %1, %2, %3;"
   117                  : "r"(A), "r"(B), "r"(c[0]));
   122     for (
int k = 0; k < 2; ++k) {
 Definition: aligned_buffer.h:35
cutlass::arch::Mma< gemm::GemmShape< 1, 1, 4 >, 1, int8_t, LayoutA, int8_t, LayoutB, int, LayoutC, OpMultiplyAdd >::operator()
CUTLASS_HOST_DEVICE void operator()(Array< int, 1 > &d, Array< int8_t, 4 > const &a, Array< int8_t, 4 > const &b, Array< int, 1 > const &c)
Definition: arch/mma_sm61.h:56
Mapping function for column-major matrices. 
Definition: layout/matrix.h:142
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Shape of a matrix multiply-add operation. 
Definition: include/cutlass/gemm/gemm.h:57
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
Defines layout functions used by TensorRef and derived classes. 
Matrix multiply-add operation. 
Definition: arch/mma.h:92
cutlass::arch::Mma< gemm::GemmShape< 1, 1, 2 >, 1, int16_t, layout::RowMajor, int16_t, layout::ColumnMajor, int, LayoutC, OpMultiplyAdd >::operator()
CUTLASS_HOST_DEVICE void operator()(Array< int, 1 > &d, Array< int16_t, 2 > const &a, Array< int16_t, 2 > const &b, Array< int, 1 > const &c)
Definition: arch/mma_sm61.h:103