52 namespace threadblock {
    81                       ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_
    99     Shape::kM / WarpShape::kM,
   100     Shape::kN / WarpShape::kN,
   101     Shape::kK / WarpShape::kK
   106     !(Shape::kM % WarpShape::kM) &&
   107     !(Shape::kN % WarpShape::kN),
   108     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."
   115   static int const kThreads = WarpCount::kCount * kWarpSize;
   118   static int const kAccessSizeInBits = 128;
   158     layout::PitchLinearShape<8, 4>,
   186       cutlass::arch::OpMultiplyAdd
   236                       ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_
   254     Shape::kM / WarpShape::kM,
   255     Shape::kN / WarpShape::kN,
   256     Shape::kK / WarpShape::kK
   261     !(Shape::kM % WarpShape::kM) &&
   262     !(Shape::kN % WarpShape::kN),
   263     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."
   270   static int const kThreads = WarpCount::kCount * kWarpSize;
   273   static int const kAccessSizeInBits = 128;
   311     layout::PitchLinearShape<4, 8>,
   339       cutlass::arch::OpMultiplyAdd
   391                       LayoutC_, arch::OpClassTensorOp, 2, Operator_
   409     Shape::kM / WarpShape::kM,
   410     Shape::kN / WarpShape::kN,
   411     Shape::kK / WarpShape::kK
   416     !(Shape::kM % WarpShape::kM) &&
   417     !(Shape::kN % WarpShape::kN),
   418     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."
   425   static int const kThreads = WarpCount::kCount * kWarpSize;
   428   static int const kAccessSizeInBits = 128;
   494       cutlass::arch::OpMultiplyAdd
   546                       ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_
   564     Shape::kM / WarpShape::kM,
   565     Shape::kN / WarpShape::kN,
   566     Shape::kK / WarpShape::kK
   571     !(Shape::kM % WarpShape::kM) &&
   572     !(Shape::kN % WarpShape::kN),
   573     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."
   580   static int const kThreads = WarpCount::kCount * kWarpSize;
   583   static int const kAccessSizeInBits = 128;
   649       cutlass::arch::OpMultiplyAdd
 cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass arch::OpClassTensorOp OperatorClass
Definition: default_mma_core_sm70.h:247
Template mapping a row-major view of pitch-linear memory to VoltaTensorOpMultiplicandCongruous. 
Definition: tensor_op_multiplicand_sm70.h:630
Describes the size of a matrix tile. 
Definition: matrix_shape.h:42
Definition: aligned_buffer.h:35
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_sm70.h:551
Templates implementing loading of tiles from pitch-linear rank=2 tensors. 
Query the number of threads per warp. 
Definition: gemm/warp/mma.h:43
Templates implementing warp-level matrix multiply-accumulate operations targeting Tensor Cores...
Definition: default_mma_core.h:90
Templates implementing how threads are mapped to a given tile. 
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_sm70.h:396
Definition: tensor_op_multiplicand_sm70.h:848
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_sm70.h:398
Mapping function for column-major matrices. 
Definition: layout/matrix.h:142
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_sm70.h:560
Template defining a shape used by pitch-linear operators. 
Definition: pitch_linear.h:43
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass arch::OpClassTensorOp OperatorClass
Definition: default_mma_core_sm70.h:557
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_sm70.h:243
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass arch::OpClassTensorOp OperatorClass
Definition: default_mma_core_sm70.h:402
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_sm70.h:548
Defines a Shape template for matrix tiles. 
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_sm70.h:671
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_sm70.h:91
Defines the size of an element in bits. 
Definition: numeric_types.h:42
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_sm70.h:400
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_sm70.h:208
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_sm70.h:88
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_sm70.h:83
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_sm70.h:553
Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the glo...
Structure to compute the matrix product targeting Tensor Cores on the SM70 (Volta) architecture. 
Definition: mma_tensor_op_sm70.h:77
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_sm70.h:361
Top-level include for all CUTLASS numeric types. 
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_sm70.h:86
Shape of a matrix multiply-add operation. 
Definition: include/cutlass/gemm/gemm.h:57
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_sm70.h:84
Policy. 
Definition: mma_tensor_op_policy.h:48
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_sm70.h:549
Definition: tensor_op_multiplicand_sm70.h:943
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_sm70.h:556
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_sm70.h:250
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_sm70.h:394
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass arch::OpClassTensorOp OperatorClass
Definition: default_mma_core_sm70.h:92
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_sm70.h:241
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_sm70.h:405
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_sm70.h:516
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_sm70.h:95
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_sm70.h:555
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_sm70.h:90
Matrix multiply-add operation. 
Definition: arch/mma.h:92
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_sm70.h:401
Template mapping a column-major view of pitch-linear memory to VoltaTensorOpMultiplicandCongruous. 
Definition: tensor_op_multiplicand_sm70.h:191
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_sm70.h:239
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_sm70.h:238
Basic include for CUTLASS. 
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_sm70.h:393
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_sm70.h:246
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_sm70.h:245