54 namespace threadblock {
    60   typename ThreadShape_,      
   127   static_assert((ThreadShape::kM == 1), 
"M=1 is required for GEMV");
   129   static_assert(Shape::kK % ThreadShape::kK == 0, 
"Shape::K must be a multiple of ThreadShape::K");
   132                 (ThreadShape::kK == 2) || 
   133                 (ThreadShape::kK == 4) ||
   134                 (ThreadShape::kK == 8) ||
   135                 (ThreadShape::kK == 16) ||
   136                 (ThreadShape::kK == 32)
   138               "ThreadShape::K must be a 1, 2, 4, 8, 16 or 32");
 Describes the size of a matrix tile. 
Definition: matrix_shape.h:42
Definition: aligned_buffer.h:35
Shape_ Shape
Definition: default_gemv_core.h:70
Templates implementing how threads are mapped to a given tile. 
ThreadShape_ ThreadShape
Definition: default_gemv_core.h:71
Defines common types used for all GEMM-like operators. 
ElementA_ ElementA
Definition: default_gemv_core.h:77
typename platform::conditional< platform::is_same< LayoutC, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kN, Shape::kM >, kThreadsPerN, ThreadShape::kN >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kM, Shape::kN >, kThreadsPerN, ThreadShape::kM >>::type IteratorPolicyC
Definition: default_gemv_core.h:108
Template defining a shape used by pitch-linear operators. 
Definition: pitch_linear.h:43
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
Defines a Shape template for matrix tiles. 
typename platform::conditional< platform::is_same< LayoutA, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kK, Shape::kM >, 1, ThreadShape::kK >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kM, Shape::kK >, 1, ThreadShape::kM >>::type IteratorPolicyA
Definition: default_gemv_core.h:88
typename platform::conditional< platform::is_same< LayoutB, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kN, Shape::kK >, kThreadsPerN, ThreadShape::kN >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kK, Shape::kN >, kThreadsPerN, ThreadShape::kK >>::type IteratorPolicyB
Definition: default_gemv_core.h:98
ElementC_ ElementC
Definition: default_gemv_core.h:79
MmaSimtOp Operator
Definition: default_gemv_core.h:122
typename cutlass::gemm::thread::Mma< cutlass::gemm::GemmShape< ThreadShape::kM, ThreadShape::kN, Shape::kK >, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC > MmaSimtOp
Definition: default_gemv_core.h:120
Templates exposing architecture support for warp-level multiply-add operations. 
Top-level include for all CUTLASS numeric types. 
Shape of a matrix multiply-add operation. 
Definition: include/cutlass/gemm/gemm.h:57
Definition: default_gemv_core.h:68
LayoutB_ LayoutB
Definition: default_gemv_core.h:74
static int const kThreadsPerN
Definition: default_gemv_core.h:81
Structure to compute the matrix product. 
Definition: gemm/thread/mma.h:66
Defines layout functions used by TensorRef and derived classes. 
Template for a threadblock-scoped GEMV kernel. 
ElementB_ ElementB
Definition: default_gemv_core.h:78
LayoutC_ LayoutC
Definition: default_gemv_core.h:75
Basic include for CUTLASS. 
LayoutA_ LayoutA
Definition: default_gemv_core.h:73