44 Array<half_t, 2> 
operator*(Array<half_t, 2> 
const &a, Array<half_t, 2> 
const &b) {
    54 Array<half_t, 2> 
operator+(Array<half_t, 2> 
const &a, Array<half_t, 2> 
const &b) {
    64 Array<half_t, 2> 
operator-(Array<half_t, 2> 
const &a, Array<half_t, 2> 
const &b) {
    77 Array<half_t, 2> 
mac(Array<half_t, 2> 
const &a, Array<half_t, 2> 
const &b, Array<half_t, 2> 
const &c) {
   100 float dot(Array<half_t, 2> 
const &a, Array<half_t, 2> 
const &b, 
float accum) {
 Definition: aligned_buffer.h:35
IEEE half-precision floating-point type. 
Definition: half.h:126
CUTLASS_HOST_DEVICE Array< T, N > operator-(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:67
CUTLASS_HOST_DEVICE Array< T, N > operator*(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:45
CUTLASS_HOST_DEVICE Accumulator dot(Array< T, N > const &a, Array< T, N > const &b, Accumulator accum)
Definition: simd.h:101
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE Array< T, N > operator+(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:56
Templates exposing SIMD operators. 
CUTLASS_HOST_DEVICE Array< T, N > mac(Array< T, N > const &a, Array< T, N > const &b, Array< T, N > const &c)
Definition: simd.h:84