42 template <
typename Func, 
int Rank, 
int RankRemaining>
    52     for (
int i = Rank - RankRemaining; i < Rank; ++i) {
    56     coord[Rank - 1 - RankRemaining] = index / product;
    57     int64_t remaining = index % product;
    64 template <
typename Func, 
int Rank>
    71     coord[Rank - 1] = index;
    84 template <
typename Func, 
int Rank, 
typename Params>
    89   int64_t index = threadIdx.x + blockIdx.x * blockDim.x;
    90   int64_t max_index = 1;
    93   for (
int i = 0; i < Rank; ++i) {
    98   while  (index < max_index) {
   102     index += blockDim.x * gridDim.x;
   109 template <
typename Func, 
int Rank, 
typename Params>
   114   int64_t index = threadIdx.x + blockIdx.x * blockDim.x + start;
   120     for (
int i = 0; i < Rank; ++i) {
   130 template <
typename Element, 
typename Func>
   134   typename Func::Params params) {
   138   size_t index = threadIdx.x + blockIdx.x * blockDim.x;
   140   for (; index < capacity; index += blockDim.x * gridDim.x) {
 Definition: aligned_buffer.h:35
A Coord is a coordinate of arbitrary rank into a tensor or matrix. 
__inline__ __device__ TensorForEachHelper(Func &func, Coord< Rank > const &size, Coord< Rank > &coord, int64_t index)
Constructor for fastest changing rank. 
Definition: device/kernel/tensor_foreach.h:69
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
__global__ void BlockForEach(Element *ptr, size_t capacity, typename Func::Params params)
Definition: device/kernel/tensor_foreach.h:131
#define CUTLASS_PRAGMA_NO_UNROLL
Definition: cutlass.h:111
Statically-sized array specifying Coords within a tensor. 
Definition: coord.h:43
__inline__ __device__ TensorForEachHelper(Func &func, Coord< Rank > const &size, Coord< Rank > &coord, int64_t index)
Constructor for general rank. 
Definition: device/kernel/tensor_foreach.h:47
__global__ void TensorDiagonalForEach(Coord< Rank > size, Params params, int start, int end)
Kernel calls a functor for each element along a tensor's diagonal. 
Definition: device/kernel/tensor_foreach.h:110
__global__ void TensorForEach(Coord< Rank > size, Params params=Params())
Kernel calls a functor for each element in a tensor's index space. 
Definition: device/kernel/tensor_foreach.h:85
Helper to perform for-each operation. 
Definition: device/kernel/tensor_foreach.h:43
Basic include for CUTLASS.