// dump_fragment: device-side debugging helper that printf()s elements of
// `frag`, one output line per selected thread of the block.
//
// NOTE(review): this listing carries embedded line numbers and several
// numbered lines are absent from it, so some statements below look
// unfinished; they continue on lines that are not shown here.
//
//   frag - object to print; only frag.size() and frag[i] are used below.
//   N    - number of threads to print (thread ids 0..N-1); 0 means every
//          thread in the block.
//   M    - upper bound on the element index printed per thread; 0 means
//          frag.size().
//   S    - step between printed element indices. Its declaration is on a line
//          not shown; the signature summary further down this listing gives
//          `int S=1` -- confirm against the full header.
47 template <
typename Fragment>
    48 CUTLASS_DEVICE 
void dump_fragment(Fragment 
const& frag, 
int N = 0, 
int M = 0,
// Number of threads in this block.
    50   int total_threads = blockDim.x * blockDim.y * blockDim.z;
// Linearized block index (x varies fastest, then y, then z). The left-hand
// side of this assignment is on a line not shown -- presumably `block_id`,
// the name used by the checks below.
    52       blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
// Linearized thread index within the block (x varies fastest, then y, then z).
    53   int thread_id = (threadIdx.z * (blockDim.x * blockDim.y)) +
    54                   (threadIdx.y * blockDim.x) + threadIdx.x;
// Reject an out-of-range N; only thread 0 of block 0 reports it. The rest of
// this branch is not shown.
// NOTE(review): the message quotes a range starting at 1, while this check
// also lets N == 0 through (0 is the "all threads" default applied below).
    56   if (N < 0 || N > total_threads) {
    57     if (thread_id == 0 && block_id == 0)
    58       printf(
"Thread number N = %d should between [1, %d].\n", N,
// Number of elements held by the fragment.
    66   int total_elements = frag.size();
// Same validation for M, again reported only by thread 0 of block 0; M == 0
// passes here as well. The rest of this branch is not shown.
    68   if (M < 0 || M > total_elements) {
    69     if (thread_id == 0 && block_id == 0)
    70       printf(
"Element number M = %d should between [1, %d].\n", M,
// 0 means "all": default N to every thread in the block.
    78   if (N == 0) N = total_threads;
// 0 means "all": default M to every element of the fragment.
    80   if (M == 0) M = total_elements;
// Stride error report; the guarding condition is on a line not shown. The
// message implies S is expected to lie in [1, M].
    83     if (thread_id == 0 && block_id == 0)
    84       printf(
"Stride S = %d should between [1, %d].\n", S, M);
// Opening banner, printed once by thread 0 of block 0.
    91   if (thread_id == 0 && block_id == 0)
    92     printf(
"\n*******************Dumping the fragments*******************\n\n");
// Step through thread ids 0..N-1; in each iteration only the thread whose
// linear id equals `tid` prints.
    95   for (
int tid = 0; tid < N; ++tid) {
    96     if (tid == thread_id) {
// Line prefix, labelled TB / W / T in the output: block_id, tid / 32 and
// tid & 31.
    97       printf(
"TB%d W%d T%d: ", block_id, tid / 32, tid & 31);
// Print elements 0, S, 2S, ... below M. Each one is converted to
// Fragment::value_type, then to float, and printed with no fractional digits
// ("%.0f").
    99       for (
int i = 0; i < M; i += S) {
   100         printf(
"%.0f ", 
float(
typename Fragment::value_type(frag[i])));
// Closing banner, printed once by thread 0 of block 0.
   108   if (thread_id == 0 && block_id == 0)
   109     printf(
"\n***********************************************************\n\n");
   // dump_shmem: device-side debugging helper that printf()s `size` elements
   // starting at `ptr`, row by row, from thread 0 of each block.
   //
   // NOTE(review): this listing carries embedded line numbers and several
   // numbered lines are absent from it (including the declarations of
   // `block_id` and `row_elements`), so some statements below look unfinished;
   // they continue on lines that are not shown here.
   //
   //   ptr  - first element to print; a null pointer is reported and rejected.
   //          The name and banner text say "shared memory", but nothing in the
   //          visible lines checks which memory space ptr refers to.
   //   size - number of elements, used as the bound of the outer (row) loop.
   //   S    - step between printed elements within a row (default 1); it is
   //          validated against [1, row_elements].
   //
   // SHMEM_ROW_SIZE is defined as 128 on the next line. `row_elements` is
   // presumably derived from it on a line not shown -- TODO confirm against
   // the full header.
   120 #define SHMEM_ROW_SIZE 128   124 template <
typename Element>
   125 CUTLASS_DEVICE 
void dump_shmem(Element 
const* ptr, 
size_t size, 
int S = 1) {
// Linearized block index (x varies fastest, then y, then z). The left-hand
// side of this assignment is on a line not shown -- presumably `block_id`,
// the name used by the checks below.
   127       blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
// Linearized thread index within the block (x varies fastest, then y, then z).
   128   int thread_id = (threadIdx.z * (blockDim.x * blockDim.y)) +
   129                   (threadIdx.y * blockDim.x) + threadIdx.x;
// Null-pointer guard; only thread 0 of block 0 reports it. The rest of this
// branch is not shown.
   131   if (ptr == 
nullptr) {
   132     if (thread_id == 0 && block_id == 0) printf(
"ptr is null.\n");
// Error report for a size below 1; the guarding condition is on a line not
// shown.
   139     if (thread_id == 0 && block_id == 0)
   140       printf(
"Element size is less than 1\n");
// The stride must lie in [1, row_elements]; thread 0 of block 0 reports a
// violation. The rest of this branch is not shown.
   149   if (S < 1 || S > row_elements) {
   150     if (thread_id == 0 && block_id == 0)
   151       printf(
"Stride S = %d should between [1, %d].\n", S, row_elements);
// Opening banner naming the block being dumped; the guarding condition is on
// a line not shown.
   161     printf(
"\n********Dumping the shared memory of TB %d*******\n\n", block_id);
// Only the thread with linear id 0 walks the buffer.
   163   if (thread_id == 0) {
// Outer loop: one iteration per row of row_elements elements.
// NOTE(review): `i` is a signed int compared against the size_t `size`.
   164     for (
int i = 0; i < size; i += row_elements) {
// Inner loop: every S-th element of the current row, converted to float and
// printed with no fractional digits ("%.0f").
// NOTE(review): `ptr[i + j]` is not checked against `size`, so when `size` is
// not a multiple of row_elements the last row reads indices >= size.
   165       for (
int j = 0; j < row_elements; j += S) {
   166         printf(
"%.0f ", 
float(ptr[i + j]));
// Closing banner; the guarding condition is on a line not shown.
   174     printf(
"\n***********************************************************\n\n");
 Definition: aligned_buffer.h:35
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
CUTLASS_DEVICE void dump_fragment(Fragment const &frag, int N=0, int M=0, int S=1)
Definition: device_dump.h:48
#define CUTLASS_PRAGMA_NO_UNROLL
Definition: cutlass.h:111
#define SHMEM_ROW_SIZE
Definition: device_dump.h:120
Basic include for CUTLASS. 
CUTLASS_DEVICE void dump_shmem(Element const *ptr, size_t size, int S=1)
Definition: device_dump.h:125