| CUTLASS: CUDA Templates for Linear Algebra Subroutines and Solvers |
Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the global memory fragments, data types, and internal tile sizes. More...
#include "cutlass/cutlass.h"
#include "cutlass/array.h"
#include "cutlass/fast_math.h"
#include "cutlass/numeric_types.h"
#include "cutlass/matrix_shape.h"
#include "cutlass/transform/pitch_linear_thread_map.h"
#include "cutlass/transform/threadblock/regular_tile_iterator_pitch_linear.h"
#include "cutlass/transform/threadblock/regular_tile_iterator_pitch_linear_2dthreadtile.h"
#include "cutlass/gemm/warp/mma_simt_policy.h"
#include "cutlass/gemm/warp/mma_simt.h"
#include "cutlass/gemm/threadblock/default_mma_core.h"

Go to the source code of this file.
| Namespaces | |
| cutlass | |
| cutlass::gemm | |
| cutlass::gemm::threadblock | |
| cutlass::gemm::threadblock::detail | |
| Functions | |
| template<typename WarpShape > | |
| constexpr int | cutlass::gemm::threadblock::detail::simt_get_warp_threads_m () | 
| constexpr int | cutlass::gemm::threadblock::detail::simt_transpose_padding (int threads, int crosswise, int size_in_bits) | 
| Computes padding in shared memory to perform efficient transpose without bank conflicts.  More... | |
Partial specializations for threadblock::Mma operations targeting simt instructions.
 1.8.11
 1.8.11