45 template <
int ElementSize, 
int Crosswise>
    82       kTileShapeContiguous * kElementsPerAccess / 
kCrosswise;
    88       ((kTileShapeContiguous / 
kFactor) > (32 / kTileShapeContiguous))
    89           ? (kTileShapeContiguous / kFactor)
   147     int tile_contiguous_idx =
   150     int tile_contiguous_residual =
   156     int partition_contiguous_idx =
   158     int partition_strided_idx =
   161     int partition_contiguous_residual =
   163     int partition_strided_residual =
   170     int permuted_vec_contiguous_within_partition =
   171         partition_contiguous_residual ^ (partition_strided_residual % 4);
   173     int permuted_partition_contiguous_within_tile =
   174         partition_contiguous_idx ^ (partition_strided_idx % 2);
   181                               permuted_partition_contiguous_within_tile *
   183                               permuted_vec_contiguous_within_partition) *
   187     int element_strided = vec_strided_idx;
   189     return element_contiguous + element_strided * stride_[0] * 
kFactor;
   204     return extent[1] * stride_[0];
   212 template <
int ElementSize, 
int Crosswise>
   215   static int const kRank = 2;
   218   static int const kStrideRank = 1;
   239   static int const kAccessSize = Base::kAccessSize;
   247   static int const kElementSize = Base::kElementSize;
   248   static int const kElementsPerAccess = Base::kElementsPerAccess;
   282     return layout_(coord);
   312 template <
int Crosswise>
   315   static int const kRank = 2;
   318   static int const kStrideRank = 1;
   337   static int const kAccessSize = 128;
   355   static int const kElementSize = 32;
   392     int c = (coord.
contiguous() % 32) / kElementsPerAccess;
   395     LongIndex offset = (c ^ (2 * s)) * kElementsPerAccess + s * stride_[0] +
   396                        tc * 32 + ts * stride_[0] * 4 + coord.
contiguous() % 4;
   413     return extent[1] * stride_[0];
   421 template <
int ElementSize, 
int Crosswise>
   425   static int const kRank = 2;
   428   static int const kStrideRank = 1;
   449   static int const kAccessSize = Base::kAccessSize;
   457   static int const kElementSize = Base::kElementSize;
   458   static int const kElementsPerAccess = Base::kElementsPerAccess;
   526 template <
int ElementSize, 
int Crosswise>
   530   static int const kRank = 2;
   533   static int const kStrideRank = 1;
   554   static int const kAccessSize = Base::kAccessSize;
   562   static int const kElementSize = Base::kElementSize;
   563   static int const kElementsPerAccess = Base::kElementsPerAccess;
   631 template <
int ElementSize, 
int Crosswise>
   634   static int const kRank = 2;
   637   static int const kStrideRank = 1;
   658   static int const kAccessSize = Base::kAccessSize;
   666   static int const kElementSize = Base::kElementSize;
   667   static int const kElementsPerAccess = Base::kElementsPerAccess;
   668   static int const kCrosswise = Base::kCrosswise;
   669   static int const kFactor = Base::kFactor;
   703     return layout_(coord);
   733 template <
int ElementSize, 
int Crosswise>
   736   static int const kRank = 2;
   739   static int const kStrideRank = 1;
   760   static int const kAccessSize = Base::kAccessSize;
   768   static int const kElementSize = Base::kElementSize;
   769   static int const kElementsPerAccess = Base::kElementsPerAccess;
   834 template <
int ElementSize, 
int Crosswise>
   837   static int const kRank = 2;
   840   static int const kStrideRank = 1;
   861   static int const kAccessSize = Base::kAccessSize;
   869   static int const kElementSize = Base::kElementSize;
   870   static int const kElementsPerAccess = Base::kElementsPerAccess;
   934 template <
int ElementSize, 
int InterleavedK>
   938   static int const kRank = 2;
   941   static int const kStrideRank = 1;
   960   static int const kAccessSize = 128;
   966   static int const kElementSize = ElementSize;
   970   static int const kInterleavedK = InterleavedK;
  1004     int const rows_per_smem_cache_line = 128 / kInterleavedK;
  1006     int row_id = coord.
strided() / rows_per_smem_cache_line;
  1007     int col_id = (coord.
strided() % rows_per_smem_cache_line) * kInterleavedK + coord.
contiguous();
  1009     int access_block_id = col_id >> 4;
  1010     int swizzle_access_block_id = access_block_id ^ (row_id & 1);
  1012     int swizzle_col_id = swizzle_access_block_id << 4;
  1014     return row_id * 128 + swizzle_col_id;
  1032     return (extent[1] / kInterleavedK) * stride_[0];
  1039 template <
int ElementSize, 
int InterleavedK>
  1043   static int const kRank = 2;
  1046   static int const kStrideRank = 1;
  1065   static int const kAccessSize = 128;
  1071   static int const kElementSize = ElementSize;
  1075   static int const kInterleavedK = InterleavedK;
  1109     int const rows_per_smem_cache_line = 128 / kInterleavedK;
  1111     int row_id = coord.
strided() / rows_per_smem_cache_line;
  1112     int col_id = (coord.
strided() % rows_per_smem_cache_line) * kInterleavedK + coord.
contiguous();
  1114     int access_block_id = col_id >> 4;
  1115     int swizzle_access_block_id = access_block_id ^ (row_id & 1);
  1117     int swizzle_col_id = swizzle_access_block_id << 4;
  1119     return row_id * 128 + swizzle_col_id;
  1137     return (extent[0] / kInterleavedK) * stride_[0];
 int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:434
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:460
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:240
CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCrosswise(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:791
CUTLASS_HOST_DEVICE Index const & column() const 
Returns the column of the coordinate. 
Definition: matrix_coord.h:85
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Definition: tensor_op_multiplicand_sm75.h:412
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:944
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:670
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:640
CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCrosswise(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:888
Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:137
Coordinate in pitch-linear space. 
Definition: pitch_linear.h:52
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:762
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:863
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:719
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:431
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:321
CUTLASS_HOST_DEVICE TensorOpMultiplicandCrosswise(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:691
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:536
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const 
Inverse of layout function, mapping linear offset to logical coordinate. 
Definition: tensor_op_multiplicand_sm75.h:910
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:803
Definition: tensor_op_multiplicand_sm75.h:734
static int const kRank
Logical rank of tensor. 
Definition: tensor_op_multiplicand_sm75.h:48
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:539
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const 
Inverse of layout function, mapping linear offset to logical coordinate. 
Definition: tensor_op_multiplicand_sm75.h:287
CUTLASS_HOST_DEVICE TensorOpMultiplicandColumnMajorInterleaved(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:992
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:54
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:459
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:221
A Coord is a coordinate of arbitrary rank into a tensor or matrix. 
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:1052
static CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCongruous packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:590
Definition: tensor_op_multiplicand_sm75.h:422
static CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCrosswise packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:896
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:659
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:843
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:555
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:742
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const 
Inverse of layout function, mapping linear offset to logical coordinate. 
Definition: tensor_op_multiplicand_sm75.h:603
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:451
Definition: tensor_op_multiplicand_sm75.h:835
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:57
static CUTLASS_HOST_DEVICE TensorOpMultiplicandCrosswise packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:695
Definition: tensor_op_multiplicand_sm75.h:213
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:249
CUTLASS_HOST_DEVICE Index const & row() const 
Returns the row of the coordinate. 
Definition: matrix_coord.h:77
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Definition: tensor_op_multiplicand_sm75.h:303
CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCrosswise(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:892
static int const kTileShapeContiguous
Definition: tensor_op_multiplicand_sm75.h:78
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:820
static CUTLASS_HOST_DEVICE TensorOpMultiplicandRowMajorInterleaved packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:1101
static CUTLASS_HOST_DEVICE TensorOpMultiplicandColumnMajorInterleaved packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:996
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:250
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:917
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Definition: tensor_op_multiplicand_sm75.h:825
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:1025
CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCrosswise(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:787
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:947
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:224
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:564
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:407
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:281
Template defining a shape used by pitch-linear operators. 
Definition: pitch_linear.h:43
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Definition: tensor_op_multiplicand_sm75.h:724
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:761
CUTLASS_HOST_DEVICE TensorOpMultiplicandColumnMajorInterleaved(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:988
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:450
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:816
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:846
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Compute the number of contiguous elements needed to store a tensor with the given size.
Definition: tensor_op_multiplicand_sm75.h:517
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:294
static int const kElementsPerAccess
Definition: tensor_op_multiplicand_sm75.h:73
static int const kStrided
Definition: pitch_linear.h:45
int32_t Index
Index type used for coordinates. 
Definition: tensor_op_multiplicand_sm75.h:1049
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:745
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:871
CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCongruous(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:586
Definition: tensor_op_multiplicand_sm75.h:46
static int const kContiguous
Definition: pitch_linear.h:44
Template based on element size (in bits) - defined in terms of pitch-linear memory. 
Definition: tensor_op_multiplicand_sm75.h:935
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:597
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:771
CUTLASS_HOST_DEVICE TensorOpMultiplicand(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:126
CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCongruous(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:477
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Compute the number of contiguous elements needed to store a tensor with the given size.
Definition: tensor_op_multiplicand_sm75.h:1031
CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:266
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:388
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:904
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:1124
static CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:274
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:324
CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCongruous(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:481
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:505
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:671
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Definition: tensor_op_multiplicand_sm75.h:926
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:921
CUTLASS_HOST_DEVICE Index const & contiguous() const 
Returns the contiguous dimension. 
Definition: pitch_linear.h:89
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:1003
CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCongruous(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:582
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:511
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:610
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:556
static CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCongruous packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:485
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor_op_multiplicand_sm75.h:643
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:715
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:194
CUTLASS_HOST_DEVICE TensorOpMultiplicandCrosswise(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:687
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const 
Inverse of layout function, mapping linear offset to logical coordinate. 
Definition: tensor_op_multiplicand_sm75.h:708
CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:270
Template based on element size (in bits) - defined in terms of pitch-linear memory. 
Definition: tensor_op_multiplicand_sm75.h:1040
CUTLASS_HOST_DEVICE TensorOpMultiplicandRowMajorInterleaved(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:1093
static CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCrosswise packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:795
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:565
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:198
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:862
static CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:381
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:298
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:616
static int const kCrosswise
Definition: tensor_op_multiplicand_sm75.h:74
static int const kTileShapeStride
Definition: tensor_op_multiplicand_sm75.h:87
Defines a canonical coordinate for rank=2 matrices offering named indices. 
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:1019
CUTLASS_HOST_DEVICE TensorOpMultiplicand(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:122
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const 
Inverse of layout function, mapping linear offset to logical coordinate. 
Definition: tensor_op_multiplicand_sm75.h:809
static int const kElementSize
Definition: tensor_op_multiplicand_sm75.h:72
static int const kFactor
Number of k-blocks needed to store PartitionShape::kContiguous elements.
Definition: tensor_op_multiplicand_sm75.h:81
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Definition: tensor_op_multiplicand_sm75.h:203
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:660
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:241
Defines layout functions used by TensorRef and derived classes for pitch-linear memory. 
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:403
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const 
Inverse of layout function, mapping linear offset to logical coordinate. 
Definition: tensor_op_multiplicand_sm75.h:498
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:872
Definition: tensor_op_multiplicand_sm75.h:632
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Compute the number of contiguous elements needed to store a tensor with the given size.
Definition: tensor_op_multiplicand_sm75.h:1136
CUTLASS_HOST_DEVICE TensorOpMultiplicandRowMajorInterleaved(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:1097
static int const kStrideRank
Rank of stride vector. 
Definition: tensor_op_multiplicand_sm75.h:51
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:702
static CUTLASS_HOST_DEVICE TensorOpMultiplicand packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor_op_multiplicand_sm75.h:130
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:492
Basic include for CUTLASS. 
Definition: matrix_coord.h:39
CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous(Index ldm=0)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:373
CUTLASS_HOST_DEVICE Index const & strided() const 
Returns the strided dimension.
Definition: pitch_linear.h:97
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Compute the number of contiguous elements needed to store a tensor with the given size.
Definition: tensor_op_multiplicand_sm75.h:622
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Definition: tensor_op_multiplicand_sm75.h:1108
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor_op_multiplicand_sm75.h:1130
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:770
static int const kAccessSize
This layout is optimized for 128b accesses. 
Definition: tensor_op_multiplicand_sm75.h:70
CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous(Stride stride)
Ctor. 
Definition: tensor_op_multiplicand_sm75.h:377
Definition: tensor_op_multiplicand_sm75.h:527