34 #if !defined(__CUDACC_RTC__)    40 #include <type_traits>    46 #include <cublas_v2.h>    47 #include <curand_kernel.h>    68 template <
typename FloatType>
    71   return curand_normal(state);
    77   return curand_normal_double(state);
    80 template <
typename FloatType>
    83   return curand_uniform(state);
    89   return curand_uniform_double(state);
    92 template <
typename Element>
    95   using FloatType = 
typename std::conditional<(sizeof(Element) > 4), 
double, 
float>::type;
    96   using IntType = 
typename std::conditional<(sizeof(Element) > 4), int64_t, 
int>::type;
   124       int_scale(int_scale_) {
   147     uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x;
   149     curand_init(params.
seed, gtid, 0, &rng_state);
   165       result = Element(rnd);
   208       view(view_), random(random_) {
   234     params.
view.
at(coord) = random();
   249   Element 
mean = Element(0),              
   250   Element 
stddev = Element(1),            
   257   using Params = 
typename Func::Params;
   268 template <
typename Element>               
   273   Element 
mean = Element(0),              
   274   Element 
stddev = Element(1),            
   292 template <
typename Element>                
   295   using FloatType = 
typename std::conditional<
   296     (
sizeof(Element) > 4),
   300   using IntType = 
typename std::conditional<
   301     (
sizeof(Element) > 4),
   335       int_scale(int_scale_) {
   358     uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x;
   360     curand_init(params.
seed, gtid, 0, &rng_state);
   368     rnd = params.
min + params.
range * rnd;
   379       result = Element(rnd);
   426       view(view_), random(random_) {
   451     params.
view.
at(coord) = random();
   466   Element 
max = Element(1),               
   467   Element 
min = Element(0),               
   474   using Params = 
typename Func::Params;
   476   typename RandomFunc::Params random(seed, 
max, 
min, bits);
   487 template <
typename Element>
   492   Element 
max = Element(1),               
   493   Element 
min = Element(0),               
   499   typename RandomFunc::Params 
params(seed, 
max, 
min, bits);
   546       Element diag_ = Element(1),
   547       Element other_ = Element(0)
   549       view(view_), diag(diag_), other(other_) {
   578     for (
int i = 1; i < Layout::kRank; ++i) {
   579       if (coord[i] != coord[i - 1]) {
   599   Element diag = Element(1),              
   600   Element other = Element(0)) {           
   603   typedef typename Func::Params 
Params;
   619   Element val = Element(0)) {               
   677       Element diag_ = Element(1)
   679       view(view_), diag(diag_) {
   708     for (
int i = 1; i < Layout::kRank; ++i) {
   709       if (coord[i] != coord[i - 1]) {
   731   Element diag = Element(1)) {
   734   typedef typename Func::Params 
Params;
   783       Element other_ = Element(0)
   785       view(view_), other(other_) {
   814     for (
int i = 1; i < Layout::kRank; ++i) {
   815       if (coord[i] != coord[i - 1]) {
   837   Element other = Element(1)) {
   840   typedef typename Func::Params 
Params;
   876     Array<Element, Layout::kRank> 
v;
   890       Array<Element, Layout::kRank> 
const & v_,
   891       Element s_ = Element(0)
   893       view(view_), v(v_), s(s_) { 
   918     Element sum = params.
s;
   921     for (
int i = 0; i < Layout::kRank; ++i) {
   922       sum += params.
v[i] * Element(coord[i]);
   925     params.
view.
at(coord) = sum;
   939   Array<Element, Layout::kRank> 
const & v,
   940   Element s = Element(0)) {
   943   using Params = 
typename Func::Params;
   961   Element v = Element(1),
   962   Element s = Element(0)) {
   980     BlockFillRandomGaussian<Element>(
   984       static_cast<Element
>(dist.
gaussian.mean), 
   985       static_cast<Element>(dist.
gaussian.stddev), 
   989     BlockFillRandomUniform<Element>(
   993       static_cast<Element
>(dist.
uniform.max),
   994       static_cast<Element>(dist.
uniform.min), 
  1042       view(view_), ptr(ptr_) { 
  1067     bool is_diagonal = 
true;
  1070     for (
int i = 1; i < Layout::kRank; ++i) {
  1071       if (coord[i] != coord[0]) {
  1072         is_diagonal = 
false;
  1076       params.
view.
at(coord) = params.
ptr[coord[0]];
  1091   Element 
const *ptr) {                     
  1094   using Params = 
typename Func::Params;
  1146       view(view_), ptr(ptr_) { 
  1171     bool is_diagonal = 
true;
  1174     for (
int i = 1; i < Layout::kRank; ++i) {
  1175       if (coord[i] != coord[0]) {
  1176         is_diagonal = 
false;
  1180       params.
ptr[coord[0]] = params.
view.
at(coord);  
  1198   using Params = 
typename Func::Params;
 TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space. 
Definition: device/tensor_fill.h:866
Computes a random Gaussian distribution. 
Definition: device/tensor_fill.h:751
CUTLASS_HOST_DEVICE Params()
Default ctor. 
Definition: device/tensor_fill.h:881
Params(TensorView view_=TensorView(), Element diag_=Element(1))
Construction of Gaussian RNG functor. 
Definition: device/tensor_fill.h:675
CUTLASS_DEVICE RandomGaussianFunc(Params const &params)
Device-side initialization of RNG. 
Definition: device/tensor_fill.h:145
CUTLASS_HOST_DEVICE Params()
Default ctor. 
Definition: device/tensor_fill.h:1031
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space. 
Definition: device/tensor_fill.h:760
Definition: aligned_buffer.h:35
Definition: distribution.h:40
void TensorCopyDiagonalOut(Element *ptr, TensorView< Element, Layout > view)
Copies the diagonal of a tensor into a dense buffer in host memory. 
Definition: device/tensor_fill.h:1193
Definition: distribution.h:40
TensorView view
Definition: device/tensor_fill.h:663
struct cutlass::Distribution::@18::@20 uniform
Uniform distribution. 
Parameters structure. 
Definition: device/tensor_fill.h:1020
TensorView::Element T
Scalar type. 
Definition: device/tensor_fill.h:863
Element const * ptr
Definition: device/tensor_fill.h:1027
TensorView view
Definition: device/tensor_fill.h:196
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space. 
Definition: device/tensor_fill.h:1017
Parameters structure. 
Definition: device/tensor_fill.h:99
Kind kind
Active variant kind. 
Definition: distribution.h:64
void TensorFillIdentity(TensorView< Element, Layout > view)
Fills a tensor's diagonal with 1 and 0 everywhere else. 
Definition: device/tensor_fill.h:630
CUTLASS_HOST_DEVICE TensorCoord const & extent() const 
Returns the extent of the view (the size along each logical dimension). 
Definition: tensor_view.h:167
Computes a random Gaussian distribution. 
Definition: device/tensor_fill.h:645
Params(TensorView view_, Element *ptr_)
Construction of Gaussian RNG functor. 
Definition: device/tensor_fill.h:1142
Params params
Parameters object. 
Definition: device/tensor_fill.h:1052
struct cutlass::Distribution::@18::@21 gaussian
Gaussian distribution. 
Definition: device/tensor_fill.h:93
TensorView::Element T
Scalar type. 
Definition: device/tensor_fill.h:651
CUTLASS_DEVICE TensorUpdateDiagonalFunc(Params const &params)
Device-side initialization of RNG. 
Definition: device/tensor_fill.h:697
CUTLASS_DEVICE TensorFillLinearFunc(Params const &params)
Device-side initialization of RNG. 
Definition: device/tensor_fill.h:911
void TensorCopyDiagonalIn(TensorView< Element, Layout > view, Element const *ptr)
Copies a diagonal in from host memory without modifying off-diagonal elements. 
Definition: device/tensor_fill.h:1089
curandState_t rng_state
RNG state object. 
Definition: device/tensor_fill.h:137
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state. 
Definition: device/tensor_fill.h:917
Defines a structure containing strides and a pointer to tensor data. 
RandomFunc random
Definition: device/tensor_fill.h:218
uint64_t seed
Definition: device/tensor_fill.h:105
CUTLASS_DEVICE double random_normal_float< double >(curandState_t *state)
Definition: device/tensor_fill.h:76
Defines a floating-point type based on the number of exponent and mantissa bits. 
Definition: numeric_types.h:144
typename std::conditional<(sizeof(Element) > 4), double, float >::type FloatType
Definition: device/tensor_fill.h:95
TensorView view
Definition: device/tensor_fill.h:875
Element Element
Data type of individual access. 
Definition: tensor_view.h:72
Params(TensorView view_, Element const *ptr_)
Construction of Gaussian RNG functor. 
Definition: device/tensor_fill.h:1038
Params(TensorView view_=TensorView(), Element diag_=Element(1), Element other_=Element(0))
Construction of Gaussian RNG functor. 
Definition: device/tensor_fill.h:544
void BlockFillSequential(Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0))
Fills a block of data with sequential elements. 
Definition: device/tensor_fill.h:958
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
Computes a random Gaussian distribution. 
Definition: device/tensor_fill.h:857
int int_scale
Definition: device/tensor_fill.h:108
void TensorFillRandomGaussian(TensorView< Element, Layout > view, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)
Fills a tensor with random values with a Gaussian distribution. 
Definition: device/tensor_fill.h:246
CUTLASS_DEVICE TensorFillDiagonalFunc(Params const &params)
Device-side initialization of RNG. 
Definition: device/tensor_fill.h:567
Params params
Definition: device/tensor_fill.h:217
void BlockFillRandomUniform(Element *ptr, size_t capacity, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)
Fills a tensor with random values with a uniform random distribution. 
Definition: device/tensor_fill.h:488
Parameters structure. 
Definition: device/tensor_fill.h:190
TensorView view
Definition: device/tensor_fill.h:1026
Parameters structure. 
Definition: device/tensor_fill.h:869
Params(TensorView view_, Array< Element, Layout::kRank > const &v_, Element s_=Element(0))
Construction of Gaussian RNG functor. 
Definition: device/tensor_fill.h:888
void TensorFillDiagonal(TensorView< Element, Layout > view, Element diag=Element(1), Element other=Element(0))
Fills a tensor everywhere with a unique value for its diagonal. 
Definition: device/tensor_fill.h:597
typename Layout::TensorCoord TensorCoord
Coordinate in logical tensor space. 
Definition: tensor_view.h:87
Element s
Definition: device/tensor_fill.h:877
TensorView::Element T
Scalar type. 
Definition: device/tensor_fill.h:1118
Params params
Parameters object. 
Definition: device/tensor_fill.h:795
Computes a random Gaussian distribution. 
Definition: device/tensor_fill.h:513
Parameters structure. 
Definition: device/tensor_fill.h:525
Computes a random Gaussian distribution. 
Definition: device/tensor_fill.h:1008
FloatType mean
Definition: device/tensor_fill.h:106
Element diag
Definition: device/tensor_fill.h:664
CUTLASS_DEVICE TensorCopyDiagonalInFunc(Params const &params)
Device-side initialization of RNG. 
Definition: device/tensor_fill.h:1060
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space. 
Definition: device/tensor_fill.h:654
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state. 
Definition: device/tensor_fill.h:573
void TensorFill(TensorView< Element, Layout > view, Element val=Element(0))
Fills a tensor with a uniform value. 
Definition: device/tensor_fill.h:617
This header contains a class to parametrize a statistical distribution function. 
Params params
Parameters object. 
Definition: device/tensor_fill.h:134
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state. 
Definition: device/tensor_fill.h:703
void BlockFillRandomGaussian(Element *ptr, size_t capacity, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)
Fills a tensor with random values with a Gaussian distribution. 
Definition: device/tensor_fill.h:269
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space. 
Definition: device/tensor_fill.h:1121
TensorView::Element T
Scalar type. 
Definition: device/tensor_fill.h:1014
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space. 
Definition: device/tensor_fill.h:185
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Element other
Definition: device/tensor_fill.h:770
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state. 
Definition: device/tensor_fill.h:232
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Only update the diagonal element. 
Definition: device/tensor_fill.h:1066
Computes a random Gaussian distribution. 
Definition: device/tensor_fill.h:1112
TensorView view
Definition: device/tensor_fill.h:1130
Element other
Definition: device/tensor_fill.h:533
Parameters structure. 
Definition: device/tensor_fill.h:763
Launches a kernel calling a functor for each element in a tensor's index space. 
Definition: device/tensor_foreach.h:39
Parameters structure. 
Definition: device/tensor_fill.h:657
Array< Element, Layout::kRank > v
Definition: device/tensor_fill.h:876
void TensorUpdateDiagonal(TensorView< Element, Layout > view, Element diag=Element(1))
Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements...
Definition: device/tensor_fill.h:729
CUTLASS_DEVICE double random_uniform_float< double >(curandState_t *state)
Definition: device/tensor_fill.h:88
TensorView view
Definition: device/tensor_fill.h:769
CUTLASS_DEVICE FloatType random_normal_float(curandState_t *state)
Definition: device/tensor_fill.h:70
CUTLASS_DEVICE TensorUpdateOffDiagonalFunc(Params const &params)
Device-side initialization of RNG. 
Definition: device/tensor_fill.h:803
void BlockFillRandom(Element *ptr, size_t capacity, uint64_t seed, Distribution dist)
Fills a block of data with random values drawn from the given distribution (Gaussian or uniform). 
Definition: device/tensor_fill.h:973
Params(uint64_t seed_=0, Element mean_=0, Element stddev_=1, int int_scale_=-1)
Construction of Gaussian RNG functor. 
Definition: device/tensor_fill.h:115
CUTLASS_DEVICE TensorCopyDiagonalOutFunc(Params const &params)
Device-side initialization of RNG. 
Definition: device/tensor_fill.h:1164
void TensorFillLinear(TensorView< Element, Layout > view, Array< Element, Layout::kRank > const &v, Element s=Element(0))
Fills tensor with a linear combination of its coordinate and another vector. 
Definition: device/tensor_fill.h:937
Element * ptr
Definition: device/tensor_fill.h:1131
Params params
Parameters object. 
Definition: device/tensor_fill.h:559
Params params
Parameters object. 
Definition: device/tensor_fill.h:689
Parameters structure. 
Definition: device/tensor_fill.h:1124
void TensorUpdateOffDiagonal(TensorView< Element, Layout > view, Element other=Element(1))
Writes a uniform value to all elements in the tensor without modifying diagonal elements. 
Definition: device/tensor_fill.h:835
TensorView::Element T
Scalar type. 
Definition: device/tensor_fill.h:757
Element diag
Definition: device/tensor_fill.h:532
typename std::conditional<(sizeof(Element) > 4), int64_t, int >::type IntType
Definition: device/tensor_fill.h:96
CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const 
Returns a reference to the element at a given Coord. 
Definition: tensor_ref.h:307
CUTLASS_HOST_DEVICE Params()
Default ctor. 
Definition: device/tensor_fill.h:1135
TensorView view
Definition: device/tensor_fill.h:531
CUTLASS_DEVICE FloatType random_uniform_float(curandState_t *state)
Definition: device/tensor_fill.h:82
CUTLASS_HOST_DEVICE Params()
Default ctor. 
Definition: device/tensor_fill.h:668
void TensorFillRandomUniform(TensorView< Element, Layout > view, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)
Fills a tensor with random values with a uniform random distribution. 
Definition: device/tensor_fill.h:463
Params params
Parameters object. 
Definition: device/tensor_fill.h:1156
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space. 
Definition: device/tensor_fill.h:522
CUTLASS_DEVICE Element operator()()
Compute random value and update RNG state. 
Definition: device/tensor_fill.h:154
Definition: device/tensor_foreach.h:92
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state. 
Definition: device/tensor_fill.h:1170
Distribution type. 
Definition: distribution.h:38
Computes a random Gaussian distribution. 
Definition: device/tensor_fill.h:176
CUTLASS_HOST_DEVICE Params()
Default ctor. 
Definition: device/tensor_fill.h:774
CUTLASS_HOST_DEVICE Params()
Default ctor. 
Definition: device/tensor_fill.h:537
int int_scale
Random values are cast to integer after scaling by this power of two. 
Definition: distribution.h:67
TensorView::Element T
Scalar type. 
Definition: device/tensor_fill.h:519
TensorView::Element T
Scalar type. 
Definition: device/tensor_fill.h:182
Basic include for CUTLASS. 
Params(TensorView view_=TensorView(), typename RandomFunc::Params random_=typename RandomFunc::Params())
Construction of Gaussian RNG functor. 
Definition: device/tensor_fill.h:204
CUTLASS_DEVICE TensorFillRandomGaussianFunc(Params const &params)
Device-side initialization of RNG. 
Definition: device/tensor_fill.h:226
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state. 
Definition: device/tensor_fill.h:809
Params params
Parameters object. 
Definition: device/tensor_fill.h:903
Params(TensorView view_=TensorView(), Element other_=Element(0))
Construction of Gaussian RNG functor. 
Definition: device/tensor_fill.h:781
RandomFunc::Params random
Definition: device/tensor_fill.h:197
FloatType stddev
Definition: device/tensor_fill.h:107