SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
tensor.h
1 #ifndef _CORE_TENSOR_H_
2 #define _CORE_TENSOR_H_
3 
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <google/protobuf/repeated_field.h>

#include "smaug/core/datatypes.h"
#include "smaug/core/tensor.pb.h"
#include "smaug/utility/utils.h"
17 
18 namespace smaug {
19 
35 class TensorShape {
36  public:
37  TensorShape() : layout(DataLayout::UnknownLayout) {}
38  TensorShape(std::vector<int> _dims, DataLayout _layout, int _alignment = 0)
39  : dims_(_dims), padding_(dims_.size()), layout(_layout),
40  alignment(_alignment) {
41  computePadding();
42  }
43  TensorShape(std::initializer_list<int> _dims,
44  DataLayout _layout,
45  int _alignment = 0)
46  : dims_(_dims), padding_(dims_.size()), layout(_layout),
47  alignment(_alignment) {
48  computePadding();
49  }
50  TensorShape(const TensorShape& shape)
51  : dims_(shape.dims_), padding_(shape.padding_),
52  layout(shape.layout), alignment(shape.alignment) {}
53  TensorShape(const TensorShapeProto& shapeProto) {
54  std::copy(shapeProto.dims().begin(),
55  shapeProto.dims().end(),
56  std::back_inserter(dims_));
57  padding_.resize(shapeProto.dims_size());
58  layout = shapeProto.layout();
59  alignment = shapeProto.alignment();
60  computePadding();
61  }
62 
63  const std::vector<int>& dims() const { return dims_; }
65  const std::vector<int>& padding() const { return padding_; }
66  int operator[](int index) const { return dims_[getIndex(index)]; }
67  int& operator[](int index) { return dims_[getIndex(index)]; }
69  int getStorageDim(int index) const {
70  return dims_[getIndex(index)] + padding_[getIndex(index)];
71  }
72  bool operator==(const TensorShape& other) const {
73  return (dims_ == other.dims_ && layout == other.layout);
74  }
75  DataLayout getLayout() const { return layout; }
76  int ndims() const { return dims_.size(); }
77  int size() const { return product(dims_); }
78  int storageSize() const { return product(sum(dims_, padding_)); }
79  int getAlignment() const { return alignment; }
80  int getPadding(int index) const { return padding_[index]; }
81 
83  TensorShapeProto* asTensorShapeProto();
84 
85  protected:
86  int getIndex(int index) const {
87  if (index >= 0) return index;
88  return (dims_.size() + index);
89  }
90 
91  void computePadding() {
92  int ndims = dims_.size();
93  padding_[ndims - 1] = calc_padding(dims_[ndims - 1], alignment);
94  for (int i = 0; i < ndims - 1; i++)
95  padding_[i] = 0;
96  }
97  std::vector<int> dims_;
99  std::vector<int> padding_;
100  DataLayout layout;
101  int alignment;
102 };
103 
129  public:
130  TensorIndexIterator(const TensorShape& shape, bool _atEnd = false)
131  : dims(shape.dims()), padding(shape.padding()), atEnd(_atEnd),
132  advanceOne(std::vector<int>(dims.size(), 1)) {
133  state.resize(dims.size(), 0);
134  }
135 
136  operator int() const { return getIndex(state); }
137 
138  bool end() const { return atEnd; }
139 
140  void operator++() { advanceRegion(advanceOne); }
141 
142  void operator+=(const std::vector<int>& region) {
143  assert(region.size() == state.size());
144  advanceRegion(region);
145  }
146 
147  template <typename... Args>
148  int operator()(int i, Args... args) {
149  auto indices = variadicToArray(i, args...);
150  return getIndex(indices);
151  }
152 
153  bool operator==(const TensorIndexIterator& other) const {
154  return (state == other.state && dims == other.dims &&
155  padding == other.padding && atEnd == other.atEnd);
156  }
157 
158  bool operator!=(const TensorIndexIterator& other) const {
159  return !(*this == other);
160  }
161 
162  friend std::ostream& operator<<(std::ostream& os,
163  const TensorIndexIterator& iter);
164 
166  int currentIndex(int dim) const { return state[dim]; }
167 
168  protected:
173  template <typename Container>
174  int getIndex(Container indices) const {
175  int linearIndex = 0, stride = 1;
176  for (int i = (int)indices.size() - 1; i >= 0; i--) {
177  linearIndex += indices[i] * stride;
178  stride *= (dims.at(i) + padding.at(i));
179  }
180  return linearIndex;
181  }
182 
183  /*
184  * Advance the current iterator position by the given region size.
185  *
186  * @param region An N-dim vector indicating how far to increment in each
187  * dimension, if the previous dimension overflowed and caused a carry-over
188  * into the next dimension.
189  */
190  virtual void advanceRegion(const std::vector<int>& region) {
191  bool carry = true;
192  for (int i = (int)state.size() - 1; i >= 0 && carry; i--) {
193  int currValue = state[i] + region[i];
194  carry = (currValue >= dims[i]);
195  if (carry)
196  currValue = 0;
197  state[i] = currValue;
198  }
199  if (carry)
200  atEnd = true;
201  }
202 
204  std::vector<int> state;
206  std::vector<int> dims;
208  std::vector<int> padding;
210  bool atEnd;
212  const std::vector<int> advanceOne;
213 };
214 
232  public:
234  const std::vector<int>& _origin,
235  const std::vector<int>& _regionSize)
236  : TensorIndexIterator(shape, false), origin(_origin),
237  regionSize(_regionSize) {
238  state = origin;
239  }
240 
241  protected:
243  virtual void advanceRegion(const std::vector<int>& advanceRegionSize) {
244  bool carry = true;
245  for (int i = (int)state.size() - 1; i >= 0 && carry; i--) {
246  int currValue = state[i] + advanceRegionSize[i];
247  carry = (currValue >= dims[i] ||
248  currValue >= origin[i] + regionSize[i]);
249  if (carry)
250  currValue = origin[i];
251  state[i] = currValue;
252  }
253  if (carry)
254  atEnd = true;
255  }
256 
257  std::vector<int> origin;
258  std::vector<int> regionSize;
259 };
260 
269 class TensorBase {
270  public:
271  TensorBase() : name(""), dataFormat(UnknownStorageFormat), dead(false) {}
272  virtual ~TensorBase() {}
273 
274  TensorBase(const std::string& _name, const TensorShape& _shape)
275  : name(_name), shape(_shape), dataFormat(Uncompressed),
276  dataType(UnknownDataType), dead(false) {}
277 
278  TensorBase(const TensorProto& tensorProto)
279  : name(tensorProto.name()), shape(tensorProto.shape()),
280  dataFormat(tensorProto.data_format()),
281  dataType(tensorProto.data_type()), dead(false) {}
282 
283  // TODO: Do we need a copy constructor?
284 
285  std::string getName() const { return name; }
286  const TensorShape& getShape() const { return shape; }
287  int ndims() const { return shape.ndims(); }
288  int dim(int index) const { return shape[index]; }
289  int getTotalDim(int index) const { return shape.getStorageDim(index); }
290  int getDataStorageFormat() const { return dataFormat; }
291  DataType getDataType() const { return dataType; }
292  int getDataTypeSize() const {
293  switch (dataType) {
294  case Float16:
295  return sizeof(float16);
296  case Int32:
297  return sizeof(int32_t);
298  case Float32:
299  return sizeof(float);
300  case Int64:
301  return sizeof(int64_t);
302  case Float64:
303  return sizeof(double);
304  case Bool:
305  return sizeof(bool);
306  default:
307  assert(false && "UnknownDataType has no size!");
308  return 0;
309  }
310  }
311  bool isDead() const { return dead; }
312  void setDead(bool _dead = true) { dead = _dead; }
313  virtual bool containsData() const = 0;
314 
315  protected:
317  std::string name;
324  DataStorageFormat dataFormat;
325  DataType dataType;
331  bool dead;
332 };
333 
344 class Tensor : public TensorBase {
345  public:
346  Tensor() : TensorBase(), tensorData(NULL) {}
347 
349  Tensor(const std::string& _name, const TensorShape& _shape)
350  : TensorBase(_name, _shape), tensorData(NULL) {}
351  virtual ~Tensor() {}
352 
359  Tensor(const TensorProto& tensorProto, const TensorData& tensorData)
360  : TensorBase(tensorProto), tensorData(NULL) {
361  DataType dataType = tensorProto.data_type();
362  switch (dataType) {
363  case Float16:
364  fillHalfData(tensorData.half_data());
365  break;
366  case Float32:
367  fillData<float>(tensorData.float_data());
368  break;
369  case Float64:
370  fillData<double>(tensorData.double_data());
371  break;
372  case Int32:
373  fillData<int>(tensorData.int_data());
374  break;
375  case Int64:
376  fillData<int64_t>(tensorData.int64_data());
377  break;
378  case Bool:
379  fillData<bool>(tensorData.bool_data());
380  break;
381  default:
382  assert(false && "Unknown data format!");
383  }
384  }
385 
388  return TensorIndexIterator(shape);
389  }
390 
391  virtual bool containsData() const { return tensorData != nullptr; }
392 
399  template <typename T>
400  void fillData(T* externalData, int size) {
401  T* rawPtr = data<T>();
402 #ifdef USE_PEDANTIC_COPY
403  for (int i = 0; i < size; i++) {
404  rawPtr[i] = externalData[i];
405  }
406 #else
407  std::copy(externalData, externalData + size, rawPtr);
408 #endif
409  }
410 
414  template <typename T>
415  void fillData(std::initializer_list<T> externalData) {
416  T* rawPtr = data<T>();
417 #ifdef USE_PEDANTIC_COPY
418  int i = 0;
419  for (auto dataPtr = externalData.begin(); dataPtr != externalData.end();
420  ++dataPtr, ++i) {
421  rawPtr[i] = *dataPtr;
422  }
423 #else
424  std::copy(externalData.begin(), externalData.end(), rawPtr);
425 #endif
426  }
427 
431  template <typename T>
432  void fillData(const google::protobuf::RepeatedField<T>& externalData) {
433  allocateStorage<T>();
434  T* rawPtr = data<T>();
435 #ifdef USE_PEDANTIC_COPY
436  int i = 0;
437  for (auto dataPtr = externalData.begin(); dataPtr != externalData.end();
438  ++dataPtr, ++i) {
439  rawPtr[i] = *dataPtr;
440  }
441 #else
442  std::copy(externalData.begin(), externalData.end(), rawPtr);
443 #endif
444  }
445 
453  const google::protobuf::RepeatedField<int>& externalData) {
454  allocateStorage<float16>();
455  float16* rawPtr = data<float16>();
456 #ifdef USE_PEDANTIC_COPY
457  for (int i = 0; i < shape.storageSize(); i++) {
458  bool useLowHalf = (i % 2 == 0);
459  rawPtr[i] = externalData[i / 2] >> (useLowHalf ? 0 : 16);
460  }
461 #else
462  const int* externalPtr = externalData.data();
463  memcpy(rawPtr, externalPtr, shape.storageSize() * sizeof(float16));
464 #endif
465  }
466 
472  template <typename T>
474  if (tensorData == NULL) {
475  dataType = ToDataType<T>::dataType;
476  int size = shape.storageSize();
477  assert(size > 0 && "Attempted to allocate zero storage!");
478  tensorData = std::shared_ptr<void>(
479  malloc_aligned(size * sizeof(T), false), free);
480  }
481  return reinterpret_cast<T*>(tensorData.get());
482  }
483 
489  void allocateStorage(DataType _dataType) {
490  switch (_dataType) {
491  case Float16:
492  allocateStorage<float16>();
493  return;
494  case Float32:
495  allocateStorage<float>();
496  return;
497  case Float64:
498  allocateStorage<double>();
499  return;
500  case Int32:
501  allocateStorage<int>();
502  return;
503  case Int64:
504  allocateStorage<int64_t>();
505  return;
506  case Bool:
507  allocateStorage<bool>();
508  return;
509  default:
510  assert(false && "Unknown data type!");
511  }
512  }
513 
515  TensorProto* asTensorProto();
516 
520  template <typename T>
521  const T* data() const {
522  assert(ToDataType<T>::dataType == dataType);
523  return reinterpret_cast<T*>(tensorData.get());
524  }
525 
529  template <typename T>
530  T* data() {
531  assert(ToDataType<T>::dataType == dataType);
532  return reinterpret_cast<T*>(tensorData.get());
533  }
534 
538  friend std::ostream& operator<<(std::ostream& os, const Tensor& tensor);
539 
540  protected:
541  std::shared_ptr<void> tensorData;
542 };
543 
552 class TiledTensor : public TensorBase {
553  public:
554  TiledTensor(Tensor* _origTensor = nullptr, bool _useRawTensor = false)
555  : TensorBase(), origTensor(_origTensor), useRawTensor(_useRawTensor),
556  dataFilled(false) {}
568  Tensor* _origTensor = nullptr,
569  bool _useRawTensor = false)
570  : TensorBase("", shape), origTensor(_origTensor),
571  useRawTensor(_useRawTensor), dataFilled(false) {
572  tiles.resize(shape.size());
573  }
574 
575  virtual bool containsData() const { return !tiles.empty(); }
576 
577  TensorIndexIterator startIndex() const { return TensorIndexIterator(shape); }
578 
580  const Tensor* operator[](int index) const { return tiles.at(index).tensor; }
582  Tensor*& operator[](int index) { return tiles[index].tensor; }
583  int size() const { return shape.size(); }
584 
589  bool isDimNHTiled() const {
590  if (tiles.empty())
591  return false;
592  if (shape.ndims() != 4)
593  return false;
594  // DimNH tiled means that there is more than one block in the row
595  // dimension.
596  return ((shape.getLayout() == DataLayout::NHWC && shape[1] > 1) ||
597  (shape.getLayout() == DataLayout::NCHW && shape[2] > 1));
598  }
599 
604  Tensor* getTileWithData(int index);
605 
610  void setTile(int index,
611  const std::vector<int>& origin,
612  Tensor* tensor,
613  bool copyData);
614 
616  void copyDataToAllTiles();
617 
622  void untile();
623 
624  static void* tileCopyWorker(void* _args);
625 
626  protected:
630  struct Tile {
634  std::vector<int> origin;
636  bool hasOrigin;
638  bool hasData;
639 
645  Tile() : tensor(nullptr), origin(), hasOrigin(false), hasData(false) {}
646  };
647 
655  Gather
656  };
657 
658  struct CopyTilesArgs {
659  TiledTensor* tiledTensor;
660  int start;
661  int numTiles;
663 
664  CopyTilesArgs(TiledTensor* _tiledTensor,
665  int _start,
666  int _numTiles,
667  TileDataOperation _op)
668  : tiledTensor(_tiledTensor), start(_start), numTiles(_numTiles),
669  op(_op) {}
670  };
671 
672  Tile* getTile(int index) { return &tiles[index]; }
673 
675  void copyDataToTile(Tile* tile);
676 
678  void gatherDataFromTile(Tile* tile);
679 
682 
685 
688 
691 
693  std::vector<Tile> tiles;
694 };
695 
696 } // namespace smaug
697 
698 #endif
smaug::TensorShape::padding_
std::vector< int > padding_
Padding along each dimension.
Definition: tensor.h:99
smaug::Tensor
Tensor represents a single multi-dimensional array of data.
Definition: tensor.h:344
smaug::Tensor::asTensorProto
TensorProto * asTensorProto()
Serializes this Tensor to a TensorProto.
Definition: tensor.cpp:16
smaug::TiledTensor::Tile
A tile is a rectangular portion of a larger Tensor.
Definition: tensor.h:630
smaug::Tensor::data
const T * data() const
Returns a const pointer to the Tensor data.
Definition: tensor.h:521
smaug::Tensor::startIndex
TensorIndexIterator startIndex() const
Returns an iterator starting at the beginning of the Tensor.
Definition: tensor.h:387
smaug::Tensor::allocateStorage
void allocateStorage(DataType _dataType)
Allocates memory to store Tensor data.
Definition: tensor.h:489
smaug::TensorBase::dataFormat
DataStorageFormat dataFormat
Indicates the compression format of the data.
Definition: tensor.h:324
smaug::TensorBase::name
std::string name
Name of the Tensor.
Definition: tensor.h:317
smaug::Tensor::Tensor
Tensor(const TensorProto &tensorProto, const TensorData &tensorData)
Constructs a Tensor from serialized protobufs.
Definition: tensor.h:359
smaug::malloc_aligned
void * malloc_aligned(size_t size, bool zeroOut)
Return heap-allocated cacheline-aligned memory.
Definition: utils.cpp:9
smaug::Tensor::data
T * data()
Returns a non-const pointer to the Tensor data.
Definition: tensor.h:530
smaug::TiledTensor::getTileWithData
Tensor * getTileWithData(int index)
Returns a Tensor at the specified tile position, with data copied from the original tensor.
Definition: tensor.cpp:65
smaug::TensorBase::shape
TensorShape shape
Shape of the Tensor.
Definition: tensor.h:319
smaug::sum
std::vector< T > sum(std::vector< T > array0, std::vector< T > array1)
Returns the elementwise-sum of the two arrays, which must be of the same size.
Definition: utils.h:27
smaug::TiledTensor::origTensor
Tensor * origTensor
The original Tensor that was tiled into this TiledTensor.
Definition: tensor.h:687
smaug::TensorRegionIndexIterator
A tensor index iterator that stays within a specified rectangular region.
Definition: tensor.h:231
smaug::Tensor::fillData
void fillData(std::initializer_list< T > externalData)
Fills the Tensor byte-by-byte from the given initializer list.
Definition: tensor.h:415
smaug::TensorShape::getStorageDim
int getStorageDim(int index) const
Returns the alignment-padded size of the specified dimension.
Definition: tensor.h:69
smaug::Tensor::fillHalfData
void fillHalfData(const google::protobuf::RepeatedField< int > &externalData)
Fill the tensor with float16 data.
Definition: tensor.h:452
smaug::Tensor::allocateStorage
T * allocateStorage()
Allocates memory to store Tensor data.
Definition: tensor.h:473
smaug::TensorRegionIndexIterator::advanceRegion
virtual void advanceRegion(const std::vector< int > &advanceRegionSize)
Advance the tensor region index with the specified region size.
Definition: tensor.h:243
smaug::TiledTensor::dataFilled
bool dataFilled
True if all the tiles have data filled.
Definition: tensor.h:690
smaug::TiledTensor::gatherDataFromTile
void gatherDataFromTile(Tile *tile)
Copy data from this tile to the original Tensor.
Definition: tensor.cpp:171
smaug::TiledTensor
A multidimensional container of Tensors.
Definition: tensor.h:552
smaug::TiledTensor::Tile::hasOrigin
bool hasOrigin
True if the tile has its origin set.
Definition: tensor.h:636
smaug::Tensor::Tensor
Tensor(const std::string &_name, const TensorShape &_shape)
Construct a Tensor with the given name and shape.
Definition: tensor.h:349
smaug::TensorIndexIterator::advanceOne
const std::vector< int > advanceOne
A vector of all ones, used to implement operator++.
Definition: tensor.h:212
smaug::TensorShape::padding
const std::vector< int > & padding() const
Returns a vector of padding along each dimension.
Definition: tensor.h:65
smaug::TiledTensor::setTile
void setTile(int index, const std::vector< int > &origin, Tensor *tensor, bool copyData)
Set the specified tile to the provided Tensor, and optionally copy data into it.
Definition: tensor.cpp:71
smaug::Tensor::operator<<
friend std::ostream & operator<<(std::ostream &os, const Tensor &tensor)
Prints the contents of the Tensor to the given ostream.
Definition: tensor_utils.cpp:38
smaug::TensorIndexIterator::state
std::vector< int > state
The current location of the iterator.
Definition: tensor.h:204
smaug::TensorIndexIterator::getIndex
int getIndex(Container indices) const
Returns the linear index into the Tensor's underlying data container at the specified coordinates.
Definition: tensor.h:174
smaug::TensorIndexIterator::padding
std::vector< int > padding
Alignment padding of the Tensor.
Definition: tensor.h:208
smaug::TiledTensor::Tile::hasData
bool hasData
True if we have copied data to this tile.
Definition: tensor.h:638
smaug::TensorShape::asTensorShapeProto
TensorShapeProto * asTensorShapeProto()
Return a TensorShapeProto that serializes this TensorShape.
Definition: tensor.cpp:8
smaug::TensorShape
TensorShape describes the shape of a Tensor.
Definition: tensor.h:35
smaug::Tensor::fillData
void fillData(T *externalData, int size)
Fills the Tensor with externalData.
Definition: tensor.h:400
smaug::ToDataType
Provides compile-time conversion from C types to SMAUG DataTypes.
Definition: datatypes.h:18
smaug::TiledTensor::parallelCopyTileData
void parallelCopyTileData(TileDataOperation op)
Split the work (data filling or gathering) across multiple threads.
Definition: tensor.cpp:100
smaug::TiledTensor::isDimNHTiled
bool isDimNHTiled() const
Returns true if this TiledTensor is tiled along the N and H logical dimensions.
Definition: tensor.h:589
smaug::TensorIndexIterator::currentIndex
int currentIndex(int dim) const
This returns the current index of the iterator on the specified dim.
Definition: tensor.h:166
smaug::TiledTensor::useRawTensor
bool useRawTensor
True if we should use copyRawTensorData() for copying data.
Definition: tensor.h:684
smaug::Tensor::fillData
void fillData(const google::protobuf::RepeatedField< T > &externalData)
Fills the Tensor byte-by-byte from a protobuf repeated field.
Definition: tensor.h:432
smaug::TensorIndexIterator::atEnd
bool atEnd
If true, we've reached the end of the Tensor.
Definition: tensor.h:210
smaug::TensorBase
The base class of all Tensor objects.
Definition: tensor.h:269
smaug::TiledTensor::operator[]
Tensor *& operator[](int index)
Returns a mutable reference to the Tensor at the given linear index.
Definition: tensor.h:582
smaug::TiledTensor::CopyTilesArgs
Definition: tensor.h:658
smaug::TensorIndexIterator::dims
std::vector< int > dims
The dimensions of this iterator's Tensor.
Definition: tensor.h:206
smaug
The smaug namespace is the parent namespace of all C++ code in SMAUG.
Definition: backend.cpp:38
smaug::calc_padding
int calc_padding(int value, unsigned alignment)
Return the difference between value and the next multiple of alignment.
Definition: utils.cpp:35
smaug::TiledTensor::TileDataOperation
TileDataOperation
Specifies what to do with the data in the original Tensor and tiles.
Definition: tensor.h:651
smaug::TiledTensor::Tile::tensor
Tensor * tensor
The new smaller Tensor of this tile.
Definition: tensor.h:632
smaug::TiledTensor::untile
void untile()
Copies data from the TiledTensor into the original Tensor.
Definition: tensor.cpp:153
smaug::TiledTensor::copyDataToAllTiles
void copyDataToAllTiles()
Copies data (if needed) to all the tiles from the original Tensor.
Definition: tensor.cpp:116
smaug::TiledTensor::operator[]
const Tensor * operator[](int index) const
Returns a const pointer to the Tensor at the given linear index.
Definition: tensor.h:580
smaug::TiledTensor::TiledTensor
TiledTensor(const TensorShape &shape, Tensor *_origTensor=nullptr, bool _useRawTensor=false)
Construct a TiledTensor.
Definition: tensor.h:567
smaug::TiledTensor::Scatter
@ Scatter
Copies data from a contiguous Tensor to the tiles.
Definition: tensor.h:653
smaug::variadicToArray
std::array< T, sizeof...(Args)+1 > variadicToArray(T i, Args... elems)
Returns a std::array populated with the given elements.
Definition: utils.h:57
smaug::TensorIndexIterator
An iterator over a multidimensional tensor's indices, accounting for data alignment padding.
Definition: tensor.h:128
smaug::TensorBase::dead
bool dead
If true, the tensor is dead, which means it is on an untaken control flow path.
Definition: tensor.h:331
smaug::TiledTensor::Tile::origin
std::vector< int > origin
The tile's coordinate origins in the original tensor.
Definition: tensor.h:634
smaug::TiledTensor::tiles
std::vector< Tile > tiles
The list of Tiles, indexed using a TensorIndexIterator.
Definition: tensor.h:693
smaug::TiledTensor::copyDataToTile
void copyDataToTile(Tile *tile)
Copy data (if needed) to this tile from the original Tensor.
Definition: tensor.cpp:132
smaug::TiledTensor::Tile::Tile
Tile()
Construct a new blank Tile.
Definition: tensor.h:645
smaug::TiledTensor::Gather
@ Gather
Copies data from the tiles to a contiguous Tensor.
Definition: tensor.h:655