SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
// smaug/core/tensor.h (excerpts).
#ifndef _CORE_TENSOR_H_
#define _CORE_TENSOR_H_

#include <initializer_list>
// ...
#include <google/protobuf/repeated_field.h>
// ...
#include "smaug/core/datatypes.h"
#include "smaug/core/tensor.pb.h"
#include "smaug/utility/utils.h"
// TensorShape (excerpt): describes the shape of a Tensor.
TensorShape() : layout(DataLayout::UnknownLayout) {}

TensorShape(std::vector<int> _dims, DataLayout _layout, int _alignment = 0)
        : dims_(_dims), padding_(dims_.size()), layout(_layout),
          alignment(_alignment) {
    // ...
}

// A second constructor overload with the same member initialization (excerpt).
        : dims_(_dims), padding_(dims_.size()), layout(_layout),
          alignment(_alignment) {
    // ...
}

// Construction from another TensorShape (excerpt).
        : layout(shape.layout), alignment(shape.alignment) {}

// Construction from a TensorShapeProto (excerpt): copy the dims, size the
// padding vector to match, and take the layout and alignment from the proto.
    std::copy(shapeProto.dims().begin(),
              shapeProto.dims().end(),
              std::back_inserter(dims_));
    padding_.resize(shapeProto.dims_size());
    layout = shapeProto.layout();
    alignment = shapeProto.alignment();
const std::vector<int>& dims() const { return dims_; }
int operator[](int index) const { return dims_[getIndex(index)]; }
int& operator[](int index) { return dims_[getIndex(index)]; }

// Returns the alignment-padded size of the specified dimension.
int getStorageDim(int index) const {
    return dims_[getIndex(index)] + padding_[getIndex(index)];
}

// Shapes are equal if their dimensions and layouts match.
bool operator==(const TensorShape& other) const {
    return (dims_ == other.dims_ && layout == other.layout);
}

DataLayout getLayout() const { return layout; }
int ndims() const { return dims_.size(); }
int size() const { return product(dims_); }
int storageSize() const { return product(sum(dims_, padding_)); }
int getAlignment() const { return alignment; }
int getPadding(int index) const { return padding_[index]; }

// Maps a (possibly negative) dimension index onto [0, ndims): negative
// indices count backwards from the last dimension.
int getIndex(int index) const {
    if (index >= 0)
        return index;
    return (dims_.size() + index);
}

void computePadding() {
    int ndims = dims_.size();
    // ...
    for (int i = 0; i < ndims - 1; i++)
        // ...
    // ...
}

std::vector<int> dims_;
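A minimal usage sketch (not part of tensor.h; it assumes the DataLayout::NCHW enumerator referenced later in this header, and that the constructor computes the alignment padding via computePadding()): size() counts logical elements, while storageSize() also counts the padding.

#include "smaug/core/tensor.h"

void tensorShapeExample() {
    using namespace smaug;
    // A 4D NCHW shape with 8-element alignment on the innermost dimension.
    TensorShape shape({ 1, 3, 8, 10 }, DataLayout::NCHW, 8);

    int logicalElems = shape.size();        // product of dims: 240
    int paddedElems = shape.storageSize();  // product of (dims + padding)
    int channels = shape[1];                // 3
    int width = shape[-1];                  // negative indices count from the back
}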
// TensorIndexIterator (excerpt): an iterator over a multidimensional
// tensor's indices, accounting for data alignment padding.
bool end() const { return atEnd; }

void operator++() { advanceRegion(advanceOne); }

void operator+=(const std::vector<int>& region) {
    assert(region.size() == state.size());
    advanceRegion(region);
}

template <typename... Args>
int operator()(int i, Args... args) {
    // ...
}

// operator!= (excerpt), defined in terms of operator==.
    return !(*this == other);

friend std::ostream& operator<<(std::ostream& os, /* ... */);

// Returns the linear index into the Tensor's underlying data container at
// the specified coordinates.
template <typename Container>
int getIndex(Container indices) const {
    int linearIndex = 0, stride = 1;
    for (int i = (int)indices.size() - 1; i >= 0; i--) {
        linearIndex += indices[i] * stride;
        // ...
    }
    // ...
}

// Advance the iterator by the given step along each dimension, propagating
// carries from the innermost dimension outward.
virtual void advanceRegion(const std::vector<int>& region) {
    // ...
    for (int i = (int)state.size() - 1; i >= 0 && carry; i--) {
        int currValue = state[i] + region[i];
        carry = (currValue >= dims[i]);
        // ...
        state[i] = currValue;
    }
    // ...
}
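operator++ passes advanceOne (a vector of all ones, per the member documentation below), so the common case is a plain odometer-style increment: bump the innermost index, and let any overflow carry into the next-outer dimension. A standalone sketch of that case (hypothetical helper, not SMAUG code; the wrap behavior on carry is elided in the excerpt above):

#include <vector>

// Advance a multidimensional index `state` by one position in row-major
// order. Returns true once the index has wrapped past the outermost
// dimension, i.e. the iteration is finished.
bool advanceIndexByOne(std::vector<int>& state, const std::vector<int>& dims) {
    bool carry = true;
    for (int i = (int)state.size() - 1; i >= 0 && carry; i--) {
        int currValue = state[i] + 1;
        carry = (currValue >= dims[i]);
        if (carry)
            currValue = 0;  // wrap this dimension and carry into the next one.
        state[i] = currValue;
    }
    return carry;  // still carrying past dimension 0 => at the end.
}

Starting from an all-zero state and calling this repeatedly visits every coordinate in row-major order.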
// TensorRegionIndexIterator (excerpt): a tensor index iterator that stays
// within a specified rectangular region.
        const std::vector<int>& _origin,
        const std::vector<int>& _regionSize)
        // ...
          regionSize(_regionSize) {
    // ...
}

// Advance the tensor region index with the specified region size. A carry is
// raised when the index leaves either the tensor or the region, and the
// wrapped dimension resets to the region's origin instead of zero.
virtual void advanceRegion(const std::vector<int>& advanceRegionSize) {
    // ...
    for (int i = (int)state.size() - 1; i >= 0 && carry; i--) {
        int currValue = state[i] + advanceRegionSize[i];
        carry = (currValue >= dims[i] ||
                 currValue >= origin[i] + regionSize[i]);
        if (carry)
            currValue = origin[i];
        state[i] = currValue;
    }
    // ...
}

std::vector<int> origin;
std::vector<int> regionSize;
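The region-bounded variant thus differs from the base iterator in two ways: the carry condition also fires at origin[i] + regionSize[i], and a wrapped dimension resets to origin[i] rather than to zero. A standalone sketch of walking such a rectangular window (hypothetical helper, not SMAUG code; it assumes the window lies inside the tensor, so the dims check from the excerpt is omitted):

#include <vector>

// Visit every coordinate inside a rectangular window that starts at `origin`
// and spans `regionSize` along each dimension (all entries assumed positive).
// `visit` is called once per coordinate, in row-major order.
template <typename Visit>
void forEachInRegion(const std::vector<int>& origin,
                     const std::vector<int>& regionSize,
                     Visit visit) {
    std::vector<int> state = origin;
    bool atEnd = false;
    while (!atEnd) {
        visit(state);
        bool carry = true;
        for (int i = (int)state.size() - 1; i >= 0 && carry; i--) {
            int currValue = state[i] + 1;
            carry = (currValue >= origin[i] + regionSize[i]);
            if (carry)
                currValue = origin[i];  // wrap back to the window's origin.
            state[i] = currValue;
        }
        atEnd = carry;  // carried past the outermost dimension.
    }
}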
// TensorBase (excerpt): the base class of all Tensor objects.
// Constructor (excerpt).
          dataType(UnknownDataType), dead(false) {}

// Construction from a serialized TensorProto (excerpt).
        : name(tensorProto.name()), shape(tensorProto.shape()),
          dataType(tensorProto.data_type()), dead(false) {}

std::string getName() const { return name; }
int ndims() const { return shape.ndims(); }
int dim(int index) const { return shape[index]; }
int getDataStorageFormat() const { return dataFormat; }
DataType getDataType() const { return dataType; }

// Size in bytes of a single element of this tensor's data type (switch over
// dataType; case labels elided).
int getDataTypeSize() const {
    // ...
            return sizeof(float16);
    // ...
            return sizeof(int32_t);
    // ...
            return sizeof(float);
    // ...
            return sizeof(int64_t);
    // ...
            return sizeof(double);
    // ...
            assert(false && "UnknownDataType has no size!");
    // ...
}

bool isDead() const { return dead; }
void setDead(bool _dead = true) { dead = _dead; }
virtual bool containsData() const = 0;
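Since storageSize() counts alignment-padded elements and getDataTypeSize() gives the width of one element in bytes, the byte footprint of a tensor's backing buffer is their product. A hypothetical convenience helper (not part of SMAUG) that makes the relationship explicit:

#include "smaug/core/tensor.h"

// Bytes needed to back a tensor: the alignment-padded element count times
// the per-element size reported by TensorBase::getDataTypeSize().
inline int tensorStorageBytes(const smaug::TensorShape& shape,
                              int dataTypeSize) {
    return shape.storageSize() * dataTypeSize;
}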
// Tensor (excerpt): represents a single multi-dimensional array of data.

// Construct a Tensor with the given name and shape.
        : TensorBase(_name, _shape), tensorData(NULL) {}

// Constructs a Tensor from serialized protobufs, dispatching on the proto's
// data type to fill the tensor's storage (case labels elided).
Tensor(const TensorProto& tensorProto, const TensorData& tensorData)
        // ...
{
    DataType dataType = tensorProto.data_type();
    // ...
            fillData<float>(tensorData.float_data());
    // ...
            fillData<double>(tensorData.double_data());
    // ...
            fillData<int>(tensorData.int_data());
    // ...
            fillData<int64_t>(tensorData.int64_data());
    // ...
            fillData<bool>(tensorData.bool_data());
    // ...
            assert(false && "Unknown data format!");
    // ...
}

virtual bool containsData() const { return tensorData != nullptr; }

// Fills the Tensor with externalData: element by element under
// USE_PEDANTIC_COPY, otherwise with a single bulk std::copy.
template <typename T>
void fillData(T* externalData, int size) {
    T* rawPtr = data<T>();
#ifdef USE_PEDANTIC_COPY
    for (int i = 0; i < size; i++) {
        rawPtr[i] = externalData[i];
    }
#else
    std::copy(externalData, externalData + size, rawPtr);
#endif
}

// Fills the Tensor from the given initializer list.
template <typename T>
void fillData(std::initializer_list<T> externalData) {
    T* rawPtr = data<T>();
#ifdef USE_PEDANTIC_COPY
    int i = 0;
    for (auto dataPtr = externalData.begin(); dataPtr != externalData.end();
         ++dataPtr, ++i) {
        rawPtr[i] = *dataPtr;
    }
#else
    std::copy(externalData.begin(), externalData.end(), rawPtr);
#endif
}
// Fills the Tensor from a protobuf repeated field, allocating storage first.
template <typename T>
void fillData(const google::protobuf::RepeatedField<T>& externalData) {
    allocateStorage<T>();
    T* rawPtr = data<T>();
#ifdef USE_PEDANTIC_COPY
    int i = 0;
    for (auto dataPtr = externalData.begin(); dataPtr != externalData.end();
         ++dataPtr, ++i) {
        rawPtr[i] = *dataPtr;
    }
#else
    std::copy(externalData.begin(), externalData.end(), rawPtr);
#endif
}

// Fill the tensor with float16 data. The proto packs two 16-bit halves into
// each 32-bit integer: the pedantic path unpacks them one element at a time,
// while the fast path copies the packed words directly.
void fillHalfData(const google::protobuf::RepeatedField<int>& externalData) {
    allocateStorage<float16>();
    float16* rawPtr = data<float16>();
#ifdef USE_PEDANTIC_COPY
    for (int i = 0; i < shape.storageSize(); i++) {
        bool useLowHalf = (i % 2 == 0);
        rawPtr[i] = externalData[i / 2] >> (useLowHalf ? 0 : 16);
    }
#else
    const int* externalPtr = externalData.data();
    memcpy(rawPtr, externalPtr, shape.storageSize() * sizeof(float16));
#endif
}
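fillHalfData() relies on that packing convention: even elements live in the low 16 bits of a word, odd elements in the high 16 bits. A standalone sketch of the same unpacking arithmetic (hypothetical helper; it uses plain uint16_t in place of SMAUG's float16 type):

#include <cstdint>
#include <vector>

// Unpack `count` 16-bit values from 32-bit words, two per word: element i
// comes from word i / 2, low half when i is even, high half when i is odd.
std::vector<uint16_t> unpackHalfWords(const std::vector<int32_t>& packed,
                                      int count) {
    std::vector<uint16_t> out(count);
    for (int i = 0; i < count; i++) {
        bool useLowHalf = (i % 2 == 0);
        out[i] = static_cast<uint16_t>(packed[i / 2] >> (useLowHalf ? 0 : 16));
    }
    return out;
}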
// Allocates memory to store Tensor data (templated on the element type).
template <typename T>
T* allocateStorage() {
    if (tensorData == NULL) {
        // ...
        int size = shape.storageSize();
        assert(size > 0 && "Attempted to allocate zero storage!");
        tensorData = std::shared_ptr<void>(
                // ...
        );
        // ...
    }
    return reinterpret_cast<T*>(tensorData.get());
}

// Allocates memory to store Tensor data, dispatching on a runtime DataType
// (case labels elided).
void allocateStorage(DataType _dataType) {
    // ...
            allocateStorage<float16>();
    // ...
            allocateStorage<float>();
    // ...
            allocateStorage<double>();
    // ...
            allocateStorage<int>();
    // ...
            allocateStorage<int64_t>();
    // ...
            allocateStorage<bool>();
    // ...
            assert(false && "Unknown data type!");
    // ...
}

// Returns a const pointer to the Tensor data.
template <typename T>
const T* data() const {
    // ...
    return reinterpret_cast<T*>(tensorData.get());
}

// Returns a non-const pointer to the Tensor data.
template <typename T>
T* data() {
    // ...
    return reinterpret_cast<T*>(tensorData.get());
}

std::shared_ptr<void> tensorData;
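Putting these members together, a minimal usage sketch (not from the SMAUG sources; it relies only on members shown or documented on this page, plus the assumption that the DataType enumerator for single-precision floats is named Float32):

#include "smaug/core/tensor.h"

void tensorExample() {
    using namespace smaug;
    TensorShape shape({ 1, 1, 2, 2 }, DataLayout::NCHW);
    Tensor weights("weights", shape);

    // The name/shape constructor does not allocate storage; allocate it
    // explicitly, then fill it.
    weights.allocateStorage(Float32);
    weights.fillData({ 0.5f, -1.0f, 2.0f, 0.25f });

    // data<T>() exposes the raw backing buffer.
    float* ptr = weights.data<float>();
    float first = ptr[0];
}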
// TiledTensor (excerpt): a multidimensional container of Tensors.

// Construct a TiledTensor.
TiledTensor(const TensorShape& shape,
            Tensor* _origTensor = nullptr,
            bool _useRawTensor = false)
        // ...

virtual bool containsData() const { return !tiles.empty(); }

TensorIndexIterator startIndex() const { return TensorIndexIterator(shape); }

int size() const { return shape.size(); }

// Returns true if this TiledTensor is tiled along the N and H logical
// dimensions.
bool isDimNHTiled() const {
    if (shape.ndims() != 4)
        return false;
    // ...
    return ((shape.getLayout() == DataLayout::NHWC && shape[1] > 1) ||
            (shape.getLayout() == DataLayout::NCHW && shape[2] > 1));
}

// ...
        const std::vector<int>& origin,
// ...

static void* tileCopyWorker(void* _args);

// ... (constructor initializer list of a helper type, excerpt)
        : tiledTensor(_tiledTensor), start(_start), numTiles(_numTiles),
// ...

Tile* getTile(int index) { return &tiles[index]; }
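A sketch of the TiledTensor workflow using only members documented below (hypothetical grid shape; it assumes the TiledTensor's own shape describes the grid of tiles rather than element dimensions):

#include "smaug/core/tensor.h"

void tiledTensorExample(smaug::Tensor* large) {
    using namespace smaug;
    // A 1x1x2x2 grid of tiles covering the original tensor.
    TensorShape tileGrid({ 1, 1, 2, 2 }, DataLayout::NCHW);
    TiledTensor tiles(tileGrid, large);

    // ... place a Tensor at each grid position with
    // setTile(index, origin, tensor, copyData), then fill them all:
    tiles.copyDataToAllTiles();

    // Work on an individual tile, then merge the results back.
    Tensor* t0 = tiles.getTileWithData(0);
    // ... run a kernel on t0 ...
    tiles.untile();  // copies tile data back into the original Tensor
}

Brief member documentation referenced above: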
std::vector< int > padding_
Padding along each dimension.
Tensor represents a single multi-dimensional array of data.
TensorProto * asTensorProto()
Serializes this Tensor to a TensorProto.
A tile is a rectangular portion of a larger Tensor.
const T * data() const
Returns a const pointer to the Tensor data.
TensorIndexIterator startIndex() const
Returns an iterator starting at the beginning of the Tensor.
void allocateStorage(DataType _dataType)
Allocates memory to store Tensor data.
DataStorageFormat dataFormat
Indicates the compression format of the data.
std::string name
Name of the Tensor.
Tensor(const TensorProto &tensorProto, const TensorData &tensorData)
Constructs a Tensor from serialized protobufs.
void * malloc_aligned(size_t size, bool zeroOut)
Return heap-allocated cacheline-aligned memory.
T * data()
Returns a non-const pointer to the Tensor data.
Tensor * getTileWithData(int index)
Returns a Tensor at the specified tile position, with data copied from the original tensor.
TensorShape shape
Shape of the Tensor.
std::vector< T > sum(std::vector< T > array0, std::vector< T > array1)
Returns the elementwise-sum of the two arrays, which must be of the same size.
Tensor * origTensor
The original Tensor that was tiled into this TiledTensor.
A tensor index iterator that stays within a specified rectangular region.
void fillData(std::initializer_list< T > externalData)
Fills the Tensor element by element from the given initializer list.
int getStorageDim(int index) const
Returns the alignment-padded size of the specified dimension.
void fillHalfData(const google::protobuf::RepeatedField< int > &externalData)
Fill the tensor with float16 data.
T * allocateStorage()
Allocates memory to store Tensor data.
virtual void advanceRegion(const std::vector< int > &advanceRegionSize)
Advance the tensor region index with the specified region size.
bool dataFilled
True if all the tiles have data filled.
void gatherDataFromTile(Tile *tile)
Copy data from this tile to the original Tensor.
A multidimensional container of Tensors.
bool hasOrigin
True if the tile has its origin set.
Tensor(const std::string &_name, const TensorShape &_shape)
Construct a Tensor with the given name and shape.
const std::vector< int > advanceOne
A vector of all ones, used to implement operator++.
const std::vector< int > & padding() const
Returns a vector of padding along each dimension.
void setTile(int index, const std::vector< int > &origin, Tensor *tensor, bool copyData)
Set the specified tile to the provided Tensor, and optionally copy data into it.
friend std::ostream & operator<<(std::ostream &os, const Tensor &tensor)
Prints the contents of the Tensor to the given ostream.
std::vector< int > state
The current location of the iterator.
int getIndex(Container indices) const
Returns the linear index into the Tensor's underlying data container at the specified coordinates.
std::vector< int > padding
Alignment padding of the Tensor.
bool hasData
True if we have copied data to this tile.
TensorShapeProto * asTensorShapeProto()
Return a TensorShapeProto that serializes this TensorShape.
TensorShape describes the shape of a Tensor.
void fillData(T *externalData, int size)
Fills the Tensor with externalData.
Provides compile-time conversion from C types to SMAUG DataTypes.
void parallelCopyTileData(TileDataOperation op)
Split the work (data filling or gathering) across multiple threads.
bool isDimNHTiled() const
Returns true if this TiledTensor is tiled along the N and H logical dimensions.
int currentIndex(int dim) const
Returns the current index of the iterator along the specified dimension.
bool useRawTensor
True if we should use copyRawTensorData() for copying data.
void fillData(const google::protobuf::RepeatedField< T > &externalData)
Fills the Tensor element by element from a protobuf repeated field.
bool atEnd
If true, we've reached the end of the Tensor.
The base class of all Tensor objects.
Tensor *& operator[](int index)
Returns a mutable reference to the Tensor at the given linear index.
std::vector< int > dims
The dimensions of this iterator's Tensor.
The smaug namespace is the parent namespace of all C++ code in SMAUG.
int calc_padding(int value, unsigned alignment)
Return the difference between value and the next multiple of alignment.
TileDataOperation
Specifies what to do with the data in the original Tensor and tiles.
Tensor * tensor
The new smaller Tensor of this tile.
void untile()
Copies data from the TiledTensor into the original Tensor.
void copyDataToAllTiles()
Copies data (if needed) to all the tiles from the original Tensor.
const Tensor * operator[](int index) const
Returns a const pointer to the Tensor at the given linear index.
TiledTensor(const TensorShape &shape, Tensor *_origTensor=nullptr, bool _useRawTensor=false)
Construct a TiledTensor.
Scatter
Copies data from a contiguous Tensor to the tiles.
std::array< T, sizeof...(Args)+1 > variadicToArray(T i, Args... elems)
Returns a std::array populated with the given elements.
An iterator over a multidimensional tensor's indices, accounting for data alignment padding.
bool dead
If true, the tensor is dead, which means it is on an untaken control flow path.
std::vector< int > origin
The tile's coordinate origins in the original tensor.
std::vector< Tile > tiles
The list of Tiles, indexed using a TensorIndexIterator.
void copyDataToTile(Tile *tile)
Copy data (if needed) to this tile from the original Tensor.
Tile()
Construct a new blank Tile.
Gather
Copies data from the tiles to a contiguous Tensor.
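The alignment padding that storageSize() and getStorageDim() account for follows the calc_padding() contract documented above: a dimension's padding is the distance from its size up to the next multiple of the alignment. A small worked sketch (hypothetical helper that mirrors the documented contract rather than calling SMAUG's calc_padding()):

#include <cassert>

// Distance from `value` up to the next multiple of `alignment`
// (zero if value is already aligned or no alignment is requested).
int paddingTo(int value, int alignment) {
    if (alignment == 0 || value % alignment == 0)
        return 0;
    return alignment - (value % alignment);
}

int main() {
    // A dimension of 10 elements with 8-element alignment is stored as
    // 10 + 6 = 16 elements.
    assert(paddingTo(10, 8) == 6);
    assert(paddingTo(16, 8) == 0);
    return 0;
}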