4 #include "smaug/core/tensor.h"
6 #include "smaug/core/workspace.h"
7 #include "smaug/utility/debug_stream.h"
// float16 overload of printTensorElement: the element at data[index] is passed
// through fp16_ieee_to_fp32_value() and the converted value is written to os.
// NOTE(review): the template header and the remaining parameters are not
// visible in this excerpt.
12 void printTensorElement<float16>(std::ostream& os,
15 os << fp16_ieee_to_fp32_value(data[index]);
// Stream-insertion operator for TensorShape. Visits dimensions
// 0 .. shape.ndims() - 1 in order.
18 std::ostream& operator<<(std::ostream& os,
const TensorShape& shape) {
20 for (
int i = 0; i < shape.ndims(); i++) {
// Taken for every dimension except the last one. Presumably this emits a
// separator between entries; the guarded statement is not visible here.
22 if (i != shape.ndims() - 1)
// Stream-insertion operator for TensorIndexIterator: writes iter.state[i]
// followed by a single space for each i in [0, iter.dims.size()).
// NOTE(review): i is a signed int compared against dims.size(); if dims is a
// standard container this is a signed/unsigned comparison -- confirm.
29 std::ostream& operator<<(std::ostream& os,
const TensorIndexIterator& iter) {
31 for (
int i = 0; i < iter.dims.size(); ++i) {
32 os << iter.state[i] <<
" ";
// Stream-insertion operator for Tensor. Reads the tensor's DataType and
// forwards to writeTensorToOstream<T> instantiated for one of uint16_t, float,
// double, int, int64_t or bool.
// NOTE(review): the switch and its case labels are not visible in this
// excerpt, so which DataType selects which instantiation cannot be confirmed
// from here.
38 std::ostream& operator<<(std::ostream& os,
const Tensor& tensor) {
39 DataType type = tensor.getDataType();
42 writeTensorToOstream<uint16_t>(os, tensor);
45 writeTensorToOstream<float>(os, tensor);
48 writeTensorToOstream<double>(os, tensor);
51 writeTensorToOstream<int>(os, tensor);
54 writeTensorToOstream<int64_t>(os, tensor);
57 writeTensorToOstream<bool>(os, tensor);
// Fallback path: aborts in builds where assert() is active.
60 assert(
false &&
"Unknown data type!");
// Type-dispatching front end for internal::copyTensorRegion<T>.
// destOrigin, srcOrigin and regionSize are taken by value and handed through
// unchanged, together with dest and src.
// NOTE(review): the function name and leading parameters are not visible in
// this excerpt.
67 std::vector<int> destOrigin,
68 std::vector<int> srcOrigin,
69 std::vector<int> regionSize) {
// Both tensors must have the same rank and the same element type.
70 assert(dest->ndims() == src->ndims());
71 assert(dest->getDataType() == src->getDataType());
// Select the element type from dest's DataType (case labels not visible here).
72 switch (dest->getDataType()) {
74 internal::copyTensorRegion<uint16_t>(
75 dest, src, destOrigin, srcOrigin, regionSize);
78 internal::copyTensorRegion<float>(
79 dest, src, destOrigin, srcOrigin, regionSize);
82 internal::copyTensorRegion<double>(
83 dest, src, destOrigin, srcOrigin, regionSize);
86 internal::copyTensorRegion<int>(
87 dest, src, destOrigin, srcOrigin, regionSize);
90 internal::copyTensorRegion<int64_t>(
91 dest, src, destOrigin, srcOrigin, regionSize);
94 internal::copyTensorRegion<bool>(
95 dest, src, destOrigin, srcOrigin, regionSize);
// Fallback path: aborts in builds where assert() is active.
98 assert(
false &&
"Unknown data type!");
// Type-dispatching front end for internal::copyTensorData<T>.
// destOrigin, srcOrigin and copySize are handed through unchanged, together
// with dest and src.
// NOTE(review): the function name, the leading parameters and the declaration
// of copySize are not visible in this excerpt.
104 std::vector<int> destOrigin,
105 std::vector<int> srcOrigin,
// Both tensors must have the same element type.
107 assert(dest->getDataType() == src->getDataType());
// Select the element type from dest's DataType (case labels not visible here).
108 switch (dest->getDataType()) {
110 internal::copyTensorData<uint16_t>(
111 dest, src, destOrigin, srcOrigin, copySize);
114 internal::copyTensorData<float>(
115 dest, src, destOrigin, srcOrigin, copySize);
118 internal::copyTensorData<double>(
119 dest, src, destOrigin, srcOrigin, copySize);
122 internal::copyTensorData<int>(
123 dest, src, destOrigin, srcOrigin, copySize);
126 internal::copyTensorData<int64_t>(
127 dest, src, destOrigin, srcOrigin, copySize);
130 internal::copyTensorData<bool>(
131 dest, src, destOrigin, srcOrigin, copySize);
// Fallback path: aborts in builds where assert() is active.
134 assert(
false &&
"Unknown data type!");
// Type-dispatching front end for internal::copyRawTensorData<T>. Unlike the
// origin-based variants it forwards scalar-named destOffset / srcOffset
// arguments plus copySize.
// NOTE(review): the signature is not visible in this excerpt, so the types and
// units of the offsets cannot be confirmed from here.
// Both tensors must have the same element type.
143 assert(dest->getDataType() == src->getDataType());
// Select the element type from dest's DataType (case labels not visible here).
144 switch (dest->getDataType()) {
146 internal::copyRawTensorData<uint16_t>(
147 dest, src, destOffset, srcOffset, copySize);
150 internal::copyRawTensorData<float>(
151 dest, src, destOffset, srcOffset, copySize);
154 internal::copyRawTensorData<double>(
155 dest, src, destOffset, srcOffset, copySize);
158 internal::copyRawTensorData<int>(
159 dest, src, destOffset, srcOffset, copySize);
162 internal::copyRawTensorData<int64_t>(
163 dest, src, destOffset, srcOffset, copySize);
166 internal::copyRawTensorData<bool>(
167 dest, src, destOffset, srcOffset, copySize);
// Fallback path: aborts in builds where assert() is active.
170 assert(
false &&
"Unknown data type!");
// Computes a tile extent from maxTileDim, padding, weightDim and stride.
// NOTE(review): only maxTileDim is visible in the parameter list; the other
// three names are used below but declared on lines missing from this excerpt.
186 int computePaddedTileDim(
int maxTileDim,
// Integer division: the number of whole strides that fit once the padding is
// added to the tile and the weight extent is subtracted.
191 int numStrides = (maxTileDim + padding - weightDim) / stride;
// Extent covered by the weight plus numStrides strides ...
194 int tileDim = weightDim + stride * numStrides;
// ... with the padding removed again before returning.
195 return tileDim - padding;
// Splits `tensor` into tiles by storage size rather than by dimension, then
// registers the resulting tiled tensor with the operator's workspace.
// NOTE(review): the function signature and the declarations of tiledTensor,
// srcOffset, tile and copyData are not visible in this excerpt.
203 const TensorShape& inputShape = tensor->getShape();
204 int inputSize = inputShape.storageSize();
205 int tileSize = tileShape.storageSize();
// Ceiling division done in floating point: a partial final tile still counts.
206 int numTiles = std::ceil(inputSize * 1.0 / tileSize);
208 TensorShape({ 1, numTiles }, DataLayout::NC), tensor,
true);
209 int remainingSize = inputSize;
// Visit every tile slot; each tile takes at most tileSize of what is left.
211 for (
auto tileIndex = tiledTensor.startIndex(); !tileIndex.end();
213 int currentTileSize = std::min(remainingSize, tileSize);
216 tileShape.getAlignment());
// Tile name format: "<op name>:<tensor name>/tile:<linear tile index>".
217 std::string tileName = op->getName() +
":" + tensor->getName() +
218 "/tile:" + std::to_string((
int)tileIndex);
221 tiledTensor.
setTile(tileIndex, { srcOffset }, tile, copyData);
// Advance the source offset and shrink the remaining amount by this tile.
222 srcOffset += currentTileSize;
223 remainingSize -= currentTileSize;
225 op->getWorkspace()->addTiledTensor(tiledTensor);
// Debug summary written through dout(1).
226 dout(1) <<
" Tiled Tensor " << tensor->getName() <<
":\n"
227 <<
" original tensor shape: " << tensor->getShape() <<
"\n"
228 <<
" tile shape " << tileShape
229 <<
", number of tiles: " << tiledTensor.size() <<
"\n";
// Computes per-dimension tile extents for `tensor`, builds the tiled tensor
// and registers it with the operator's workspace. Row/column dimensions can
// overlap between neighbouring tiles (a "halo") depending on field size and
// stride.
// NOTE(review): this excerpt starts inside the parameter list; the function
// name, most parameters, and the declarations of topPad, leftPad, tiledTensor,
// currentShape and tile are on lines that are not visible here.
241 PaddingType paddingType,
243 const TensorShape& inputShape = tensor->getShape();
244 const int ndims = inputShape.ndims();
245 DataLayout layout = inputShape.getLayout();
// Overlap between consecutive tiles in each dimension; stays 0 unless set
// below for the row/column dimension.
248 std::vector<int> tilingHalos(ndims, 0);
// Position of the row (H) and column (W) dimension for NHWC and NCHW, or -1
// for every other layout. The layout must be compared against NCHW explicitly:
// a bare `NCHW` in the nested conditional tests the enumerator's own value, so
// the result would not depend on `layout` for non-NHWC layouts.
249 int hIdx = layout == NHWC ? 1 : layout == NCHW ? 2 : -1;
250 int wIdx = layout == NHWC ? 2 : layout == NCHW ? 3 : -1;
// Halo = field size minus stride; a zero field size leaves the halo at 0.
256 if (hIdx != -1 && fieldRows != 0)
257 tilingHalos[hIdx] = fieldRows - rowStride;
258 if (wIdx != -1 && fieldCols != 0)
259 tilingHalos[wIdx] = fieldCols - colStride;
// Total padding is (field size - 1) under SamePadding, otherwise none.
261 int totalRowPad = (paddingType == SamePadding) ? fieldRows - 1 : 0;
262 int totalColPad = (paddingType == SamePadding) ? fieldCols - 1 : 0;
// tilesInDim[i] collects the extent of every tile along dimension i.
266 std::vector<std::vector<int>> tilesInDim(ndims);
268 for (
int i = 0; i < ndims; i++) {
269 int remaining = inputShape[i];
270 while (remaining > 0) {
271 int tileDim = std::min(tileShape[i], remaining);
272 bool firstTileInDim = tilesInDim[i].size() == 0;
273 bool lastTileInDim = remaining <= tileShape[i];
// Only the first of several tiles along H or W is resized through
// computePaddedTileDim, using the top / left padding respectively.
276 if (i == hIdx && firstTileInDim && !lastTileInDim) {
277 tileDim = internal::computePaddedTileDim(
278 tileDim, topPad, fieldRows, rowStride);
279 }
else if (i == wIdx && firstTileInDim && !lastTileInDim) {
280 tileDim = internal::computePaddedTileDim(
281 tileDim, leftPad, fieldCols, colStride);
283 tilesInDim[i].push_back(tileDim);
284 remaining -= tileDim;
// Give the halo back so the next tile overlaps this one.
// NOTE(review): any guard on this statement is not visible in this excerpt.
286 remaining += tilingHalos[i];
// Number of tiles along each dimension defines the tiled tensor's shape.
289 std::vector<int> numBlocksInDim(ndims, 0);
290 for (
int i = 0; i < ndims; i++)
291 numBlocksInDim[i] = tilesInDim[i].size();
293 TensorShape(numBlocksInDim, inputShape.getLayout()), tensor);
// With exactly one tile, the original tensor itself is stored as tile 0.
294 if (tiledTensor.size() == 1) {
297 tiledTensor[0] = tensor;
// Origin of the current tile within the original tensor.
299 std::vector<int> currentOrigin(ndims, 0);
300 for (
auto tileIndex = tiledTensor.startIndex(); !tileIndex.end();
// Look up this tile's extent in every dimension from tilesInDim.
302 std::vector<int> currentTileShape(ndims);
303 for (
int i = 0; i < ndims; i++)
304 currentTileShape[i] = tilesInDim[i][tileIndex.currentIndex(i)];
306 tileShape.getLayout(),
307 tileShape.getAlignment());
// Tile name format: "<op name>:<tensor name>/tile:<linear tile index>".
308 std::string tileName = op->getName() +
":" + tensor->getName() +
309 "/tile:" + std::to_string((
int)tileIndex);
312 tiledTensor.
setTile(tileIndex, currentOrigin, tile,
false);
// Advance the origin starting from the innermost dimension: a dimension that
// has run past the input extent wraps to 0; the halo subtraction below steps
// the origin back so consecutive tiles overlap.
// NOTE(review): the else/break structure of this loop is not visible here.
313 for (
int i = ndims - 1; i >= 0; i--) {
314 currentOrigin[i] += currentShape[i];
315 if (currentOrigin[i] >= inputShape[i]) {
316 currentOrigin[i] = 0;
318 currentOrigin[i] -= tilingHalos[i];
327 op->getWorkspace()->addTiledTensor(tiledTensor);
// Debug summary written through dout(1).
328 dout(1) <<
" Tiled Tensor " << tensor->getName() <<
":\n"
329 <<
" original tensor shape: " << tensor->getShape() <<
"\n"
330 <<
" tile shape: " << tileShape
331 <<
", number of tiles: " << tiledTensor.size() <<
"\n";
// Argument list of a forwarding call whose callee name is not visible in this
// excerpt: tensor, tileShape and op are passed through, followed by the
// literals 0, 0, 1, 1, then ValidPadding and copyData.
340 tensor, tileShape, op, 0, 0, 1, 1, ValidPadding, copyData);
// Walks every tile of tiledTensor in index order and copies it into
// destTensor at a running destination offset.
// NOTE(review): the signature, the declarations of destOffset and tileShape,
// and the name of the copy routine are not visible in this excerpt.
344 const TensorShape& tensorShape = destTensor->getShape();
345 int ndims = tensorShape.ndims();
347 for (
auto tileIndex = tiledTensor.startIndex(); !tileIndex.end();
349 Tensor* tile = tiledTensor[tileIndex];
352 destTensor, tile, destOffset, 0, tileShape.storageSize());
// The next tile lands immediately after this tile's storage.
353 destOffset += tileShape.storageSize();
// Builds a new tensor from inputTensors joined along dimension concatDim.
// NOTE(review): the signature and the per-input copy call are not visible in
// this excerpt.
// The output name starts as the first input's name; the output dims start as a
// copy of the first input's dims.
360 std::string outputName = inputTensors[0]->getName();
361 TensorShape inputShape = inputTensors[0]->getShape();
362 std::vector<int> outputDims = inputShape.dims();
// Each further input appends "-<name>" to the output name and adds its extent
// along concatDim to the output dims.
364 for (
int i = 1; i < inputTensors.size(); i++) {
365 outputName += (
"-" + inputTensors[i]->getName());
366 outputDims[concatDim] += inputTensors[i]->getShape()[concatDim];
369 outputDims, inputShape.getLayout(), inputShape.getAlignment());
// The output is allocated with new and handed to the workspace via addTensor.
// NOTE(review): presumably the workspace takes ownership -- confirm.
370 Tensor* outputTensor =
new Tensor(outputName, outputShape);
371 workspace->addTensor(outputTensor);
374 int ndims = inputShape.ndims();
// currentOrigin tracks the running position in the output; srcOrigin is
// initialised to all zeros.
375 std::vector<int> currentOrigin(ndims, 0);
376 std::vector<int> srcOrigin(ndims, 0);
377 for (
int i = 0; i < inputTensors.size(); i++) {
378 TensorShape srcShape = inputTensors[i]->getShape();
// Move the output origin past this input along the concatenation dimension.
384 currentOrigin[concatDim] += srcShape[concatDim];