SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
smv_pooling_tiling.cpp
1 #include <algorithm>
2 
3 #include "smaug/core/backend.h"
5 #include "smaug/operators/smv/smv_pooling_op.h"
6 #include "smaug/operators/smv/smv_pooling_tiling.h"
7 #include "smaug/utility/debug_stream.h"
8 
9 namespace smaug {
10 namespace smv {
11 namespace pool {
12 
13 std::array<TilingDims, 2> TilingOptimizer::determineBestTilingDims(
14  Tensor* inputs,
15  Tensor* outputs,
16  int maxTileSize,
17  std::pair<int, int> poolSize) {
18  // Determine the best tiling strategy for each of inputs and outputs. Don't
19  // try to figure out the actual tile sizes yet.
20  TilingDims bestInputTilingDims = findBestTilingDims(
21  inputs->getShape(),
22  maxTileSize,
23  { 1, poolSize.first, poolSize.second, kVectorSize });
24  TilingDims bestOutputTilingDims = findBestTilingDims(
25  outputs->getShape(), maxTileSize, { 1, 1, 1, kVectorSize });
26 
27  // Apply some constraints to simplify tiling logic.
28  //
29  // If inputs require rowwise/columnwise tiling, then outputs also require
30  // rowwise/columnwise tiling. Strictly speaking this is not necessarily
31  // required but it will greatly simplify memory management.
32  if (needsHwiseTiling(bestInputTilingDims)) {
33  if (needsCwiseTiling(bestOutputTilingDims))
34  bestOutputTilingDims = DimNCH;
35  else if (needsWwiseTiling(bestOutputTilingDims))
36  bestOutputTilingDims = DimNHW;
37  else
38  bestOutputTilingDims = DimNH;
39  }
40  if (needsWwiseTiling(bestInputTilingDims)) {
41  if (needsCwiseTiling(bestOutputTilingDims))
42  bestOutputTilingDims = DimNCW;
43  else if (needsHwiseTiling(bestOutputTilingDims))
44  bestOutputTilingDims = DimNHW;
45  else
46  bestOutputTilingDims = DimNW;
47  }
48 
49  return { bestInputTilingDims, bestOutputTilingDims };
50 }
51 
53  Tensor* inputs = op->getInput(op->Inputs);
54  Tensor* outputs = op->getOutput(op->Outputs);
55  int maxTileSize = SmvBackend::SpadSize() / inputs->getDataTypeSize();
56  std::pair<int, int> poolSize = op->getPoolingSize();
57  std::pair<int, int> poolStride = op->getPoolingStride();
58  std::array<TilingDims, 2> strategies =
59  determineBestTilingDims(inputs, outputs, maxTileSize, poolSize);
60  TilingDims inputTilingDims = strategies[0];
61  TilingDims outputTilingDims = strategies[1];
62 
63  dout(2) << " Tiling dimensions chosen: \n"
64  << " input: " << inputTilingDims
65  << ", output: " << outputTilingDims << "\n";
66 
67  TensorShape inputsShape = inputs->getShape();
68  TensorShape outputsShape = outputs->getShape();
69 
70  // There are four degrees of freedom we can play with in total:
71  // N (batch), H (rows), C (channels), and P (ofmap).
72  // Each tiling strategy may reduce this down to just three.
73  // 1. Start with inputs. Enumerate all shapes that fit.
74  // 2. Move on to outputs. Enumerate all shapes that are compatible with
75  // the input shape and fit.
76  // For all tiling strategy, compute the total SRAM utilization. The highest
77  // one is the chosen one.
78  std::vector<TensorShape> inputConfigs;
79  if (inputTilingDims == DimN) {
80  std::vector<int> minShape = inputsShape.dims();
81  minShape[0] = 1;
82  enum4DTensorTilingConfigs(inputsShape,
83  maxTileSize,
84  minShape,
85  { 1, 1, 1, 1 },
86  inputConfigs);
87  } else if (inputTilingDims == DimNC) {
88  std::vector<int> minShape = inputsShape.dims();
89  minShape[0] = 1;
90  minShape[3] = kVectorSize;
91  enum4DTensorTilingConfigs(inputsShape,
92  maxTileSize,
93  minShape,
94  { 1, 1, 1, kVectorSize },
95  inputConfigs);
96  } else if (inputTilingDims == DimNH) {
97  std::vector<int> minShape = inputsShape.dims();
98  minShape[0] = 1;
99  minShape[1] = poolSize.first;
100  enum4DTensorTilingConfigs(inputsShape,
101  maxTileSize,
102  minShape,
103  { 1, poolStride.first, 1, 1 },
104  inputConfigs);
105  } else if (inputTilingDims == DimNW) {
106  std::vector<int> minShape = inputsShape.dims();
107  minShape[0] = 1;
108  minShape[2] = poolSize.second;
109  enum4DTensorTilingConfigs(inputsShape,
110  maxTileSize,
111  minShape,
112  { 1, 1, poolStride.second, 1 },
113  inputConfigs);
114  } else if (inputTilingDims == DimNHW) {
115  std::vector<int> minShape = { 1, poolSize.first, poolSize.second,
116  inputsShape[3] };
117  std::vector<int> strides = { 1, poolStride.first, poolStride.second,
118  1 };
120  inputsShape, maxTileSize, minShape, strides, inputConfigs);
121  } else if (inputTilingDims == DimNCH) {
122  std::vector<int> minShape = { 1, poolSize.first, inputsShape[2],
123  kVectorSize };
124  std::vector<int> strides = { 1, poolStride.first, 1, kVectorSize };
126  inputsShape, maxTileSize, minShape, strides, inputConfigs);
127  } else if (inputTilingDims == DimNCW) {
128  std::vector<int> minShape = { 1, inputsShape[1], poolSize.second,
129  kVectorSize };
130  std::vector<int> strides = { 1, 1, poolStride.second, kVectorSize };
132  inputsShape, maxTileSize, minShape, strides, inputConfigs);
133  } else {
134  inputConfigs.push_back(inputsShape);
135  }
136  assert(!inputConfigs.empty() && "No tiling configurations found!");
137 
138  // Fill in outputs.
139  std::vector<TilingConfig> fullConfigs;
140  for (auto it = inputConfigs.begin(); it != inputConfigs.end(); ++it) {
141  TilingConfig config(*it);
142  config.outputs = outputsShape;
143  config.outputs[0] = config.inputs[0];
144  if (needsHwiseTiling(outputTilingDims)) {
145  config.outputs[1] = op->calcOutputRows(config.inputs[1]);
146  }
147  if (needsWwiseTiling(outputTilingDims)) {
148  config.outputs[2] = op->calcOutputCols(config.inputs[2]);
149  }
150  // If inputs and outputs both need channelwise tiling, make the tiles
151  // have the same number of channels.
152  if (needsCwiseTiling(inputTilingDims) &&
153  needsCwiseTiling(outputTilingDims)) {
154  config.outputs[3] = config.inputs[3];
155  }
156  if (config.outputs.storageSize() <= maxTileSize) {
157  fullConfigs.push_back(config);
158  }
159  }
160  dout(2) << " Number of possible tiling configs: " << fullConfigs.size()
161  << "\n";
162  for (auto& config : fullConfigs)
163  dout(2) << " " << config << "\n";
164  auto maxIt = std::max_element(
165  fullConfigs.begin(),
166  fullConfigs.end(),
167  [](const TilingConfig& c1, const TilingConfig& c2) {
168  return c1.getTotalSize() < c2.getTotalSize();
169  });
170  assert(maxIt != fullConfigs.end() && "Failed to get best tiling config!");
171  // Fill in the tiling dims.
172  maxIt->inputTilingDims = inputTilingDims;
173  maxIt->outputTilingDims = outputTilingDims;
174  return *maxIt;
175 }
176 
177 std::array<TiledTensor, 2> TilingOptimizer::doTiling(SmvPoolingOp* op) {
178  auto input = op->getInput(SmvPoolingOp::Inputs);
179  auto output = op->getOutput(SmvPoolingOp::Outputs);
181  int poolRowSize, poolColSize, poolRowStride, poolColStride;
182  std::tie(poolRowSize, poolColSize) = op->getPoolingSize();
183  std::tie(poolRowStride, poolColStride) = op->getPoolingStride();
184  TiledTensor tiledInputs =
186  tileConfig.inputs,
187  op,
188  poolRowSize,
189  poolColSize,
190  poolRowStride,
191  poolColStride,
192  ValidPadding,
193  /* copy_data */ false);
194  TiledTensor tiledOutputs = generateTiledTensor(
195  output, tileConfig.outputs, op);
196  return { tiledInputs, tiledOutputs };
197 }
198 
199 } // namespace pool
200 } // namespace smv
201 } // namespace smaug
smaug::Tensor
Tensor represents a single multi-dimensional array of data.
Definition: tensor.h:344
smaug::smv::TilingOptimizerBase::enum4DTensorTilingConfigs
static void enum4DTensorTilingConfigs(TensorShape shape, int maxTileSize, const std::vector< int > &minShape, const std::vector< int > &strides, std::vector< TensorShape > &configs)
Enumerates all tiling configs for a four dimensional Tensor.
Definition: smv_tiling_base.cpp:78
smaug::dout
const DebugStream & dout(int debugLevel)
Returns a DebugStream instance for the given debug level.
Definition: debug_stream.cpp:16
smaug::smv::TilingDims
TilingDims
The set of supported tiling strategies.
Definition: smv_tiling_common.h:13
smaug::SmvPoolingOp
Base class for SMV pooling operators.
Definition: smv_pooling_op.h:23
smaug::smv::pool::TilingOptimizer::computeBasicTileShapes
static TilingConfig computeBasicTileShapes(SmvPoolingOp *op)
Determine the best basic tiling shape for this pooling layer.
Definition: smv_pooling_tiling.cpp:52
smaug::smv::pool::TilingOptimizer::determineBestTilingDims
static std::array< TilingDims, 2 > determineBestTilingDims(Tensor *inputs, Tensor *outputs, int maxTileSize, std::pair< int, int > poolSize)
Determine the best tiling dimensions for running pooling on SMV.
Definition: smv_pooling_tiling.cpp:13
smaug::TiledTensor
A multidimensional container of Tensors.
Definition: tensor.h:552
smaug::TensorShape
TensorShape describes the shape of a Tensor.
Definition: tensor.h:35
smaug::generateTiledTensorWithStrideAndPadding
TiledTensor generateTiledTensorWithStrideAndPadding(Tensor *tensor, const TensorShape &tileShape, Operator *op, int fieldRows, int fieldCols, int rowStride, int colStride, PaddingType paddingType, bool copyData)
Generates a TiledTensor from a source Tensor with the specified tile shape.
Definition: tensor_utils.cpp:233
smaug::smv::TilingOptimizerBase::findBestTilingDims
static TilingDims findBestTilingDims(const TensorShape &shape, int maxTileSize, const std::vector< int > &minShape)
Find the best set of dimensions to tile a given tensor shape.
Definition: smv_tiling_base.cpp:10
smaug::smv::TilingConfig
A TilingConfig describes tiling strategies and optimal tile sizes for inputs, weights,...
Definition: smv_tiling_common.h:29
smaug
The smaug namespace is the parent namespace of all C++ code in SMAUG.
Definition: backend.cpp:38
common.h
Utilities for writing and invoking Aladdin kernels from Operators.
smaug::generateTiledTensor
TiledTensor generateTiledTensor(Tensor *tensor, const TensorShape &tileShape, Operator *op, bool copyData)
Generates a TiledTensor from a source Tensor.
Definition: tensor_utils.cpp:335