SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
smv_batch_norm_tiling.cpp
1 #include <algorithm>
2 
3 #include "smaug/core/backend.h"
4 #include "smaug/operators/common.h"
5 #include "smaug/operators/smv/smv_batch_norm_op.h"
6 #include "smaug/operators/smv/smv_batch_norm_tiling.h"
7 #include "smaug/utility/debug_stream.h"
8 
9 namespace smaug {
10 namespace smv {
11 namespace bn {
12 
13 std::array<TilingDims, 2> TilingOptimizer::determineBestTilingDims(
14         Tensor* inputs, Tensor* weights, int maxTileSize) {
15     // Determine the best tiling dims for each of inputs and weights. The
16     // outputs have the same shape as the inputs and should use the same
17     // tiling dims.
18     const TensorShape& inputShape = inputs->getShape();
19     TilingDims bestInputTilingDims;
20     if (inputShape.ndims() == 4) {
21         bestInputTilingDims = findBestTilingDims(
22                 inputShape,
23                 maxTileSize,
24                 { 1, kVectorSize, kVectorSize, kVectorSize });
25     } else {
26         bestInputTilingDims =
27                 findBestTilingDims(inputShape, maxTileSize, { 1, kVectorSize });
28     }
29     TilingDims bestWeightTilingDims = findBestTilingDims(
30             weights->getShape(), maxTileSize, { 4, kVectorSize });
31 
32     return { bestInputTilingDims, bestWeightTilingDims };
33 }
34 
35 void TilingOptimizer::enumPostFCTilingConfigs(
36         TensorShape inputsShape,
37         TensorShape weightsShape,
38         int maxTileSize,
39         std::array<TilingDims, 2> strategies,
40         std::list<TilingConfig>& fullConfigs) {
41     TilingDims inputTilingDims = strategies[0];
42     TilingDims weightTilingDims = strategies[1];
43     // Supported tiling dims: None, DimN and DimNC for inputs; None and DimNC
44     // for weights.
45     // The tiling config enumeration goes as follows:
46     // 1. Start with inputs. Enumerate all shapes that fit.
47     // 2. Move on to weights and outputs. Enumerate all shapes that are
48     //    compatible with the input shape and fit. The output tiles use the
49     //    same tile shape as the input tiles.
50     // 3. For each tiling strategy, compute the total SRAM utilization. The
51     //    one with the highest utilization is chosen.
52     assert(inputTilingDims == None || inputTilingDims == DimN ||
53            inputTilingDims == DimNC);
54     assert(weightTilingDims == None || weightTilingDims == DimNC);
55     std::vector<TensorShape> inputsConfigs;
56     if (inputTilingDims == DimN) {
57         std::vector<int> minShape = inputsShape.dims();
58         minShape[0] = 1;
59         enum2DTensorTilingConfigs(
60                 inputsShape, maxTileSize, minShape, { 1, 1 }, inputsConfigs);
61     } else if (inputTilingDims == DimNC) {
62         enum2DTensorTilingConfigs(inputsShape,
63                                   maxTileSize,
64                                   { 1, kVectorSize },
65                                   { 1, kVectorSize },
66                                   inputsConfigs);
67     } else {
68         inputsConfigs.push_back(inputsShape);
69     }
70     assert(!inputsConfigs.empty() && "No tiling configurations found!");
71 
72     // Fill in weights and outputs.
73     for (auto it = inputsConfigs.begin(); it != inputsConfigs.end(); ++it) {
74         TensorShape& inputsConfig = *it;
75         if (weightTilingDims == DimNC) {
76             if (needsCwiseTiling(inputTilingDims)) {
77                 // If the inputs are also tiled activation-wise, then the
78                 // weights have to use the same activations dimension.
79                 TilingConfig config;
80                 config.weights = weightsShape;
81                 config.weights[1] = inputsConfig[1];
82                 if (config.weights.storageSize() <= maxTileSize) {
83                     config.inputs = inputsConfig;
84                     config.outputs = inputsConfig;
85                     fullConfigs.push_back(config);
86                 } else {
87                     break;
88                 }
89             } else {
90                 int minChannels = std::min(weightsShape[1], kVectorSize);
91                 for (int c = minChannels; c <= weightsShape[1];
92                      c += kVectorSize) {
93                     TilingConfig config;
94                     config.weights = weightsShape;
95                     config.weights[1] = c;
96                     if (config.weights.storageSize() <= maxTileSize) {
97                         config.inputs = inputsConfig;
98                         config.outputs = inputsConfig;
99                         fullConfigs.push_back(config);
100                     } else {
101                         break;
102                     }
103                 }
104             }
105         } else {
106             TilingConfig config(inputsConfig, weightsShape, inputsConfig);
107             fullConfigs.push_back(config);
108         }
109     }
110     assert(!fullConfigs.empty() && "No tiling configurations found!");
111 }
112 
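To see how the post-FC enumeration above plays out, here is a standalone sketch (not part of this file) of step 1 for the DimN case. The numbers are hypothetical: a 32KB scratchpad of 2-byte elements (16384 elements per tile) and a 128 x 4096 input. The batch dimension grows until a tile no longer fits, and the utilization rule later picks the largest surviving config, here (4, 4096).

#include <cstdio>
#include <utility>
#include <vector>

int main() {
    const int maxTileSize = 32 * 1024 / 2;  // assumed: 32KB spad, 2-byte data
    const int N = 128, C = 4096;            // hypothetical post-FC input shape
    std::vector<std::pair<int, int>> configs;
    // Analogue of the DimN case above: minShape = (1, C), stride 1 on N.
    for (int n = 1; n <= N && n * C <= maxTileSize; n++)
        configs.push_back({ n, C });
    for (const auto& c : configs)  // prints (1, 4096) through (4, 4096)
        printf("(%d, %d)\n", c.first, c.second);
    return 0;
}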
113 void TilingOptimizer::enumPostConvTilingConfigs(
114         TensorShape inputsShape,
115         TensorShape weightsShape,
116         int maxTileSize,
117         std::array<TilingDims, 2> strategies,
118         std::list<TilingConfig>& fullConfigs) {
119     TilingDims inputTilingDims = strategies[0];
120     TilingDims weightTilingDims = strategies[1];
121     // Supported tiling dims for inputs: DimN, DimNC, DimNH, DimNW, DimNHW,
122     // DimNCH and DimNCW. None for weights for now: with a 32KB weights
123     // scratchpad, the weights would require tiling only if they had more
124     // than 4096 channels.
125     // TODO: add other tiling dims for weights if we need them later.
126     // Enumerate all input shapes that fit, then fill in the tiling
127     // configurations with weights and outputs. For each tiling strategy,
128     // compute the total SRAM utilization; the highest one is chosen.
129     assert(inputTilingDims == None || inputTilingDims == DimN ||
130            inputTilingDims == DimNC || inputTilingDims == DimNH ||
131            inputTilingDims == DimNW || inputTilingDims == DimNHW ||
132            inputTilingDims == DimNCH || inputTilingDims == DimNCW);
133     assert(weightTilingDims == None);
134     std::vector<TensorShape> inputsConfigs;
135     if (inputTilingDims == DimN) {
136         std::vector<int> minShape = inputsShape.dims();
137         minShape[0] = 1;
138         enum4DTensorTilingConfigs(inputsShape,
139                                   maxTileSize,
140                                   minShape,
141                                   { 1, 1, 1, 1 },
142                                   inputsConfigs);
143     } else if (inputTilingDims == DimNC) {
144         std::vector<int> minShape = inputsShape.dims();
145         minShape[0] = 1;
146         minShape[3] = kVectorSize;
147         enum4DTensorTilingConfigs(inputsShape,
148                                   maxTileSize,
149                                   minShape,
150                                   { 1, 1, 1, kVectorSize },
151                                   inputsConfigs);
152     } else if (inputTilingDims == DimNH) {
153         std::vector<int> minShape = inputsShape.dims();
154         minShape[0] = 1;
155         minShape[1] = kVectorSize;
156         enum4DTensorTilingConfigs(inputsShape,
157                                   maxTileSize,
158                                   minShape,
159                                   { 1, kVectorSize, 1, 1 },
160                                   inputsConfigs);
161     } else if (inputTilingDims == DimNW) {
162         std::vector<int> minShape = inputsShape.dims();
163         minShape[0] = 1;
164         minShape[2] = kVectorSize;
165         enum4DTensorTilingConfigs(inputsShape,
166                                   maxTileSize,
167                                   minShape,
168                                   { 1, 1, kVectorSize, 1 },
169                                   inputsConfigs);
170     } else if (inputTilingDims == DimNHW) {
171         std::vector<int> minShape = { 1, kVectorSize, kVectorSize,
172                                       inputsShape[3] };
173         enum4DTensorTilingConfigs(inputsShape,
174                                   maxTileSize,
175                                   minShape,
176                                   { 1, kVectorSize, kVectorSize, 1 },
177                                   inputsConfigs);
178     } else if (inputTilingDims == DimNCH) {
179         std::vector<int> minShape = { 1, kVectorSize, inputsShape[2],
180                                       kVectorSize };
181         enum4DTensorTilingConfigs(inputsShape,
182                                   maxTileSize,
183                                   minShape,
184                                   { 1, kVectorSize, 1, kVectorSize },
185                                   inputsConfigs);
186     } else if (inputTilingDims == DimNCW) {
187         std::vector<int> minShape = { 1, inputsShape[1], kVectorSize,
188                                       kVectorSize };
189         enum4DTensorTilingConfigs(inputsShape,
190                                   maxTileSize,
191                                   minShape,
192                                   { 1, 1, kVectorSize, kVectorSize },
193                                   inputsConfigs);
194     } else {
195         inputsConfigs.push_back(inputsShape);
196     }
197     assert(!inputsConfigs.empty() && "No tiling configurations found!");
198 
199     // Fill in weights and outputs.
200     for (auto it = inputsConfigs.begin(); it != inputsConfigs.end(); ++it) {
201         TilingConfig config(*it, weightsShape, *it);
202         fullConfigs.push_back(config);
203     }
204     assert(!fullConfigs.empty() && "No tiling configurations found!");
205 }
206 
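The 4096-channel figure in the comments above is simple scratchpad arithmetic. A minimal check, assuming 16-bit data and the four concatenated parameter rows (mean, variance, gamma, beta):

// A 32KB spad holds 32768 / 2 = 16384 fp16 elements. The batch norm
// weights are 4 rows of C channels each, i.e. 4 * C elements, so tiling
// is needed only when 4 * C > 16384, i.e. C > 4096.
static_assert(32 * 1024 / 2 / 4 == 4096, "channel limit for a 32KB fp16 spad");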
207 TilingConfig TilingOptimizer::computeBasicTileShapes(Tensor* inputs,
208                                                      Tensor* weights,
209                                                      Tensor* outputs) {
210     int maxTileSize = SmvBackend::SpadSize() / inputs->getDataTypeSize();
211     // The outputs have the same shape as the inputs; no separate tiling needed.
212     assert(inputs->getShape() == outputs->getShape());
213     std::array<TilingDims, 2> strategies =
214             determineBestTilingDims(inputs, weights, maxTileSize);
215     TilingDims inputTilingDims = strategies[0];
216     TilingDims weightTilingDims = strategies[1];
217     TilingDims outputTilingDims = inputTilingDims;
218 
219     dout(2) << "  Tiling dimensions chosen: \n"
220             << "    input: " << inputTilingDims
221             << ", weight: " << weightTilingDims
222             << ", output: " << outputTilingDims << "\n";
223 
224     TensorShape inputsShape = inputs->getShape();
225     TensorShape weightsShape = weights->getShape();
226     std::list<TilingConfig> fullConfigs;
227     bool isPostConv = (inputs->ndims() == 4);
228     if (isPostConv) {
229         enumPostConvTilingConfigs(inputsShape,
230                                   weightsShape,
231                                   maxTileSize,
232                                   strategies,
233                                   fullConfigs);
234     } else {
235         enumPostFCTilingConfigs(inputsShape,
236                                 weightsShape,
237                                 maxTileSize,
238                                 strategies,
239                                 fullConfigs);
240     }
241 
242     dout(2) << "  Number of possible tiling configs: " << fullConfigs.size()
243             << "\n";
244     for (auto& config : fullConfigs)
245         dout(2) << "  " << config << "\n";
246     auto maxIt = std::max_element(
247             fullConfigs.begin(),
248             fullConfigs.end(),
249             [](const TilingConfig& c1, const TilingConfig& c2) {
250                 return c1.getTotalSize() < c2.getTotalSize();
251             });
252     assert(maxIt != fullConfigs.end() && "Failed to get best tiling config!");
253     // Fill in the tiling dims.
254     (*maxIt).inputTilingDims = inputTilingDims;
255     (*maxIt).weightTilingDims = weightTilingDims;
256     (*maxIt).outputTilingDims = outputTilingDims;
257     return *maxIt;
258 }
259 
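The selection rule in computeBasicTileShapes above deserves a gloss: std::max_element with a less-than comparator over getTotalSize() returns the config with the largest combined tile footprint (inputs + weights + outputs), i.e. the one that leaves the least scratchpad capacity idle. A minimal standalone analogue with hypothetical sizes:

#include <algorithm>
#include <cassert>
#include <list>

struct Config { int totalSize; };  // stand-in for TilingConfig::getTotalSize()

int main() {
    std::list<Config> configs{ { 8192 }, { 16384 }, { 12288 } };
    auto best = std::max_element(
            configs.begin(), configs.end(),
            [](const Config& a, const Config& b) {
                return a.totalSize < b.totalSize;
            });
    assert(best->totalSize == 16384);  // the largest footprint wins
    return 0;
}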
260 std::array<TiledTensor, 3> TilingOptimizer::doTiling(SmvBatchNormOp* op) {
261     auto inputs = op->getInput(SmvBatchNormOp::Inputs);
262     auto mean = op->getInput(SmvBatchNormOp::Mean);
263     auto variance = op->getInput(SmvBatchNormOp::Variance);
264     auto gamma = op->getInput(SmvBatchNormOp::Gamma);
265     auto beta = op->getInput(SmvBatchNormOp::Beta);
266     // Concatenate the four weight tensors into one.
267     auto weights = concatTensors(
268             { mean, variance, gamma, beta }, 0, op->getWorkspace());
269     auto outputs = op->getOutput(SmvBatchNormOp::Outputs);
270     TilingConfig tileConfig =
271             TilingOptimizer::computeBasicTileShapes(inputs, weights, outputs);
272     TiledTensor tiledInputs =
273             generateTiledTensor(inputs, tileConfig.inputs, op);
274     // Copy data for the weight tiles since the data is read-only.
275     TiledTensor tiledWeights =
276             generateTiledTensor(weights, tileConfig.weights,
277                                 op, /* copyData */ true);
278     TiledTensor tiledOutputs =
279             generateTiledTensor(outputs, tileConfig.inputs, op);
280     return { tiledInputs, tiledWeights, tiledOutputs };
281 }
282 
283 } // namespace bn
284 } // namespace smv
285 } // namespace smaug
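For context, here is a hedged sketch of how an operator might consume doTiling. The runTiles helper is hypothetical (the real SmvBatchNormOp dispatches to its own tile-processing routines), but the doTiling call and the three returned TiledTensors mirror the code above.

#include <array>

#include "smaug/core/tensor.h"
#include "smaug/operators/smv/smv_batch_norm_op.h"
#include "smaug/operators/smv/smv_batch_norm_tiling.h"

// Hypothetical per-tile worker, standing in for the operator's real logic.
void runTiles(smaug::SmvBatchNormOp* op, smaug::TiledTensor& inputs,
              smaug::TiledTensor& weights, smaug::TiledTensor& outputs);

void runBatchNorm(smaug::SmvBatchNormOp* op) {
    // doTiling concatenates mean/variance/gamma/beta into a single weights
    // tensor, picks the best TilingConfig, and tiles inputs, weights, and
    // outputs accordingly.
    std::array<smaug::TiledTensor, 3> tiles =
            smaug::smv::bn::TilingOptimizer::doTiling(op);
    runTiles(op, tiles[0], tiles[1], tiles[2]);
}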
smaug::Tensor
Tensor represents a single multi-dimensional array of data.
Definition: tensor.h:344
smaug::smv::TilingOptimizerBase::enum4DTensorTilingConfigs
static void enum4DTensorTilingConfigs(TensorShape shape, int maxTileSize, const std::vector< int > &minShape, const std::vector< int > &strides, std::vector< TensorShape > &configs)
Enumerates all tiling configs for a four dimensional Tensor.
Definition: smv_tiling_base.cpp:78
smaug::dout
const DebugStream & dout(int debugLevel)
Returns a DebugStream instance for the given debug level.
Definition: debug_stream.cpp:16
smaug::concatTensors
Tensor * concatTensors(std::vector< Tensor * > inputTensors, int concatDim, Workspace *workspace)
Concatenates Tensors on the specified dimension into one single tensor.
Definition: tensor_utils.cpp:357
smaug::smv::TilingDims
TilingDims
The set of supported tiling strategies.
Definition: smv_tiling_common.h:13
smaug::smv::TilingOptimizerBase::enum2DTensorTilingConfigs
static void enum2DTensorTilingConfigs(TensorShape shape, int maxTileSize, const std::vector< int > &minShape, const std::vector< int > &strides, std::vector< TensorShape > &configs)
Enumerates all tiling configs for a two dimensional Tensor.
Definition: smv_tiling_base.cpp:56
smaug::TiledTensor
A multidimensional container of Tensors.
Definition: tensor.h:552
smaug::SmvBatchNormOp
SMV backend implementation of batch normalization.
Definition: smv_batch_norm_op.h:27
smaug::TensorShape
TensorShape describes the shape of a Tensor.
Definition: tensor.h:35
smaug::smv::TilingOptimizerBase::findBestTilingDims
static TilingDims findBestTilingDims(const TensorShape &shape, int maxTileSize, const std::vector< int > &minShape)
Find the best set of dimensions to tile a given tensor shape.
Definition: smv_tiling_base.cpp:10
smaug::smv::TilingConfig
A TilingConfig describes tiling strategies and optimal tile sizes for inputs, weights,...
Definition: smv_tiling_common.h:29
smaug::smv::bn::TilingOptimizer::doTiling
static std::array< TiledTensor, 3 > doTiling(SmvBatchNormOp *op)
Runs the tiling optimizer on the given batch norm op.
Definition: smv_batch_norm_tiling.cpp:260
smaug
The smaug namespace is the parent namespace of all C++ code in SMAUG.
Definition: backend.cpp:38
smaug::smv::bn::TilingOptimizer::computeBasicTileShapes
static TilingConfig computeBasicTileShapes(Tensor *inputs, Tensor *weights, Tensor *outputs)
Determine the best basic tiling shape for this batch norm layer.
Definition: smv_batch_norm_tiling.cpp:207
common.h
Utilities for writing and invoking Aladdin kernels from Operators.
smaug::smv::bn::TilingOptimizer::determineBestTilingDims
static std::array< TilingDims, 2 > determineBestTilingDims(Tensor *inputs, Tensor *weights, int maxTileSize)
Determine the best tiling dimensions for running batch norm on SMV.
Definition: smv_batch_norm_tiling.cpp:13
smaug::generateTiledTensor
TiledTensor generateTiledTensor(Tensor *tensor, const TensorShape &tileShape, Operator *op, bool copyData)
Generates a TiledTensor from a source Tensor.
Definition: tensor_utils.cpp:335