SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
smv_eltwise_add_op.cpp
#include "smaug/core/backend.h"
#include "smaug/core/tensor_utils.h"
#include "smaug/operators/common.h"
#include "smaug/operators/smv/smv_eltwise_add_op.h"
#include "smaug/operators/smv/smv_unary_op_common.h"
#include "smaug/operators/smv/smv_kernels.h"
#include "smaug/utility/debug_stream.h"

namespace smaug {

// The tile dispatcher for elementwise addition.
void SmvEltwiseAddOp::runX(TiledTensor& inputs0,
                           TiledTensor& inputs1,
                           TiledTensor& outputs) {
    assert(inputs0.size() == inputs1.size() &&
           inputs0.size() == outputs.size());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_inputs0", getInputsMemType());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_inputs1", getInputsMemType());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_results", getOutputsMemType());
    for (int i = 0; i < inputs0.size(); i++) {
        dout(1) << "Input0: " << i << ", input1: " << i << ", output: " << i
                << "\n";
        Tensor* input0Tile = inputs0.getTileWithData(i);
        Tensor* input1Tile = inputs1.getTileWithData(i);
        Tensor* outputTile = outputs[i];
        const TensorShape& inputShape = input0Tile->getShape();
        const TensorShape& outputShape = outputTile->getShape();
        mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs0",
                        input0Tile->data<float16>(),
                        inputShape.storageSize() * sizeof(float16));
        mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs1",
                        input1Tile->data<float16>(),
                        inputShape.storageSize() * sizeof(float16));
        mapArrayToAccel(smv::kEltwiseOpHw, "host_results",
                        outputTile->data<float16>(),
                        outputShape.storageSize() * sizeof(float16));

        invokeKernel(smv::kEltwiseOpHw, smv_eltwise_add_nc_vec_fxp,
                     input0Tile->data<float16>(), input1Tile->data<float16>(),
                     outputTile->data<float16>(), smv::spad0, smv::spad1,
                     smv::spad2, inputShape.storageSize());
    }
}

void SmvEltwiseAddOp::tile() {
    // We reuse the unary op tiler for the elementwise addition operator.
    using namespace smaug::smv::unary;
    auto inputs0 = getInput(Input0);
    auto inputs1 = getInput(Input1);
    auto outputs = getOutput(Outputs);
    int maxTileSize =
            std::min(SmvBackend::SpadSize() / inputs0->getDataTypeSize(),
                     inputs0->getShape().storageSize());
    TensorShape tileShape(
            { 1, maxTileSize }, DataLayout::NC, SmvBackend::Alignment);
    tiledTensors[0] = generateTiledTensorPerBatchNC(
            inputs0, tileShape, this, false);
    tiledTensors[1] = generateTiledTensorPerBatchNC(
            inputs1, tileShape, this, false);
    tiledTensors[2] = generateTiledTensorPerBatchNC(
            outputs, tileShape, this, false);
}

void SmvEltwiseAddOp::run() {
    auto inputs0 = getInput(Input0);
    auto inputs1 = getInput(Input1);
    auto outputs = getOutput(Outputs);
    const TensorShape& inputs0Shape = inputs0->getShape();
    const TensorShape& inputs1Shape = inputs1->getShape();
    const TensorShape& outputsShape = outputs->getShape();
    assert(inputs0Shape == inputs1Shape && inputs0Shape == outputsShape);

    {
        auto stats = gem5::ScopedStats(
                stats::kTensorPrepStart, stats::kTensorPrepEnd);
        tiledTensors[0].copyDataToAllTiles();
        tiledTensors[1].copyDataToAllTiles();
    }

    runX(tiledTensors[0], tiledTensors[1], tiledTensors[2]);

    {
        auto stats = gem5::ScopedStats(
                stats::kTensorFinalStart, stats::kTensorFinalEnd);
        flattenTiledTensor(tiledTensors[2], outputs);
    }
}

}  // namespace smaug
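The accelerator-side kernel invoked by runX(), smv_eltwise_add_nc_vec_fxp, is defined in eltwise_add.c (its signature is listed in the cross-references below). As a rough guide to what it computes, here is a simplified functional sketch in plain C++. The float16 typedef, the no-op fp16 conversion helpers, and the scalar loops are stand-ins chosen only for this illustration; they are not SMAUG's vectorized Aladdin kernel, its fp16 packing helpers, or its DMA/ACP transfer code.

// Simplified functional model of the elementwise-add kernel interface.
// Assumption: "float16" is treated as a plain float and the fp16<->fp32
// conversions are no-ops here; the real kernel stores half-precision data
// in the host arrays and converts it with SMAUG's own helpers.
#include <cstdio>

typedef float float16;  // illustrative stand-in only

static inline float fp16_to_fp32(float16 x) { return x; }   // placeholder
static inline float16 fp32_to_fp16(float x) { return x; }   // placeholder

void eltwise_add_sketch(float16* host_inputs0, float16* host_inputs1,
                        float16* host_results, float* inputs0, float* inputs1,
                        float* results, int inputs_size) {
    // 1. Move both operand tiles from host memory into the fp32 scratchpads
    //    (smv::spad0 and smv::spad1 in the dispatcher above).
    for (int i = 0; i < inputs_size; i++) {
        inputs0[i] = fp16_to_fp32(host_inputs0[i]);
        inputs1[i] = fp16_to_fp32(host_inputs1[i]);
    }
    // 2. Elementwise addition into the results scratchpad (smv::spad2).
    for (int i = 0; i < inputs_size; i++)
        results[i] = inputs0[i] + inputs1[i];
    // 3. Write the results back to host memory.
    for (int i = 0; i < inputs_size; i++)
        host_results[i] = fp32_to_fp16(results[i]);
}

int main() {
    float16 a[4] = { 1, 2, 3, 4 }, b[4] = { 10, 20, 30, 40 }, c[4] = { 0 };
    float spad0[4], spad1[4], spad2[4];
    eltwise_add_sketch(a, b, c, spad0, spad1, spad2, 4);
    for (int i = 0; i < 4; i++)
        printf("%g ", (double)c[i]);  // expected: 11 22 33 44
    printf("\n");
    return 0;
}

The real kernel additionally has to respect the memory access mechanism (e.g., DMA or ACP) chosen for each host_* array via setArrayMemTypeIfSimulating() in the dispatcher above.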
smaug::dout
const DebugStream & dout(int debugLevel)
Returns a DebugStream instance for the given debug level.
Definition: debug_stream.cpp:16
tensor_utils.h
Utility functions for copying/printing/tiling tensors.
smaug::setArrayMemTypeIfSimulating
void setArrayMemTypeIfSimulating(unsigned reqCode, const char *arrayName, MemoryType memType)
Sets what memory access mechanism the accelerator will use when accessing this array.
Definition: common.cpp:21
smaug::generateTiledTensorPerBatchNC
TiledTensor generateTiledTensorPerBatchNC(Tensor *tensor, const TensorShape &tileShape, Operator *op, bool copyData)
Tile the provided NC Tensor per batch.
Definition: tensor_utils.cpp:199
smv_eltwise_add_nc_vec_fxp
void smv_eltwise_add_nc_vec_fxp(float16 *host_inputs0, float16 *host_inputs1, float16 *host_results, float *inputs0, float *inputs1, float *results, int inputs_size)
Definition: eltwise_add.c:13
smaug
The smaug namespace is the parent namespace of all C++ code in SMAUG.
Definition: backend.cpp:38
common.h
Utilities for writing and invoking Aladdin kernels from Operators.
smaug::mapArrayToAccel
void mapArrayToAccel(unsigned reqCode, const char *arrayName, void *baseAddr, size_t size)
Maps an array of data to the accelerator.
Definition: common.cpp:12
smaug::smv::unary
Contains common functions for working with unary operators.
Definition: smv_unary_op_common.cpp:14
smaug::invokeKernel
void invokeKernel(int accelIdx, unsigned reqCode, const Kernel &kernel, Args &&... args)
The generic blocking interface for all accelerator kernel functions.
Definition: common.h:72
smaug::flattenTiledTensor
void flattenTiledTensor(TiledTensor &tiledTensor, Tensor *destTensor)
Copies the data from each tile in a TiledTensor into a destination Tensor as a contiguous block of memory.
Definition: tensor_utils.cpp:343
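
To make the tiling in SmvEltwiseAddOp::tile() concrete: the tile shape is a 1-D (1, maxTileSize) slice, where maxTileSize is the number of elements that fit in one SMV scratchpad (or the whole tensor, if it is smaller), and generateTiledTensorPerBatchNC then splits each batch of the NC tensor into such slices. The sketch below only mirrors that arithmetic; the 32 KB scratchpad capacity and the (4, 50000) input shape are made-up example values, and alignment padding from SmvBackend::Alignment is ignored.

// Worked example of the tile-size computation in SmvEltwiseAddOp::tile().
// Assumed values (for illustration only): a 32 KB scratchpad, fp16 data,
// and a (4, 50000) NC input tensor; alignment padding is ignored.
#include <algorithm>
#include <cstdio>

int main() {
    const int spadSizeBytes = 32 * 1024;  // assumed SmvBackend::SpadSize()
    const int dataTypeSize = 2;           // sizeof(float16)
    const int batches = 4;                // N
    const int channels = 50000;           // C
    const int storageSize = batches * channels;

    // Mirrors: std::min(SpadSize() / getDataTypeSize(), storageSize()).
    int maxTileSize = std::min(spadSizeBytes / dataTypeSize, storageSize);

    // Per-batch tiling: each batch row is cut into (1, maxTileSize) slices,
    // with a smaller final slice holding the remainder.
    int tilesPerBatch = (channels + maxTileSize - 1) / maxTileSize;
    int lastTileSize = channels - (tilesPerBatch - 1) * maxTileSize;

    printf("maxTileSize     = %d elements\n", maxTileSize);      // 16384
    printf("tiles per batch = %d (last tile: %d elements)\n",
           tilesPerBatch, lastTileSize);                         // 4, 848
    printf("total tiles     = %d\n", tilesPerBatch * batches);   // 16
    return 0;
}

In the real operator, the scratchpad capacity comes from SmvBackend::SpadSize() and storageSize() includes alignment padding, so the actual tile counts can differ slightly from this example.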