SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
smv_eltwise_mul_op.cpp
1 #include "smaug/operators/smv/smv_eltwise_mul_op.h"
2 #include "smaug/core/backend.h"
4 #include "smaug/operators/smv/smv_kernels.h"
5 #include "smaug/operators/smv/smv_unary_op_common.h"
6 #include "smaug/utility/debug_stream.h"
7 
8 namespace smaug {
9 
10 // The tile dispatcher for elementwise multiplication.
11 void SmvEltwiseMulOp::runX(TiledTensor& inputs0,
12  TiledTensor& inputs1,
13  TiledTensor& outputs) {
14  assert(inputs0.size() == inputs1.size() &&
15  inputs0.size() == outputs.size());
17  smv::kEltwiseOpHw, "host_inputs0", getInputsMemType());
19  smv::kEltwiseOpHw, "host_inputs1", getInputsMemType());
21  smv::kEltwiseOpHw, "host_results", getOutputsMemType());
22  for (int i = 0; i < inputs0.size(); i++) {
23  dout(1) << "Input0: " << i << ", input1: " << i << ", output: " << i
24  << "\n";
25  Tensor* input0Tile = inputs0.getTileWithData(i);
26  Tensor* input1Tile = inputs1.getTileWithData(i);
27  Tensor* outputTile = outputs[i];
28  const TensorShape& inputShape = input0Tile->getShape();
29  const TensorShape& outputShape = outputTile->getShape();
30  mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs0",
31  input0Tile->data<float16>(),
32  inputShape.storageSize() * sizeof(float16));
33  mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs1",
34  input1Tile->data<float16>(),
35  inputShape.storageSize() * sizeof(float16));
36  mapArrayToAccel(smv::kEltwiseOpHw, "host_results",
37  outputTile->data<float16>(),
38  outputShape.storageSize() * sizeof(float16));
39 
40  invokeKernel(smv::kEltwiseOpHw, smv_eltwise_mul_nc_vec_fxp,
41  input0Tile->data<float16>(), input1Tile->data<float16>(),
42  outputTile->data<float16>(), smv::spad0, smv::spad1,
43  smv::spad2, inputShape.storageSize());
44  }
45 }
46 
47 void SmvEltwiseMulOp::tile() {
48  // We reuse the unary op tiler for the elementwise multiplication operator.
49  using namespace smaug::smv::unary;
50  auto inputs0 = getInput(Input0);
51  auto inputs1 = getInput(Input1);
52  auto outputs = getOutput(Outputs);
53  int maxTileSize =
54  std::min(SmvBackend::SpadSize() / inputs0->getDataTypeSize(),
55  inputs0->getShape().storageSize());
56  TensorShape tileShape(
57  { 1, maxTileSize }, DataLayout::NC, SmvBackend::Alignment);
58  tiledTensors[0] =
59  generateTiledTensorPerBatchNC(inputs0, tileShape, this, false);
60  tiledTensors[1] =
61  generateTiledTensorPerBatchNC(inputs1, tileShape, this, false);
62  tiledTensors[2] =
63  generateTiledTensorPerBatchNC(outputs, tileShape, this, false);
64 }
65 
66 void SmvEltwiseMulOp::run() {
67  auto inputs0 = getInput(Input0);
68  auto inputs1 = getInput(Input1);
69  auto outputs = getOutput(Outputs);
70  const TensorShape& inputs0Shape = inputs0->getShape();
71  const TensorShape& inputs1Shape = inputs1->getShape();
72  const TensorShape& outputsShape = outputs->getShape();
73  assert(inputs0Shape == inputs1Shape && inputs0Shape == outputsShape);
74 
75  {
76  auto stats = gem5::ScopedStats(
77  stats::kTensorPrepStart, stats::kTensorPrepEnd);
78  tiledTensors[0].copyDataToAllTiles();
79  tiledTensors[1].copyDataToAllTiles();
80  }
81 
82  runX(tiledTensors[0], tiledTensors[1], tiledTensors[2]);
83 
84  {
85  auto stats = gem5::ScopedStats(
86  stats::kTensorFinalStart, stats::kTensorFinalEnd);
87  flattenTiledTensor(tiledTensors[2], outputs);
88  }
89 }
90 
91 } // namespace smaug
smaug::dout
const DebugStream & dout(int debugLevel)
Returns a DebugStream instance for the given debug level.
Definition: debug_stream.cpp:16
smaug::setArrayMemTypeIfSimulating
void setArrayMemTypeIfSimulating(unsigned reqCode, const char *arrayName, MemoryType memType)
Sets what memory access mechanism the accelerator will use when accessing this array.
Definition: common.cpp:21
smaug::generateTiledTensorPerBatchNC
TiledTensor generateTiledTensorPerBatchNC(Tensor *tensor, const TensorShape &tileShape, Operator *op, bool copyData)
Tile the provided NC Tensor per batch.
Definition: tensor_utils.cpp:199
smv_eltwise_mul_nc_vec_fxp
void smv_eltwise_mul_nc_vec_fxp(float16 *host_inputs0, float16 *host_inputs1, float16 *host_results, float *inputs0, float *inputs1, float *results, int inputs_size)
Definition: eltwise_mul.c:13
smaug
The smaug namespace is the parent namespace of all C++ code in SMAUG.
Definition: backend.cpp:38
common.h
Utilities for writing and invoking Aladdin kernels from Operators.
smaug::mapArrayToAccel
void mapArrayToAccel(unsigned reqCode, const char *arrayName, void *baseAddr, size_t size)
Maps an array of data to the accelerator.
Definition: common.cpp:12
smaug::smv::unary
Contains common functions for working with unary operators.
Definition: smv_unary_op_common.cpp:14
smaug::invokeKernel
void invokeKernel(int accelIdx, unsigned reqCode, const Kernel &kernel, Args &&... args)
The generic blocking interface for all accelerator kernel functions.
Definition: common.h:72
smaug::flattenTiledTensor
void flattenTiledTensor(TiledTensor &tiledTensor, Tensor *destTensor)
Copies the data from each tile in a TiledTensor into a destination Tensor as a contiguous block of memory.
Definition: tensor_utils.cpp:343