SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
smv_less_op.cpp
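smv_less_op.cpp implements the SMV backend's elementwise less-than (SmvLessOp) and less-than-or-equal (SmvLessEqualOp) operators: each tiles its inputs to fit the accelerator's scratchpads, runs the corresponding comparison kernel tile by tile, and flattens the boolean results back into the output tensor.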
#include "smaug/operators/smv/smv_less_op.h"
#include "smaug/core/backend.h"
#include "smaug/operators/common.h"
#include "smaug/operators/smv/smv_kernels.h"
#include "smaug/operators/smv/smv_unary_op_common.h"
#include "smaug/utility/debug_stream.h"

namespace smaug {

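// Runs the tiled less-than comparison on the SMV elementwise accelerator.
// Each iteration maps one pair of fp16 input tiles and one bool output tile
// into the accelerator's address space, then invokes the comparison kernel.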
void SmvLessOp::runX(TiledTensor& inputs0,
                     TiledTensor& inputs1,
                     TiledTensor& outputs) {
    assert(inputs0.size() == inputs1.size() &&
           inputs0.size() == outputs.size());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_inputs0", getInputsMemType());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_inputs1", getInputsMemType());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_results", getOutputsMemType());
    for (int i = 0; i < inputs0.size(); i++) {
        dout(1) << "Input0: " << i << ", input1: " << i << ", output: " << i
                << "\n";
        Tensor* input0Tile = inputs0.getTileWithData(i);
        Tensor* input1Tile = inputs1.getTileWithData(i);
        Tensor* outputTile = outputs[i];
        const TensorShape& inputShape = input0Tile->getShape();
        const TensorShape& outputShape = outputTile->getShape();
        mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs0",
                        input0Tile->data<float16>(),
                        inputShape.storageSize() * sizeof(float16));
        mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs1",
                        input1Tile->data<float16>(),
                        inputShape.storageSize() * sizeof(float16));
        mapArrayToAccel(smv::kEltwiseOpHw, "host_results",
                        outputTile->data<bool>(),
                        outputShape.storageSize() * sizeof(bool));

        invokeKernel(smv::kEltwiseOpHw, smv_less_nc_vec_fxp,
                     input0Tile->data<float16>(), input1Tile->data<float16>(),
                     outputTile->data<bool>(), smv::spad0, smv::spad1,
                     reinterpret_cast<bool*>(smv::spad2),
                     inputShape.storageSize());
    }
}

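// Computes the tiling configuration. Inputs and outputs are flattened to NC
// layout and split into tiles no larger than one SMV scratchpad.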
void SmvLessOp::tile() {
    // We reuse the unary op tiler for this elementwise comparison operator.
    using namespace smaug::smv::unary;
    auto inputs0 = getInput(Input0);
    auto inputs1 = getInput(Input1);
    auto outputs = getOutput(Outputs);
    int maxTileSize =
            std::min(SmvBackend::SpadSize() / inputs0->getDataTypeSize(),
                     inputs0->getShape().storageSize());
    TensorShape tileShape(
            { 1, maxTileSize }, DataLayout::NC, SmvBackend::Alignment);
    tiledTensors[0] =
            generateTiledTensorPerBatchNC(inputs0, tileShape, this, false);
    tiledTensors[1] =
            generateTiledTensorPerBatchNC(inputs1, tileShape, this, false);
    tiledTensors[2] =
            generateTiledTensorPerBatchNC(outputs, tileShape, this, false);
}

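// Copies input data into the tiles, runs the tiled comparison, and gathers
// the per-tile results back into the output tensor.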
void SmvLessOp::run() {
    auto inputs0 = getInput(Input0);
    auto inputs1 = getInput(Input1);
    auto outputs = getOutput(Outputs);
    const TensorShape& inputs0Shape = inputs0->getShape();
    const TensorShape& inputs1Shape = inputs1->getShape();
    const TensorShape& outputsShape = outputs->getShape();
    assert(inputs0Shape == inputs1Shape && inputs0Shape == outputsShape);

    {
        auto stats = gem5::ScopedStats(
                stats::kTensorPrepStart, stats::kTensorPrepEnd);
        tiledTensors[0].copyDataToAllTiles();
        tiledTensors[1].copyDataToAllTiles();
    }

    runX(tiledTensors[0], tiledTensors[1], tiledTensors[2]);

    {
        auto stats = gem5::ScopedStats(
                stats::kTensorFinalStart, stats::kTensorFinalEnd);
        flattenTiledTensor(tiledTensors[2], outputs);
    }
}

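// SmvLessEqualOp mirrors SmvLessOp exactly, except that it invokes the
// less-than-or-equal kernel (smv_less_equal_nc_vec_fxp).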
void SmvLessEqualOp::runX(TiledTensor& inputs0,
                          TiledTensor& inputs1,
                          TiledTensor& outputs) {
    assert(inputs0.size() == inputs1.size() &&
           inputs0.size() == outputs.size());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_inputs0", getInputsMemType());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_inputs1", getInputsMemType());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_results", getOutputsMemType());
    for (int i = 0; i < inputs0.size(); i++) {
        dout(1) << "Input0: " << i << ", input1: " << i << ", output: " << i
                << "\n";
        Tensor* input0Tile = inputs0.getTileWithData(i);
        Tensor* input1Tile = inputs1.getTileWithData(i);
        Tensor* outputTile = outputs[i];
        const TensorShape& inputShape = input0Tile->getShape();
        const TensorShape& outputShape = outputTile->getShape();
        mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs0",
                        input0Tile->data<float16>(),
                        inputShape.storageSize() * sizeof(float16));
        mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs1",
                        input1Tile->data<float16>(),
                        inputShape.storageSize() * sizeof(float16));
        mapArrayToAccel(smv::kEltwiseOpHw, "host_results",
                        outputTile->data<bool>(),
                        outputShape.storageSize() * sizeof(bool));

        invokeKernel(smv::kEltwiseOpHw, smv_less_equal_nc_vec_fxp,
                     input0Tile->data<float16>(), input1Tile->data<float16>(),
                     outputTile->data<bool>(), smv::spad0, smv::spad1,
                     reinterpret_cast<bool*>(smv::spad2),
                     inputShape.storageSize());
    }
}

void SmvLessEqualOp::tile() {
    // We reuse the unary op tiler for this elementwise comparison operator.
    using namespace smaug::smv::unary;
    auto inputs0 = getInput(Input0);
    auto inputs1 = getInput(Input1);
    auto outputs = getOutput(Outputs);
    int maxTileSize =
            std::min(SmvBackend::SpadSize() / inputs0->getDataTypeSize(),
                     inputs0->getShape().storageSize());
    TensorShape tileShape(
            { 1, maxTileSize }, DataLayout::NC, SmvBackend::Alignment);
    tiledTensors[0] =
            generateTiledTensorPerBatchNC(inputs0, tileShape, this, false);
    tiledTensors[1] =
            generateTiledTensorPerBatchNC(inputs1, tileShape, this, false);
    tiledTensors[2] =
            generateTiledTensorPerBatchNC(outputs, tileShape, this, false);
}

void SmvLessEqualOp::run() {
    auto inputs0 = getInput(Input0);
    auto inputs1 = getInput(Input1);
    auto outputs = getOutput(Outputs);
    const TensorShape& inputs0Shape = inputs0->getShape();
    const TensorShape& inputs1Shape = inputs1->getShape();
    const TensorShape& outputsShape = outputs->getShape();
    assert(inputs0Shape == inputs1Shape && inputs0Shape == outputsShape);

    {
        auto stats = gem5::ScopedStats(
                stats::kTensorPrepStart, stats::kTensorPrepEnd);
        tiledTensors[0].copyDataToAllTiles();
        tiledTensors[1].copyDataToAllTiles();
    }

    runX(tiledTensors[0], tiledTensors[1], tiledTensors[2]);

    {
        auto stats = gem5::ScopedStats(
                stats::kTensorFinalStart, stats::kTensorFinalEnd);
        flattenTiledTensor(tiledTensors[2], outputs);
    }
}

} // namespace smaug
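
To make the tile-size computation in tile() concrete, here is a small standalone sketch with assumed numbers. SmvBackend::SpadSize() is backend-defined, so the 32 KB capacity and the 50000-element tensor below are purely illustrative:

#include <algorithm>
#include <iostream>

int main() {
    // Assumed values for illustration only: the real capacity comes from
    // SmvBackend::SpadSize(), and fp16 elements are 2 bytes wide.
    int spadSize = 32 * 1024;  // hypothetical scratchpad capacity in bytes
    int fp16Size = 2;          // sizeof(float16)
    int storageSize = 50000;   // hypothetical per-batch element count

    // Same arithmetic as tile(): a tile must fit in one scratchpad, but
    // never needs to be larger than the tensor itself.
    int maxTileSize = std::min(spadSize / fp16Size, storageSize);
    int numTiles = (storageSize + maxTileSize - 1) / maxTileSize;

    // Prints: maxTileSize = 16384, numTiles = 4 (three full tiles plus one
    // 848-element remainder tile).
    std::cout << "maxTileSize = " << maxTileSize
              << ", numTiles = " << numTiles << "\n";
    return 0;
}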
Referenced symbols

smaug: the parent namespace of all C++ code in SMAUG. (backend.cpp:38)
smaug::smv::unary: contains common functions for working with unary operators. (smv_unary_op_common.cpp:14)
common.h: utilities for writing and invoking Aladdin kernels from Operators.
smaug::dout: const DebugStream& dout(int debugLevel). Returns a DebugStream instance for the given debug level. (debug_stream.cpp:16)
smaug::setArrayMemTypeIfSimulating: void setArrayMemTypeIfSimulating(unsigned reqCode, const char* arrayName, MemoryType memType). Sets what memory access mechanism the accelerator will use when accessing this array. (common.cpp:21)
smaug::mapArrayToAccel: void mapArrayToAccel(unsigned reqCode, const char* arrayName, void* baseAddr, size_t size). Maps an array of data to the accelerator. (common.cpp:12)
smaug::invokeKernel: void invokeKernel(int accelIdx, unsigned reqCode, const Kernel& kernel, Args&&... args). The generic blocking interface for all accelerator kernel functions. (common.h:72)
smaug::generateTiledTensorPerBatchNC: TiledTensor generateTiledTensorPerBatchNC(Tensor* tensor, const TensorShape& tileShape, Operator* op, bool copyData). Tiles the provided NC Tensor per batch. (tensor_utils.cpp:199)
smaug::flattenTiledTensor: void flattenTiledTensor(TiledTensor& tiledTensor, Tensor* destTensor). Copies the data from each tile in a TiledTensor into a destination Tensor as a contiguous block of memory. (tensor_utils.cpp:343)
smv_less_nc_vec_fxp: void smv_less_nc_vec_fxp(float16* host_inputs0, float16* host_inputs1, bool* host_results, float* inputs0, float* inputs1, bool* results, int inputs_size). (compare.c:19)
smv_less_equal_nc_vec_fxp: void smv_less_equal_nc_vec_fxp(float16* host_inputs0, float16* host_inputs1, bool* host_results, float* inputs0, float* inputs1, bool* results, int inputs_size). (compare.c:48)
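
The accelerated kernels themselves are defined in compare.c and are vectorized; they are not reproduced on this page. As a rough orientation, a scalar sketch of the less-than kernel's structure might look like the following. It assumes the host_load_fp16 helper from smaug/operators/common.h and Aladdin's dmaStore; the name smv_less_nc_scalar_sketch, the omitted vectorization, and the exact load/store details are illustrative assumptions, not the real implementation:

// Scalar sketch (illustrative only) of what smv_less_nc_vec_fxp does.
// The host_* pointers refer to host memory; inputs0/inputs1/results are
// the accelerator's local scratchpads (smv::spad0/1/2 on the host side).
void smv_less_nc_scalar_sketch(float16* host_inputs0,
                               float16* host_inputs1,
                               bool* host_results,
                               float* inputs0,
                               float* inputs1,
                               bool* results,
                               int inputs_size) {
    // Load fp16 host data into the fp32 scratchpads, converting on the way.
    host_load_fp16(inputs0, host_inputs0, inputs_size, 0, 0);
    host_load_fp16(inputs1, host_inputs1, inputs_size, 0, 0);
    // Elementwise less-than into the local bool buffer.
    for (int i = 0; i < inputs_size; i++)
        results[i] = inputs0[i] < inputs1[i];
    // Copy the results back out to host memory.
    dmaStore(host_results, results, inputs_size * sizeof(bool));
}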