#include "smaug/operators/smv/smv_greater_op.h"

#include <algorithm>
#include <cassert>

#include "smaug/core/backend.h"
#include "smaug/operators/common.h"
#include "smaug/operators/smv/smv_kernels.h"
#include "smaug/operators/smv/smv_unary_op_common.h"
#include "smaug/utility/debug_stream.h"
10 void SmvGreaterOp::runX(TiledTensor& inputs0,
12 TiledTensor& outputs) {
13 assert(inputs0.size() == inputs1.size() &&
14 inputs0.size() == outputs.size());
16 smv::kEltwiseOpHw,
"host_inputs0", getInputsMemType());
18 smv::kEltwiseOpHw,
"host_inputs1", getInputsMemType());
20 smv::kEltwiseOpHw,
"host_results", getOutputsMemType());
21 for (
int i = 0; i < inputs0.size(); i++) {
22 dout(1) <<
"Input0: " << i <<
", input1: " << i <<
", output: " << i
24 Tensor* input0Tile = inputs0.getTileWithData(i);
25 Tensor* input1Tile = inputs1.getTileWithData(i);
26 Tensor* outputTile = outputs[i];
27 const TensorShape& inputShape = input0Tile->getShape();
28 const TensorShape& outputShape = outputTile->getShape();
30 input0Tile->data<float16>(),
31 inputShape.storageSize() *
sizeof(float16));
33 input1Tile->data<float16>(),
34 inputShape.storageSize() *
sizeof(float16));
36 outputTile->data<
bool>(),
37 outputShape.storageSize() *
sizeof(
bool));
40 input0Tile->data<float16>(), input1Tile->data<float16>(),
41 outputTile->data<
bool>(), smv::spad0, smv::spad1,
42 reinterpret_cast<bool*
>(smv::spad2),
43 inputShape.storageSize());
47 void SmvGreaterOp::tile() {
50 auto inputs0 = getInput(Input0);
51 auto inputs1 = getInput(Input1);
52 auto outputs = getOutput(Outputs);
54 std::min(SmvBackend::SpadSize() / inputs0->getDataTypeSize(),
55 inputs0->getShape().storageSize());
56 TensorShape tileShape(
57 { 1, maxTileSize }, DataLayout::NC, SmvBackend::Alignment);
66 void SmvGreaterOp::run() {
67 auto inputs0 = getInput(Input0);
68 auto inputs1 = getInput(Input1);
69 auto outputs = getOutput(Outputs);
70 const TensorShape& inputs0Shape = inputs0->getShape();
71 const TensorShape& inputs1Shape = inputs1->getShape();
72 const TensorShape& outputsShape = outputs->getShape();
73 assert(inputs0Shape == inputs1Shape && inputs0Shape == outputsShape);
76 auto stats = gem5::ScopedStats(
77 stats::kTensorPrepStart, stats::kTensorPrepEnd);
78 tiledTensors[0].copyDataToAllTiles();
79 tiledTensors[1].copyDataToAllTiles();
82 runX(tiledTensors[0], tiledTensors[1], tiledTensors[2]);
85 auto stats = gem5::ScopedStats(
86 stats::kTensorFinalStart, stats::kTensorFinalEnd);
91 void SmvGreaterEqualOp::runX(TiledTensor& inputs0,
93 TiledTensor& outputs) {
94 assert(inputs0.size() == inputs1.size() &&
95 inputs0.size() == outputs.size());
97 smv::kEltwiseOpHw,
"host_inputs0", getInputsMemType());
99 smv::kEltwiseOpHw,
"host_inputs1", getInputsMemType());
101 smv::kEltwiseOpHw,
"host_results", getOutputsMemType());
102 for (
int i = 0; i < inputs0.size(); i++) {
103 dout(1) <<
"Input0: " << i <<
", input1: " << i <<
", output: " << i
105 Tensor* input0Tile = inputs0.getTileWithData(i);
106 Tensor* input1Tile = inputs1.getTileWithData(i);
107 Tensor* outputTile = outputs[i];
108 const TensorShape& inputShape = input0Tile->getShape();
109 const TensorShape& outputShape = outputTile->getShape();
111 input0Tile->data<float16>(),
112 inputShape.storageSize() *
sizeof(float16));
114 input1Tile->data<float16>(),
115 inputShape.storageSize() *
sizeof(float16));
117 outputTile->data<
bool>(),
118 outputShape.storageSize() *
sizeof(
bool));
121 input0Tile->data<float16>(), input1Tile->data<float16>(),
122 outputTile->data<
bool>(), smv::spad0, smv::spad1,
123 reinterpret_cast<bool*
>(smv::spad2),
124 inputShape.storageSize());
128 void SmvGreaterEqualOp::tile() {
131 auto inputs0 = getInput(Input0);
132 auto inputs1 = getInput(Input1);
133 auto outputs = getOutput(Outputs);
135 std::min(SmvBackend::SpadSize() / inputs0->getDataTypeSize(),
136 inputs0->getShape().storageSize());
137 TensorShape tileShape(
138 { 1, maxTileSize }, DataLayout::NC, SmvBackend::Alignment);
147 void SmvGreaterEqualOp::run() {
148 auto inputs0 = getInput(Input0);
149 auto inputs1 = getInput(Input1);
150 auto outputs = getOutput(Outputs);
151 const TensorShape& inputs0Shape = inputs0->getShape();
152 const TensorShape& inputs1Shape = inputs1->getShape();
153 const TensorShape& outputsShape = outputs->getShape();
154 assert(inputs0Shape == inputs1Shape && inputs0Shape == outputsShape);
157 auto stats = gem5::ScopedStats(
158 stats::kTensorPrepStart, stats::kTensorPrepEnd);
159 tiledTensors[0].copyDataToAllTiles();
160 tiledTensors[1].copyDataToAllTiles();
163 runX(tiledTensors[0], tiledTensors[1], tiledTensors[2]);
166 auto stats = gem5::ScopedStats(
167 stats::kTensorFinalStart, stats::kTensorFinalEnd);