1 #include "smaug/operators/smv/smv_greater_op.h" 
    2 #include "smaug/core/backend.h" 
    4 #include "smaug/operators/smv/smv_kernels.h" 
    5 #include "smaug/operators/smv/smv_unary_op_common.h" 
    6 #include "smaug/utility/debug_stream.h" 
// SmvGreaterOp::runX: runs the elementwise "greater" comparison tile by tile
// over pre-tiled operands.
// NOTE(review): this chunk is garbled -- original file line numbers are
// embedded in the text and several interior lines are missing (e.g. the
// `inputs1` parameter around original line 11, the names of the calls whose
// argument lists appear as fragments below, and the loop/function closing
// braces). Comments describe only what the visible fragments establish.
   10 void SmvGreaterOp::runX(TiledTensor& inputs0,

   12                         TiledTensor& outputs) {

// All three tiled operands must contain the same number of tiles.
   13     assert(inputs0.size() == inputs1.size() &&

   14            inputs0.size() == outputs.size());

// Fragments of three calls configuring the eltwise accelerator's host-side
// arrays ("host_inputs0"/"host_inputs1" use the inputs' memory type,
// "host_results" the outputs') -- the callee names are lost in this chunk.
   16             smv::kEltwiseOpHw, 
"host_inputs0", getInputsMemType());

   18             smv::kEltwiseOpHw, 
"host_inputs1", getInputsMemType());

   20             smv::kEltwiseOpHw, 
"host_results", getOutputsMemType());

// Process each tile triple (inputs0[i], inputs1[i], outputs[i]).
   21     for (
int i = 0; i < inputs0.size(); i++) {

// Debug trace (verbosity level 1) of the tile indices being processed; the
// stream continuation / statement terminator is missing in this chunk.
   22         dout(1) << 
"Input0: " << i << 
", input1: " << i << 
", output: " << i

// getTileWithData presumably returns the tile with its data populated --
// TODO confirm against TiledTensor's API.
   24         Tensor* input0Tile = inputs0.getTileWithData(i);

   25         Tensor* input1Tile = inputs1.getTileWithData(i);

   26         Tensor* outputTile = outputs[i];

   27         const TensorShape& inputShape = input0Tile->getShape();

   28         const TensorShape& outputShape = outputTile->getShape();

// Argument fragments sizing the fp16 input buffers and the bool result
// buffer in bytes (storageSize() elements each); the callee names (likely
// accelerator array-mapping calls -- verify) are missing here.
   30                         input0Tile->data<float16>(),

   31                         inputShape.storageSize() * 
sizeof(float16));

   33                         input1Tile->data<float16>(),

   34                         inputShape.storageSize() * 
sizeof(float16));

   36                         outputTile->data<
bool>(),

   37                         outputShape.storageSize() * 
sizeof(
bool));

// Kernel-invocation argument fragment: fp16 inputs, bool outputs, the
// spad0/spad1 scratchpads, and spad2 reinterpreted as bool* for results;
// the kernel name itself is lost in this chunk.
   40                      input0Tile->data<float16>(), input1Tile->data<float16>(),

   41                      outputTile->data<
bool>(), smv::spad0, smv::spad1,

   42                      reinterpret_cast<bool*
>(smv::spad2),

   43                      inputShape.storageSize());

// SmvGreaterOp::tile: computes the tiling configuration for the operands.
// NOTE(review): garbled chunk -- the variable receiving the std::min(...)
// result (original line 53, presumably the `maxTileSize` used below) and the
// tail of the function (original lines 58+, presumably the code that fills
// tiledTensors from tileShape) are missing from this view.
   47 void SmvGreaterOp::tile() {

   50     auto inputs0 = getInput(Input0);

   51     auto inputs1 = getInput(Input1);

   52     auto outputs = getOutput(Outputs);

// Tile size is capped by how many elements of this data type fit in one
// SMV scratchpad, and never exceeds the tensor's total storage size.
   54             std::min(SmvBackend::SpadSize() / inputs0->getDataTypeSize(),

   55                      inputs0->getShape().storageSize());

// One-batch NC-layout tile shape, aligned per the SMV backend.
   56     TensorShape tileShape(

   57             { 1, maxTileSize }, DataLayout::NC, SmvBackend::Alignment);

// SmvGreaterOp::run: validates operand shapes, stages tile data, and
// dispatches runX on the tiled tensors.
// NOTE(review): garbled chunk -- the braces delimiting the two stats scopes
// and the function tail (original lines 87+, presumably copying
// tiledTensors[2] back into the output tensor) are missing from this view.
   66 void SmvGreaterOp::run() {

   67     auto inputs0 = getInput(Input0);

   68     auto inputs1 = getInput(Input1);

   69     auto outputs = getOutput(Outputs);

   70     const TensorShape& inputs0Shape = inputs0->getShape();

   71     const TensorShape& inputs1Shape = inputs1->getShape();

   72     const TensorShape& outputsShape = outputs->getShape();

// Elementwise comparison requires all three tensors to share one shape.
   73     assert(inputs0Shape == inputs1Shape && inputs0Shape == outputsShape);

// ScopedStats presumably brackets this region with gem5 statistics markers
// (tensor-preparation phase) -- TODO confirm.
   76         auto stats = gem5::ScopedStats(

   77                 stats::kTensorPrepStart, stats::kTensorPrepEnd);

// Populate every tile of both inputs before invoking the accelerator.
   78         tiledTensors[0].copyDataToAllTiles();

   79         tiledTensors[1].copyDataToAllTiles();

// Tiles 0 and 1 are the inputs; tile set 2 receives the results.
   82     runX(tiledTensors[0], tiledTensors[1], tiledTensors[2]);

// Second stats scope (tensor-finalization markers); its body is missing.
   85         auto stats = gem5::ScopedStats(

   86                 stats::kTensorFinalStart, stats::kTensorFinalEnd);

// SmvGreaterEqualOp::runX: runs the elementwise "greater or equal"
// comparison tile by tile over pre-tiled operands.
// NOTE(review): this chunk is garbled -- original file line numbers are
// embedded in the text and several interior lines are missing (e.g. the
// `inputs1` parameter around original line 92, the names of the calls whose
// argument lists appear as fragments below, and the loop/function closing
// braces). Comments describe only what the visible fragments establish.
   91 void SmvGreaterEqualOp::runX(TiledTensor& inputs0,

   93                              TiledTensor& outputs) {

// All three tiled operands must contain the same number of tiles.
   94     assert(inputs0.size() == inputs1.size() &&

   95            inputs0.size() == outputs.size());

// Fragments of three calls configuring the eltwise accelerator's host-side
// arrays ("host_inputs0"/"host_inputs1" use the inputs' memory type,
// "host_results" the outputs') -- the callee names are lost in this chunk.
   97             smv::kEltwiseOpHw, 
"host_inputs0", getInputsMemType());

   99             smv::kEltwiseOpHw, 
"host_inputs1", getInputsMemType());

  101             smv::kEltwiseOpHw, 
"host_results", getOutputsMemType());

// Process each tile triple (inputs0[i], inputs1[i], outputs[i]).
  102     for (
int i = 0; i < inputs0.size(); i++) {

// Debug trace (verbosity level 1) of the tile indices being processed; the
// stream continuation / statement terminator is missing in this chunk.
  103         dout(1) << 
"Input0: " << i << 
", input1: " << i << 
", output: " << i

// getTileWithData presumably returns the tile with its data populated --
// TODO confirm against TiledTensor's API.
  105         Tensor* input0Tile = inputs0.getTileWithData(i);

  106         Tensor* input1Tile = inputs1.getTileWithData(i);

  107         Tensor* outputTile = outputs[i];

  108         const TensorShape& inputShape = input0Tile->getShape();

  109         const TensorShape& outputShape = outputTile->getShape();

// Argument fragments sizing the fp16 input buffers and the bool result
// buffer in bytes (storageSize() elements each); the callee names (likely
// accelerator array-mapping calls -- verify) are missing here.
  111                         input0Tile->data<float16>(),

  112                         inputShape.storageSize() * 
sizeof(float16));

  114                         input1Tile->data<float16>(),

  115                         inputShape.storageSize() * 
sizeof(float16));

  117                         outputTile->data<
bool>(),

  118                         outputShape.storageSize() * 
sizeof(
bool));

// Kernel-invocation argument fragment: fp16 inputs, bool outputs, the
// spad0/spad1 scratchpads, and spad2 reinterpreted as bool* for results;
// the kernel name itself is lost in this chunk.
  121                      input0Tile->data<float16>(), input1Tile->data<float16>(),

  122                      outputTile->data<
bool>(), smv::spad0, smv::spad1,

  123                      reinterpret_cast<bool*
>(smv::spad2),

  124                      inputShape.storageSize());

// SmvGreaterEqualOp::tile: computes the tiling configuration for the
// operands.
// NOTE(review): garbled chunk -- the variable receiving the std::min(...)
// result (original line 134, presumably the `maxTileSize` used below) and
// the tail of the function (original lines 139+, presumably the code that
// fills tiledTensors from tileShape) are missing from this view.
  128 void SmvGreaterEqualOp::tile() {

  131     auto inputs0 = getInput(Input0);

  132     auto inputs1 = getInput(Input1);

  133     auto outputs = getOutput(Outputs);

// Tile size is capped by how many elements of this data type fit in one
// SMV scratchpad, and never exceeds the tensor's total storage size.
  135             std::min(SmvBackend::SpadSize() / inputs0->getDataTypeSize(),

  136                      inputs0->getShape().storageSize());

// One-batch NC-layout tile shape, aligned per the SMV backend.
  137     TensorShape tileShape(

  138             { 1, maxTileSize }, DataLayout::NC, SmvBackend::Alignment);

  147 void SmvGreaterEqualOp::run() {
 
  148     auto inputs0 = getInput(Input0);
 
  149     auto inputs1 = getInput(Input1);
 
  150     auto outputs = getOutput(Outputs);
 
  151     const TensorShape& inputs0Shape = inputs0->getShape();
 
  152     const TensorShape& inputs1Shape = inputs1->getShape();
 
  153     const TensorShape& outputsShape = outputs->getShape();
 
  154     assert(inputs0Shape == inputs1Shape && inputs0Shape == outputsShape);
 
  157         auto stats = gem5::ScopedStats(
 
  158                 stats::kTensorPrepStart, stats::kTensorPrepEnd);
 
  159         tiledTensors[0].copyDataToAllTiles();
 
  160         tiledTensors[1].copyDataToAllTiles();
 
  163     runX(tiledTensors[0], tiledTensors[1], tiledTensors[2]);
 
  166         auto stats = gem5::ScopedStats(
 
  167                 stats::kTensorFinalStart, stats::kTensorFinalEnd);