1 #include "smaug/operators/smv/smv_softmax_op.h"
2 #include "smaug/operators/smv/smv_kernels.h"
3 #include "smaug/utility/debug_stream.h"
// Tiling step of the SMV softmax operator.
// NOTE(review): this listing is an excerpt; several original lines (the end
// of the assert, the declaration that receives std::min, and everything after
// the tile shape) are not visible here, so the notes below cover only what is.
7 void SmvSoftmaxOp::tile() {
8 auto inputs = getInput(0);
9 auto outputs = getOutput(0);
10 const TensorShape& shape = inputs->getShape();
// Guard: storage dimension 1 of the input is compared against
// SpadSize() / getDataTypeSize(), i.e. the scratchpad size expressed in
// elements (presumably SpadSize() is in bytes - TODO confirm). Exceeding it
// trips the assert below.
11 if (shape.getStorageDim(1) >
12 SmvBackend::SpadSize() / inputs->getDataTypeSize()) {
13 assert(
false &&
"For softmax, a single tile must fit in the local "
// One operand of std::min: scratchpad capacity in elements divided by storage
// dimension 1. The other operand and the variable being assigned are not
// visible in this excerpt (presumably maxInputs, used below - TODO confirm).
18 std::min(SmvBackend::SpadSize() / inputs->getDataTypeSize() /
19 shape.getStorageDim(1),
// Tile shape: maxInputs by shape[1], NC layout, padded to the backend
// alignment.
21 TensorShape tileShape(
22 { maxInputs, shape[1] }, DataLayout::NC, SmvBackend::Alignment);
// Executes the softmax operator tile by tile, pairing input tile i with
// output tile i.
// NOTE(review): this listing is an excerpt; the names of the functions that
// receive several of the argument lists below, and the end of the function,
// are not visible here.
27 void SmvSoftmaxOp::run() {
// tiledTensors[0] holds the input tiles and tiledTensors[1] the output tiles.
28 TiledTensor& inputs = tiledTensors[0];
29 TiledTensor& outputs = tiledTensors[1];
// Tiles are processed pairwise, so both tiled tensors must have equally many.
30 assert(inputs.size() == outputs.size());
// Arguments associating the host_inputs and host_results arrays of
// smv::kEltwiseOpHw with the input and output memory types of this operator
// (the callee is not visible - presumably a memory-type setup call, TODO
// confirm).
32 smv::kEltwiseOpHw,
"host_inputs", getInputsMemType());
34 smv::kEltwiseOpHw,
"host_results", getOutputsMemType());
// One iteration per tile pair.
35 for (
int i = 0; i < inputs.size(); i++) {
// Debug trace of the tile indices being processed.
36 dout(1) <<
"Input: " << i <<
", output: " << i <<
"\n";
// The input tile is obtained through getTileWithData (presumably this makes
// sure the tile data is populated - TODO confirm); the output tile is
// indexed directly.
37 Tensor* inputTile = inputs.getTileWithData(i);
38 Tensor* outputTile = outputs[i];
39 const TensorShape& inputShape = inputTile->getShape();
40 const TensorShape& outputShape = outputTile->getShape();
// Input buffer pointer and its size in bytes: storage size (element count
// including padding, presumably) times sizeof(float16). Callee not visible.
42 inputTile->data<float16>(),
43 inputShape.storageSize() *
sizeof(float16));
// Same pointer and byte-size pair for the output tile.
45 outputTile->data<float16>(),
46 outputShape.storageSize() *
sizeof(float16));
// Kernel-style argument list: host input and output buffers, the two
// scratchpads smv::spad0 and smv::spad1, the two tile dimensions, and the
// padding of dimension 1 (the invoked function is not visible here).
48 inputTile->data<float16>(), outputTile->data<float16>(),
49 smv::spad0, smv::spad1, inputShape[0], inputShape[1],
50 inputShape.getPadding(1));
// Scoped gem5 statistics region bounded by the kTensorFinalStart and
// kTensorFinalEnd markers; the work it measures is not visible in this
// excerpt (presumably merging output tiles back into the output tensor -
// TODO confirm).
53 auto stats = gem5::ScopedStats(
54 stats::kTensorFinalStart, stats::kTensorFinalEnd);