SMAUG
Simulating Machine Learning Applications Using gem5-Aladdin
smv_softmax_op.cpp
#include "smaug/operators/smv/smv_softmax_op.h"
#include "smaug/operators/smv/smv_kernels.h"
#include "smaug/utility/debug_stream.h"

namespace smaug {

void SmvSoftmaxOp::tile() {
    auto inputs = getInput(0);
    auto outputs = getOutput(0);
    const TensorShape& shape = inputs->getShape();
    if (shape.getStorageDim(1) >
        SmvBackend::SpadSize() / inputs->getDataTypeSize()) {
        assert(false && "For softmax, a single tile must fit in the local "
                        "scratchpad size!");
    }
    // We can only tile on the N dimension.
    int maxInputs =
            std::min(SmvBackend::SpadSize() / inputs->getDataTypeSize() /
                             shape.getStorageDim(1),
                     shape[0]);
    TensorShape tileShape(
            { maxInputs, shape[1] }, DataLayout::NC, SmvBackend::Alignment);
    tiledTensors[0] = generateTiledTensor(inputs, tileShape, this);
    tiledTensors[1] = generateTiledTensor(outputs, tileShape, this);
}

void SmvSoftmaxOp::run() {
    TiledTensor& inputs = tiledTensors[0];
    TiledTensor& outputs = tiledTensors[1];
    assert(inputs.size() == outputs.size());
    // Set the memory access mechanism the accelerator will use for these
    // host arrays when simulating.
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_inputs", getInputsMemType());
    setArrayMemTypeIfSimulating(
            smv::kEltwiseOpHw, "host_results", getOutputsMemType());
    for (int i = 0; i < inputs.size(); i++) {
        dout(1) << "Input: " << i << ", output: " << i << "\n";
        Tensor* inputTile = inputs.getTileWithData(i);
        Tensor* outputTile = outputs[i];
        const TensorShape& inputShape = inputTile->getShape();
        const TensorShape& outputShape = outputTile->getShape();
        // Map each tile's host buffers to the accelerator's array arguments.
        mapArrayToAccel(smv::kEltwiseOpHw, "host_inputs",
                        inputTile->data<float16>(),
                        inputShape.storageSize() * sizeof(float16));
        mapArrayToAccel(smv::kEltwiseOpHw, "host_results",
                        outputTile->data<float16>(),
                        outputShape.storageSize() * sizeof(float16));
        // Run the vectorized softmax kernel on this tile (blocking call).
        invokeKernel(smv::kEltwiseOpHw, smv_softmax_nc_vec_fxp,
                     inputTile->data<float16>(), outputTile->data<float16>(),
                     smv::spad0, smv::spad1, inputShape[0], inputShape[1],
                     inputShape.getPadding(1));
    }
    {
        // Stitch the output tiles back into the full output tensor.
        auto stats = gem5::ScopedStats(
                stats::kTensorFinalStart, stats::kTensorFinalEnd);
        outputs.untile();
    }
}

}  // namespace smaug
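
To make the tile-size arithmetic in tile() concrete, the standalone sketch below reproduces the same computation of maxInputs with assumed numbers (a 32 KB scratchpad, 2-byte float16 elements, and an example input shape). These values are illustrative only and are not taken from the actual SMV backend configuration.

#include <algorithm>
#include <cstdio>

int main() {
    // All values below are assumptions for illustration.
    int spadSize = 32768;      // hypothetical scratchpad capacity in bytes
    int dataTypeSize = 2;      // sizeof(float16)
    int batch = 128;           // shape[0]: number of rows to softmax
    int paddedRowSize = 1008;  // shape.getStorageDim(1): row length with alignment padding

    // Mirror of the check in tile(): one row must fit in a single scratchpad.
    if (paddedRowSize > spadSize / dataTypeSize) {
        std::printf("A single tile does not fit in the scratchpad!\n");
        return 1;
    }
    // Tiling happens only along N: rows per tile is bounded by scratchpad capacity.
    int maxInputs = std::min(spadSize / dataTypeSize / paddedRowSize, batch);
    std::printf("%d rows per tile of width %d -> %d tiles\n",
                maxInputs, paddedRowSize, (batch + maxInputs - 1) / maxInputs);
    // With these numbers: 32768 / 2 / 1008 = 16 rows per tile, so 8 tiles.
    return 0;
}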
smaug::dout
const DebugStream & dout(int debugLevel)
Returns a DebugStream instance for the given debug level.
Definition: debug_stream.cpp:16
smaug::setArrayMemTypeIfSimulating
void setArrayMemTypeIfSimulating(unsigned reqCode, const char *arrayName, MemoryType memType)
Sets what memory access mechanism the accelerator will use when accessing this array.
Definition: common.cpp:21
smv_softmax_nc_vec_fxp
void smv_softmax_nc_vec_fxp(float16 *host_inputs, float16 *host_results, float *inputs, float *results, int input_num, int input_size, int input_pad)
Definition: activation_functions_simd.c:31
smaug
The smaug namespace is the parent namespace of all C++ code in SMAUG.
Definition: backend.cpp:38
smaug::generateTiledTensor
TiledTensor generateTiledTensor(Tensor *tensor, const TensorShape &tileShape, Operator *op, bool copyData)
Generates a TiledTensor from a source Tensor.
Definition: tensor_utils.cpp:335
smaug::mapArrayToAccel
void mapArrayToAccel(unsigned reqCode, const char *arrayName, void *baseAddr, size_t size)
Maps an array of data to the accelerator.
Definition: common.cpp:12
smaug::invokeKernel
void invokeKernel(int accelIdx, unsigned reqCode, const Kernel &kernel, Args &&... args)
The generic blocking interface for all accelerator kernel functions.
Definition: common.h:72
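
For reference, each invocation of smv_softmax_nc_vec_fxp computes a softmax over every row of its tile. The scalar sketch below shows that per-row math on a plain float buffer; it is only an illustrative reference implementation, not the vectorized fixed-point kernel defined in activation_functions_simd.c, and it ignores the float16 host format, the scratchpads, and column padding.

#include <algorithm>
#include <cmath>
#include <vector>

// Reference softmax over each row of a row-major [inputNum x inputSize] buffer.
void softmaxRows(const std::vector<float>& in, std::vector<float>& out,
                 int inputNum, int inputSize) {
    for (int n = 0; n < inputNum; n++) {
        const float* row = &in[n * inputSize];
        float* outRow = &out[n * inputSize];
        // Subtract the row maximum before exponentiating (standard trick for
        // numerical stability in a reference implementation).
        float maxVal = *std::max_element(row, row + inputSize);
        float sum = 0.0f;
        for (int i = 0; i < inputSize; i++) {
            outRow[i] = std::exp(row[i] - maxVal);
            sum += outRow[i];
        }
        for (int i = 0; i < inputSize; i++)
            outRow[i] /= sum;  // each row now sums to 1
    }
}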