SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
ref_softmax_op.cpp
#include <cmath>
#include <cfloat>  // FLT_MAX

#include "smaug/core/backend.h"
#include "smaug/operators/common.h"
#include "smaug/operators/softmax_op.h"

#ifdef __cplusplus
extern "C" {
#endif

void ref_softmax_nc(float* inputs,
                    float* results,
                    int input_num,
                    int input_size,
                    int input_pad) {
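    // dmaLoad and dmaStore are Aladdin's DMA intrinsics; this call brings the
    // whole (padded) input batch into the accelerator's local memory before
    // any computation begins.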
    dmaLoad(inputs, inputs,
            input_num * (input_size + input_pad) * sizeof(float));
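    // ARRAY_2D reinterprets a flat buffer as a 2D array, so _inputs[i][j] is
    // element j of input i; the row stride includes the alignment padding.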
    ARRAY_2D(float, _inputs, inputs, input_size + input_pad);
    ARRAY_2D(float, _results, results, input_size + input_pad);

    // Compute the maximum of the elements in groups of 8 and the remainder
    // one by one.
    int max8_remainder = input_size - ((input_size >> 3) << 3);
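    // (input_size >> 3) << 3 rounds input_size down to a multiple of 8, so
    // max8_remainder is just input_size % 8. The max9/max2 helpers below
    // return the maximum of their arguments; the 8-wide loop body lets
    // Aladdin schedule the comparisons as a tree.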

    softmax_batch:
    for (int i = 0; i < input_num; i++) {
        // Find the maximum of each input.
        float max_elem = -FLT_MAX;
        softmax_max_loop0:
        for (int j = 0; j < input_size - max8_remainder; j += 8) {
            max_elem = max9(max_elem,
                            _inputs[i][j],
                            _inputs[i][j + 1],
                            _inputs[i][j + 2],
                            _inputs[i][j + 3],
                            _inputs[i][j + 4],
                            _inputs[i][j + 5],
                            _inputs[i][j + 6],
                            _inputs[i][j + 7]);
        }
        // Do the remainder.
        softmax_max_loop1:
        for (int j = input_size - max8_remainder - 1; j < input_size; j++) {
            max_elem = max2(max_elem, _inputs[i][j]);
        }
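        // Note: starting softmax_max_loop1 one element early revisits a value
        // already seen by softmax_max_loop0, which is harmless because taking
        // a max twice is idempotent.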

        // Subtract the max from each activation.
        softmax_max_sub:
        for (int j = 0; j < input_size; j++) {
            _results[i][j] = _inputs[i][j] - max_elem;
        }
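        // Subtracting the max leaves the result unchanged (softmax(x) ==
        // softmax(x - c) for any constant c) and keeps every exponent at or
        // below zero, so the exp() calls below cannot overflow.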

        // Now exponentiate.
        softmax_exp:
        for (int j = 0; j < input_size; j++) {
            _results[i][j] = exp(_results[i][j]);
        }

        // Compute the normalization factor separately from the
        // exponentiation, making it easier for Aladdin to turn this into an
        // adder tree.
        float normaliz = 0.0;
        softmax_inner0:
        for (int j = 0; j < input_size; j++) {
            normaliz += _results[i][j];
        }
        // Precompute the division so that later we can just do a
        // multiplication.
        normaliz = 1.0 / (normaliz + 1e-6);  // epsilon for numerical stability.

        softmax_inner1:
        for (int j = 0; j < input_size; j++) {
            _results[i][j] *= normaliz;
        }
    }
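    // Write the finished probabilities back to host memory.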
    dmaStore(results, results,
             input_num * (input_size + input_pad) * sizeof(float));
}

#ifdef __cplusplus
}
#endif

namespace smaug {

template <>
void SoftmaxOp<ReferenceBackend>::run() {
    auto inputs = getInput(Inputs);
    auto outputs = getOutput(Outputs);
    const TensorShape& inputShape = inputs->getShape();
    assert(inputShape == outputs->getShape());
    float* inputData = inputs->data<float>();
    float* outputData = outputs->data<float>();
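    // Map the host buffers to the accelerator so that the kernel's "inputs"
    // and "results" arrays resolve to inputData and outputData.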
    mapArrayToAccel(ref::kEltwiseOpHw, "inputs", inputData,
                    inputs->getShape().storageSize() * sizeof(float));
    mapArrayToAccel(ref::kEltwiseOpHw, "results", outputData,
                    inputs->getShape().storageSize() * sizeof(float));
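    // invokeKernel is the generic blocking interface for accelerator kernels:
    // it runs ref_softmax_nc on the accelerator identified by
    // ref::kEltwiseOpHw and returns when it completes.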
    invokeKernel(ref::kEltwiseOpHw, ref_softmax_nc, inputData, outputData,
                 inputShape[0], inputShape[1], inputShape.getPadding(1));
}

}  // namespace smaug

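For reference, ref_softmax_nc computes the numerically stable softmax,
softmax(x)[j] = exp(x[j] - max(x)) / (sum_k exp(x[k] - max(x)) + 1e-6),
independently for each of the input_num rows. Below is a minimal standalone
sketch of the same computation with the DMA, padding, and 8-wide unrolling
stripped away; the function name softmax_row and the use of std::vector are
illustrative, not part of SMAUG.

#include <algorithm>
#include <cmath>
#include <vector>

// Softmax over a single row, mirroring ref_softmax_nc without the
// Aladdin-specific machinery. Assumes x is non-empty.
std::vector<float> softmax_row(const std::vector<float>& x) {
    float max_elem = *std::max_element(x.begin(), x.end());
    std::vector<float> y(x.size());
    float sum = 0.0f;
    for (size_t j = 0; j < x.size(); j++) {
        // Exponents are <= 0 after subtracting the max, so exp cannot
        // overflow.
        y[j] = std::exp(x[j] - max_elem);
        sum += y[j];
    }
    // One reciprocal plus per-element multiplies, as in the kernel.
    float scale = 1.0f / (sum + 1e-6f);
    for (size_t j = 0; j < x.size(); j++)
        y[j] *= scale;
    return y;
}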