SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
activation_functions_simd.h
#ifndef _OPERATORS_SMV_KERNELS_ACTIVATION_FUNCTIONS_SIMD_H_
#define _OPERATORS_SMV_KERNELS_ACTIVATION_FUNCTIONS_SIMD_H_

#include "assert.h"
#include "stdio.h"

#include "smaug/operators/common.h"
#include "smaug/operators/smv/kernels/params.h"

#ifdef __cplusplus
extern "C" {
#endif

// The rectified linear activation function
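// Lane-wise ReLU(x) = max(x, 0): the vector compare (a > zero) produces an
// all-ones mask in each lane where a is positive, which VEC256_MASK then uses
// to keep those lanes and zero out the rest.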
ALWAYS_INLINE
static inline v8fp_t relu_vec_unit(v8fp_t a) {
    v8fp_t zero = (v8fp_t){ 0 };
    v8sfx_t mask = (a > zero);
    return VEC256_MASK(a, mask);
}

ALWAYS_INLINE
static inline void relu_vec(v8fp_t* inputs, v8fp_t* results, int inputs_size) {
    relu_loop:
    for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
        results[i] = relu_vec_unit(inputs[i]);
    }
}

// The leaky rectified linear activation function
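// Lane-wise LReLU(x) = x for x >= 0 and slope * x otherwise. Two complementary
// lane masks select the scaled value for negative lanes and the original value
// for the rest; summing the masked halves merges them back into one vector.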
ALWAYS_INLINE
static inline v8fp_t lrelu_vec_unit(v8fp_t a, float slope) {
    v8fp_t zero = (v8fp_t){ 0 };
    v8fp_t slope_vec =
            (v8fp_t){ slope, slope, slope, slope, slope, slope, slope, slope };
    v8sfx_t neg_mask = a < zero;
    v8sfx_t pos_mask = a >= zero;
    v8fp_t scaled = slope_vec * a;
    v8fp_t first = VEC256_MASK(scaled, neg_mask);
    v8fp_t second = VEC256_MASK(a, pos_mask);
    return first + second;
}

ALWAYS_INLINE
static inline void lrelu_vec(v8fp_t* inputs,
                             v8fp_t* results,
                             int inputs_size,
                             float slope) {
    lrelu_loop:
    for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
        results[i] = lrelu_vec_unit(inputs[i], slope);
    }
}

// The exponential linear activation function
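// Lane-wise ELU(x) = x for x >= 0 and alpha * (exp(x) - 1) otherwise. The body
// walks individual lanes because exp() has no vector form here.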
ALWAYS_INLINE
static inline v8fp_t elu_vec_unit(v8fp_t a, float alpha) {
    elu_unit_loop:
    for (int i = 0; i < VECTOR_SIZE; i++) {
        float value = a[i];
        if (value < 0.0) {
            a[i] = alpha * (exp(value) - 1);
        }
    }
    return a;
}

ALWAYS_INLINE
static inline void elu_vec(v8fp_t* inputs,
                           v8fp_t* results,
                           int inputs_size,
                           float alpha) {
    elu_loop:
    for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
        results[i] = elu_vec_unit(inputs[i], alpha);
    }
}

// The scaled exponential linear activation function
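// SELU(x) = lambda * ELU(x, alpha): reuses the ELU kernel, then scales every
// lane by lambda.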
ALWAYS_INLINE
static inline v8fp_t selu_vec_unit(v8fp_t a, float alpha, float lambda) {
    a = elu_vec_unit(a, alpha);
    selu_unit_loop:
    for (int i = 0; i < VECTOR_SIZE; i++) {
        a[i] = lambda * a[i];
    }
    return a;
}

ALWAYS_INLINE
static inline void selu_vec(v8fp_t* inputs,
                            v8fp_t* results,
                            int inputs_size,
                            float alpha,
                            float lambda) {
    selu_loop:
    for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
        results[i] = selu_vec_unit(inputs[i], alpha, lambda);
    }
}

// The logistic activation function
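// Lane-wise sigmoid(x) = 1 / (1 + exp(-x)), computed one lane at a time
// because of the call to exp().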
ALWAYS_INLINE
static inline v8fp_t sigmoid_vec_unit(v8fp_t a) {
    sigmoid_unit_loop:
    for (int i = 0; i < VECTOR_SIZE; i++) {
        a[i] = 1.0 / (1.0 + exp(-a[i]));
    }
    return a;
}

ALWAYS_INLINE
static inline void sigmoid_vec(v8fp_t* inputs,
                               v8fp_t* results,
                               int inputs_size) {
    sigmoid_loop:
    for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
        results[i] = sigmoid_vec_unit(inputs[i]);
    }
}

// The hyperbolic tangent activation function
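// Reuses the sigmoid kernel via the identity tanh(x) = 2 * sigmoid(2x) - 1,
// avoiding the two exp() calls per lane that a direct tanh evaluation would
// need.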
ALWAYS_INLINE
static inline v8fp_t tanh_vec_unit(v8fp_t a) {
    v8fp_t one = { 1, 1, 1, 1, 1, 1, 1, 1 };
    v8fp_t two = { 2, 2, 2, 2, 2, 2, 2, 2 };
    v8fp_t two_a = two * a;
    v8fp_t sig = sigmoid_vec_unit(two_a);
    return two * sig - one;
}

ALWAYS_INLINE
static inline void tanh_vec(v8fp_t* inputs, v8fp_t* results, int inputs_size) {
    tanh_loop:
    for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
        results[i] = tanh_vec_unit(inputs[i]);
    }
}

// The hard hyperbolic tangent activation function
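// Lane-wise clamp of x to [min, max]: a piecewise-linear approximation of
// tanh that avoids exp() entirely.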
ALWAYS_INLINE
static inline v8fp_t hard_tanh_vec_unit(v8fp_t a, float min, float max) {
    hard_tanh_unit_loop:
    for (int i = 0; i < VECTOR_SIZE; i++) {
        float value = a[i];
        a[i] = value < min ? min : value > max ? max : value;
    }
    return a;
}

ALWAYS_INLINE
static inline void hard_tanh_vec(v8fp_t* inputs,
                                 v8fp_t* results,
                                 int inputs_size,
                                 float min,
                                 float max) {
    hard_tanh_loop:
    for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
        results[i] = hard_tanh_vec_unit(inputs[i], min, max);
    }
}

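// Top-level dispatcher: reinterprets the scalar buffers as v8fp_t arrays and
// routes to the kernel selected by `function`. Every kernel iterates
// inputs_size / VECTOR_SIZE times, so elements beyond the last full vector are
// left untouched; callers should pass a length that is a multiple of
// VECTOR_SIZE.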
ALWAYS_INLINE
static inline void activation_fun_vec(float* inputs,
                                      float* results,
                                      int inputs_size,
                                      activation_type function,
                                      activation_param_t params) {
    VEC_ARRAY_1D(v8fp_t, _inputs, inputs);
    VEC_ARRAY_1D(v8fp_t, _results, results);
    if (function == RELU) {
        relu_vec(_inputs, _results, inputs_size);
    } else if (function == LRELU) {
        lrelu_vec(_inputs, _results, inputs_size, params.slope);
    } else if (function == ELU) {
        elu_vec(_inputs, _results, inputs_size, params.alpha);
    } else if (function == SELU) {
        selu_vec(_inputs, _results, inputs_size, params.alpha, params.lambda);
    } else if (function == TANH) {
        tanh_vec(_inputs, _results, inputs_size);
    } else if (function == HARD_TANH) {
        hard_tanh_vec(_inputs, _results, inputs_size, params.min, params.max);
    } else if (function == SIGMOID) {
        sigmoid_vec(_inputs, _results, inputs_size);
    } else if (function == SOFTMAX) {
        assert(false && "Softmax SIMD shouldn't be called from here!");
    }
}

#ifdef __cplusplus
}  // extern "C"
#endif

#endif
Referenced symbols

activation_type (enum _activation_type): The activation function to apply to an operator's output in hardware.
activation_param_t (struct _activation_param_t): Parameters to the activation function hardware. Definition: common.h:194.
v8fp_t (fp_t vector): 8 packed 32-bit floating point values. Definition: common.h:301.
v8sfx_t (sfx_t vector): 8 packed 32-bit integer values. Definition: common.h:318.
VECTOR_SIZE: Vector size used in SMV backends. Definition: common.h:293.
VEC256_MASK(input, mask): Same as VEC128_MASK, but for 256-bit vectors. Definition: common.h:347.
ALWAYS_INLINE: We have to disable all function inlining at the global level for Aladdin + LLVM-Tracer to work,... Definition: common.h:521.
common.h: Utilities for writing and invoking Aladdin kernels from Operators.
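
Example usage

For orientation, here is a minimal host-side sketch of how activation_fun_vec might be called. This is a hypothetical example, not taken from the SMAUG sources; it assumes the buffer length is a multiple of VECTOR_SIZE and that activation_type, activation_param_t, and the LRELU enumerator are visible via smaug/operators/common.h, as documented above.

#include "smaug/operators/common.h"
#include "smaug/operators/smv/kernels/activation_functions_simd.h"

// Hypothetical helper: apply leaky ReLU with a 0.1 slope to n floats.
// n should be a multiple of VECTOR_SIZE; any remainder would be skipped.
void apply_lrelu(float* inputs, float* results, int n) {
    activation_param_t params;
    params.slope = 0.1;  // Only the slope field is read for LRELU.
    activation_fun_vec(inputs, results, n, LRELU, params);
}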