SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
activation_functions_simd.c
#include "smaug/operators/smv/kernels/load_store_fp16_data.h"
#include "smaug/operators/smv/kernels/activation_functions_simd.h"

#ifdef __cplusplus
extern "C" {
#endif

// Top-level entry point for the SMV vectorized activation functions.
void smv_activation_fun_nc_vec_fxp(float16* host_inputs,
                                   float16* host_results,
                                   float* inputs,
                                   float* results,
                                   int inputs_size,
                                   activation_type function,
                                   activation_param_t params) {
    // Load inputs from host memory into the local fp32 buffer.
    host_load_fp16(inputs, host_inputs, inputs_size, 0, 0);
    activation_fun_vec(inputs, results, inputs_size, function, params);
    // Store results back to the host memory.
    host_store_fp16(results, host_results, inputs_size, 0, 0);
}

// Top-level entry point for the SMV vectorized softmax.
void smv_softmax_nc_vec_fxp(float16* host_inputs,
                            float16* host_results,
                            float* inputs,
                            float* results,
                            int input_num,
                            int input_size,
                            int input_pad) {
    // Load inputs.
    host_load_fp16(
            inputs, host_inputs, input_num * (input_size + input_pad), 0, 0);

    VEC_ARRAY_2D(v8fp_t, _inputs, inputs, input_size + input_pad);
    VEC_ARRAY_2D(v8fp_t, _results, results, input_size + input_pad);
    int input_vec_size = input_size / VECTOR_SIZE;

    softmax_batch:
    for (int i = 0; i < input_num; i++) {
        // Exponentiate each element.
        softmax_exp:
        for (int j = 0; j < input_vec_size; j++) {
            softmax_exp_vec:
            for (int k = 0; k < VECTOR_SIZE; k++)
                _results[i][j][k] = exp(_inputs[i][j][k]);
        }

        // Compute the normalization factor.
        float normaliz = 0.0;
        softmax_reduce:
        for (int j = 0; j < input_vec_size; j++) {
            softmax_reduce_vec:
            for (int k = 0; k < VECTOR_SIZE; k++)
                normaliz += _results[i][j][k];
        }

        // Precompute the reciprocal so the final pass is a multiplication
        // rather than a division.
        normaliz = 1.0 / (normaliz + 1e-6);  // Epsilon for numerical stability.

        softmax_mul:
        for (int j = 0; j < input_vec_size; j++) {
            softmax_mul_vec:
            for (int k = 0; k < VECTOR_SIZE; k++)
                _results[i][j][k] *= normaliz;
        }
    }

    // Store results to the host memory.
    host_store_fp16(
            results, host_results, input_num * (input_size + input_pad), 0, 0);
}

#ifdef __cplusplus
}  // extern "C"
#endif
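
For reference, the vectorized softmax above is arithmetically equivalent to the scalar sketch below. The labeled loops in the kernel (softmax_batch, softmax_exp, and so on) exist so that Aladdin can attach per-loop directives such as unrolling and pipelining; the sketch drops them because it only illustrates the computation. This code is not part of the SMAUG source: ref_softmax is a hypothetical name, and it assumes the kernel's flat layout of input_num rows of (input_size + input_pad) floats, of which only the first input_size per row are valid.

#include <math.h>

// Hypothetical scalar reference for smv_softmax_nc_vec_fxp (not SMAUG code).
static void ref_softmax(const float* inputs, float* results,
                        int input_num, int input_size, int input_pad) {
    int row_stride = input_size + input_pad;
    for (int i = 0; i < input_num; i++) {
        const float* in = inputs + i * row_stride;
        float* out = results + i * row_stride;
        // Exponentiate and accumulate the normalization factor in one pass.
        float sum = 0.0f;
        for (int j = 0; j < input_size; j++) {
            out[j] = expf(in[j]);
            sum += out[j];
        }
        // Same trick as the kernel: compute one reciprocal per row (with an
        // epsilon for numerical stability), then multiply each element,
        // instead of performing a division per element.
        float recip = 1.0f / (sum + 1e-6f);
        for (int j = 0; j < input_size; j++)
            out[j] *= recip;
    }
}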
Referenced symbols:
- activation_type (enum _activation_type, common.h): the activation function to apply to an operator's output in hardware.
- activation_param_t (struct _activation_param_t, common.h): parameters to the activation function hardware.
- v8fp_t (common.h): 8 packed 32-bit floating-point values.
- VECTOR_SIZE (common.h): vector size used in the SMV backends.
- host_load_fp16 and host_store_fp16 (load_store_fp16_data.h/.c): Aladdin kernels to load and store FP16 data to and from host memory; both take (float* local_data, float16* remote_data, int num_elems, int local_offset, int remote_offset).
- common.h: utilities for writing and invoking Aladdin kernels from Operators.
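
As a usage illustration, a host program could stage data for the activation kernel roughly as follows. This is a minimal sketch, not how SMAUG actually dispatches kernels (that goes through the operator and runtime layers): the function and buffer names are made up, RELU is assumed to be one of the activation_type enumerators from common.h, and activation_param_t is assumed to be zero-initializable for activations that take no parameters.

#include <stdlib.h>
#include "smaug/operators/smv/kernels/activation_functions_simd.h"

// Hypothetical host-side staging for smv_activation_fun_nc_vec_fxp
// (illustrative only; SMAUG invokes kernels through its operator layer).
void run_relu_example(float16* host_in, float16* host_out, int n) {
    // fp32 scratch buffers standing in for the accelerator's local memory.
    float* spad_in = (float*)malloc(n * sizeof(float));
    float* spad_out = (float*)malloc(n * sizeof(float));

    // Assumed zero-initializable; ReLU takes no extra parameters.
    activation_param_t params = { 0 };
    smv_activation_fun_nc_vec_fxp(host_in, host_out, spad_in, spad_out,
                                  n, RELU, params);

    free(spad_in);
    free(spad_out);
}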