/**
 * SMAUG: Simulating Machine Learning Applications on gem5-Aladdin.
 * compare.c -- vectorized elementwise comparison kernels (SMV backend).
 */
2 #include "smaug/operators/smv/kernels/params.h"
4 
5 #ifdef __cplusplus
6 extern "C" {
7 #endif
8 
10 static inline v8bl_t convert_to_bool(v8sfx_t a) {
11  return (v8bl_t){ (bool)a[0], (bool)a[1], (bool)a[2], (bool)a[3],
12  (bool)a[4], (bool)a[5], (bool)a[6], (bool)a[7] };
13 }
14 
19 void smv_less_nc_vec_fxp(float16* host_inputs0,
20  float16* host_inputs1,
21  bool* host_results,
22  float* inputs0,
23  float* inputs1,
24  bool* results,
25  int inputs_size) {
26  // Load inputs.
27  host_load_fp16(inputs0, host_inputs0, inputs_size, 0, 0);
28  host_load_fp16(inputs1, host_inputs1, inputs_size, 0, 0);
29 
30  VEC_ARRAY_1D(v8fp_t, _inputs0, inputs0);
31  VEC_ARRAY_1D(v8fp_t, _inputs1, inputs1);
32  VEC_ARRAY_1D(v8bl_t, _results, results);
33 
34  less_loop:
35  for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
36  v8sfx_t result = _inputs0[i] < _inputs1[i];
37  _results[i] = convert_to_bool(result);
38  }
39 
40  // Store results to the host memory.
41  dmaStore(host_results, results, inputs_size * sizeof(bool));
42 }
43 
48 void smv_less_equal_nc_vec_fxp(float16* host_inputs0,
49  float16* host_inputs1,
50  bool* host_results,
51  float* inputs0,
52  float* inputs1,
53  bool* results,
54  int inputs_size) {
55  // Load inputs.
56  host_load_fp16(inputs0, host_inputs0, inputs_size, 0, 0);
57  host_load_fp16(inputs1, host_inputs1, inputs_size, 0, 0);
58 
59  VEC_ARRAY_1D(v8fp_t, _inputs0, inputs0);
60  VEC_ARRAY_1D(v8fp_t, _inputs1, inputs1);
61  VEC_ARRAY_1D(v8bl_t, _results, results);
62 
63  less_equal_loop:
64  for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
65  v8sfx_t result = _inputs0[i] <= _inputs1[i];
66  _results[i] = convert_to_bool(result);
67  }
68 
69  // Store results to the host memory.
70  dmaStore(host_results, results, inputs_size * sizeof(bool));
71 }
72 
77 void smv_greater_nc_vec_fxp(float16* host_inputs0,
78  float16* host_inputs1,
79  bool* host_results,
80  float* inputs0,
81  float* inputs1,
82  bool* results,
83  int inputs_size) {
84  // Load inputs.
85  host_load_fp16(inputs0, host_inputs0, inputs_size, 0, 0);
86  host_load_fp16(inputs1, host_inputs1, inputs_size, 0, 0);
87 
88  VEC_ARRAY_1D(v8fp_t, _inputs0, inputs0);
89  VEC_ARRAY_1D(v8fp_t, _inputs1, inputs1);
90  VEC_ARRAY_1D(v8bl_t, _results, results);
91 
92  greater_loop:
93  for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
94  v8sfx_t result = _inputs0[i] > _inputs1[i];
95  _results[i] = convert_to_bool(result);
96  }
97 
98  // Store results to the host memory.
99  dmaStore(host_results, results, inputs_size * sizeof(bool));
100 }
101 
106 void smv_greater_equal_nc_vec_fxp(float16* host_inputs0,
107  float16* host_inputs1,
108  bool* host_results,
109  float* inputs0,
110  float* inputs1,
111  bool* results,
112  int inputs_size) {
113  // Load inputs.
114  host_load_fp16(inputs0, host_inputs0, inputs_size, 0, 0);
115  host_load_fp16(inputs1, host_inputs1, inputs_size, 0, 0);
116 
117  VEC_ARRAY_1D(v8fp_t, _inputs0, inputs0);
118  VEC_ARRAY_1D(v8fp_t, _inputs1, inputs1);
119  VEC_ARRAY_1D(v8bl_t, _results, results);
120 
121  greater_equal_loop:
122  for (int i = 0; i < inputs_size / VECTOR_SIZE; i++) {
123  v8sfx_t result = _inputs0[i] >= _inputs1[i];
124  _results[i] = convert_to_bool(result);
125  }
126 
127  // Store results to the host memory.
128  dmaStore(host_results, results, inputs_size * sizeof(bool));
129 }
130 
131 #ifdef __cplusplus
132 } // extern "C"
133 #endif
host_load_fp16
void host_load_fp16(float *local_data, float16 *remote_data, int num_elems, int local_offset, int remote_offset)
Definition: load_store_fp16_data.c:7
smv_less_equal_nc_vec_fxp
void smv_less_equal_nc_vec_fxp(float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)
Definition: compare.c:48
smv_greater_equal_nc_vec_fxp
void smv_greater_equal_nc_vec_fxp(float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)
Definition: compare.c:106
v8bl_t
uint8_t v8bl_t
8 packed 8-bit bool values.
Definition: common.h:325
v8fp_t
fp_t v8fp_t
8 packed 32-bit floating point values.
Definition: common.h:301
load_store_fp16_data.h
Aladdin kernels to load/store FP16 data to/from host memory.
smv_greater_nc_vec_fxp
void smv_greater_nc_vec_fxp(float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)
Definition: compare.c:77
common.h
Utilities for writing and invoking Aladdin kernels from Operators.
VECTOR_SIZE
#define VECTOR_SIZE
Vector size used in SMV backends.
Definition: common.h:293
ALWAYS_INLINE
#define ALWAYS_INLINE
We have to disable all function inlining at the global level for Aladdin + LLVM-Tracer to work,...
Definition: common.h:521
v8sfx_t
sfx_t v8sfx_t
8 packed 32-bit integer values.
Definition: common.h:318
smv_less_nc_vec_fxp
void smv_less_nc_vec_fxp(float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)
Definition: compare.c:19