SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
common.h
Go to the documentation of this file.
1 
6 #ifndef _OPERATORS_COMMON_H_
7 #define _OPERATORS_COMMON_H_
8 
9 #include <stdint.h>
10 
11 #include "gem5/sampling_interface.h"
12 
13 #ifdef DMA_MODE
14 #ifdef __cplusplus
15 extern "C" {
16 #endif
17 #include "gem5/dma_interface.h"
18 #ifdef __cplusplus
19 }
20 #endif
21 #include "gem5/aladdin_sys_connection.h"
22 #include "gem5/aladdin_sys_constants.h"
23 #include "gem5/systolic_array_connection.h"
24 #endif
25 
26 #ifdef __cplusplus
27 // Functions for invoking kernels and mapping arrays.
28 //
29 // If gem5 simulation is not used, the pure software version of the accelerated
30 // kernels will be invoked.
31 //
32 // These functions should be called from C++ files and not be included in C
33 // files.
34 
35 #include <string>
36 #include <utility>
37 #include <memory>
38 #include "smaug/core/globals.h"
39 #include "tracer/trace_logger_aladdin.h"
40 
41 namespace smaug {
42 
/**
 * Returns the name of the dynamic trace for the accelerator identified by
 * accelIdx.
 */
48 std::string getTraceName(int accelIdx);
49 
71 template <typename Kernel, typename... Args>
72 void invokeKernel(int accelIdx,
73  unsigned reqCode,
74  const Kernel& kernel,
75  Args&&... args) {
76  if (runningInSimulation) {
77  invokeAcceleratorAndBlock(reqCode);
78  } else {
79 #ifdef TRACE_MODE
80  llvmtracer_set_trace_name(getTraceName(accelIdx).c_str());
81 #endif
82  kernel(std::forward<Args>(args)...);
83  }
84 }
85 
/**
 * Convenience overload of invokeKernel() that targets accelerator index 0.
 *
 * @param reqCode Request code identifying the accelerator.
 * @param kernel Callable implementing the kernel in software.
 * @param args Arguments perfectly forwarded to kernel.
 */
template <typename Kernel, typename... Args>
void invokeKernel(unsigned reqCode, const Kernel& kernel, Args&&... args) {
    constexpr int defaultAccelIdx = 0;
    invokeKernel(defaultAccelIdx, reqCode, kernel,
                 std::forward<Args>(args)...);
}
96 
105 template <typename Kernel, typename... Args>
106 std::unique_ptr<volatile int> invokeKernelNoBlock(int accelIdx,
107  unsigned reqCode,
108  const Kernel& kernel,
109  Args&&... args) {
110  if (runningInSimulation) {
111  return std::unique_ptr<volatile int>(
112  invokeAcceleratorAndReturn(reqCode));
113  } else {
114 #ifdef TRACE_MODE
115  llvmtracer_set_trace_name(getTraceName(accelIdx).c_str());
116 #endif
117  kernel(std::forward<Args>(args)...);
118  return nullptr;
119  }
120 }
121 
/**
 * Maps an array of data to the accelerator.
 *
 * @param reqCode Request code identifying the accelerator.
 * @param arrayName Name of the array being mapped.
 * @param baseAddr Base address of the array.
 * @param size Size of the array (presumably in bytes; confirm against
 *        common.cpp).
 */
134 void mapArrayToAccel(unsigned reqCode,
135  const char* arrayName,
136  void* baseAddr,
137  size_t size);
138 
/**
 * Sets what memory access mechanism the accelerator will use when accessing
 * this array.
 *
 * @param reqCode Request code identifying the accelerator.
 * @param arrayName Name of the array whose access mechanism is set.
 * @param memType The memory access mechanism to use.
 */
151 void setArrayMemTypeIfSimulating(unsigned reqCode,
152  const char* arrayName,
153  MemoryType memType);
154 
155 } // namespace smaug
156 #endif
157 
158 #ifdef __cplusplus
159 extern "C" {
160 #endif
161 
/** Returns the smallest multiple of align that is >= request. */
164 size_t next_multiple(size_t request, size_t align);
165 
166 #ifdef __cplusplus
167 }
168 #endif
169 
175 typedef enum _activation_type {
176  NO_ACTIVATION,
177  RELU,
178  RELU_THRESHOLD,
179  LRELU,
180  ELU,
181  SELU,
182  TANH,
183  HARD_TANH,
184  SIGMOID,
185  SOFTMAX
187 
194 typedef struct _activation_param_t {
195  // LReLU
196  float slope;
197  // ELU/SELU
198  float alpha;
199  float lambda;
200  // Hard Tanh
201  float min;
202  float max;
204 
205 #ifdef __cplusplus
206 
211  public:
212  ActivationInfo() : function(activation_type::NO_ACTIVATION) {}
213  ActivationInfo(activation_type _function) : function(_function) {
214  // Use default parameters if not specified.
215  switch (_function) {
216  case activation_type::LRELU:
217  params.slope = 0.2;
218  break;
219  case activation_type::ELU:
220  params.alpha = 0.1;
221  break;
222  case activation_type::SELU:
223  params.alpha = 1.6733;
224  params.lambda = 1.0507;
225  break;
226  case activation_type::HARD_TANH:
227  params.min = -1;
228  params.max = 1;
229  break;
230  default:
231  break;
232  }
233  }
235  : function(_function), params(_params) {}
236  activation_type function;
237  activation_param_t params;
238 };
239 #endif
240 
250 typedef enum _SamplingLevel {
251  NoSampling = 0,
252  Low = 1,
253  Medium = 2,
254  High = 3,
255  VeryHigh = 4
257 
262 typedef struct _SamplingInfo {
271 
272 // Scalar types.
// fp_t: 32-bit floating point scalar.
273 typedef float fp_t;
// sfx_t / ufx_t: signed and unsigned integer scalars.
274 typedef int sfx_t;
275 typedef unsigned ufx_t;
// fp16_t / float16: 16-bit storage types; presumably they hold the raw bits of
// a half-precision float rather than an integer value (TODO confirm).
276 typedef uint16_t fp16_t;
277 typedef uint16_t float16;
278 
// NOTE(review): presumably the cache line size in bytes; confirm.
279 #define CACHELINE_SIZE 32
// NOTE(review): presumably log2 of the page size (i.e. 4096-byte pages);
// confirm.
280 #define LOG_PAGE_SIZE 12
281 
// Vector size used in SMV backends. Defaults to 8 unless VECTOR_SIZE has
// already been defined before this point.
292 #ifndef VECTOR_SIZE
293 #define VECTOR_SIZE 8
294 #endif
295 
297 typedef fp16_t v16fp_t
298  __attribute__((__vector_size__(VECTOR_SIZE * 2 * sizeof(fp_t))));
// The lane counts in the names below assume the default VECTOR_SIZE of 8.

// 8 packed 32-bit floating point values.
300 typedef fp_t v8fp_t
301  __attribute__((__vector_size__(VECTOR_SIZE * sizeof(fp_t))));
// 4 packed 32-bit floating point values.
303 typedef fp_t v4fp_t
304  __attribute__((__vector_size__(VECTOR_SIZE / 2 * sizeof(fp_t))));
305 
// 16 packed 16-bit floating point values.
307 typedef fp16_t v16ph_t
308  __attribute__((__vector_size__(VECTOR_SIZE * 2 * sizeof(fp16_t))));
// 8 packed 16-bit floating point values.
310 typedef fp16_t v8ph_t
311  __attribute__((__vector_size__(VECTOR_SIZE * sizeof(fp16_t))));
// 4 packed 16-bit floating point values.
313 typedef fp16_t v4ph_t
314  __attribute__((__vector_size__(VECTOR_SIZE / 2 * sizeof(fp16_t))));
315 
// 8 packed 32-bit integer values.
317 typedef sfx_t v8sfx_t
318  __attribute__((__vector_size__(VECTOR_SIZE * sizeof(sfx_t))));
// 4 packed 32-bit integer values.
320 typedef sfx_t v4sfx_t
321  __attribute__((__vector_size__(VECTOR_SIZE / 2 * sizeof(sfx_t))));
322 
// 8 packed 8-bit bool values.
324 typedef uint8_t v8bl_t
325  __attribute__((__vector_size__(VECTOR_SIZE * sizeof(uint8_t))));
326 
/**
 * Applies a bitwise mask to a 128-bit vector of floats.
 *
 * input is reinterpreted as v4sfx_t, ANDed lane-by-lane with mask, and the
 * result is reinterpreted back as v4fp_t. Both arguments are parenthesized so
 * that compound expressions (e.g. `a | b` as the mask) are masked as a whole
 * instead of being split by operator precedence.
 */
#define VEC128_MASK(input, mask) ((v4fp_t)((v4sfx_t)(input) & (mask)))
340 
/**
 * Applies a bitwise mask to a 256-bit vector of floats.
 *
 * input is reinterpreted as v8sfx_t, ANDed lane-by-lane with mask, and the
 * result is reinterpreted back as v8fp_t. Both arguments are parenthesized so
 * that compound expressions (e.g. `a | b` as the mask) are masked as a whole
 * instead of being split by operator precedence.
 */
#define VEC256_MASK(input, mask) ((v8fp_t)((v8sfx_t)(input) & (mask)))
348 
/*
 * Multi-dimensional array casting macros.
 *
 * ARRAY_nD(TYPE, out, in, dims...) declares a local variable `out` that views
 * the flat pointer `in` as an n-dimensional array of TYPE, so it can be
 * indexed as out[i][j]... The outermost dimension is not needed; the
 * remaining dimensions are passed as DIM_1, DIM_2, ...
 *
 * VEC_ARRAY_nD does the same for arrays whose element TYPE is a vector: the
 * innermost dimension is given as `cols` and is divided by VECTOR_SIZE
 * (presumably cols counts scalar elements and each TYPE holds VECTOR_SIZE of
 * them; confirm against callers).
 *
 * Under clang the cast goes through a typedef named `<out>_t`, which is also
 * introduced into the enclosing scope; under GCC the pointer-to-array type is
 * spelled inline.
 *
 * The input expression is always parenthesized before it is cast, so passing
 * something like `base + offset` casts the sum rather than casting `base` and
 * then scaling `offset` by the new element size.
 */
#if defined(__clang__)

#define TO_TYPE(output_array_name, input_array_name) \
    output_array_name##_t output_array_name = \
            (output_array_name##_t)(input_array_name)

#define ARRAY_1D(TYPE, output_array_name, input_array_name) \
    TYPE* output_array_name = (TYPE*)(input_array_name)

#define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \
    typedef TYPE(*output_array_name##_t)[DIM_1]; \
    TO_TYPE(output_array_name, input_array_name)

#define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \
    typedef TYPE(*output_array_name##_t)[DIM_1][DIM_2]; \
    TO_TYPE(output_array_name, input_array_name)

#define ARRAY_4D( \
        TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \
    typedef TYPE(*output_array_name##_t)[DIM_1][DIM_2][DIM_3]; \
    TO_TYPE(output_array_name, input_array_name)

#define ARRAY_5D( \
        TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \
    typedef TYPE(*output_array_name##_t)[DIM_1][DIM_2][DIM_3][DIM_4]; \
    TO_TYPE(output_array_name, input_array_name)

#define VEC_ARRAY_1D(TYPE, output_array_name, input_array_name) \
    TYPE* output_array_name = (TYPE*)(input_array_name)

#define VEC_ARRAY_2D(TYPE, output_array_name, input_array_name, cols) \
    typedef TYPE(*output_array_name##_t)[(cols) / (VECTOR_SIZE)]; \
    TO_TYPE(output_array_name, input_array_name)

#define VEC_ARRAY_3D(TYPE, output_array_name, input_array_name, rows, cols) \
    typedef TYPE(*output_array_name##_t)[(rows)][(cols) / (VECTOR_SIZE)]; \
    TO_TYPE(output_array_name, input_array_name)

#define VEC_ARRAY_4D( \
        TYPE, output_array_name, input_array_name, height, rows, cols) \
    typedef TYPE( \
            *output_array_name##_t)[(height)][(rows)][(cols) / (VECTOR_SIZE)]; \
    TO_TYPE(output_array_name, input_array_name)

#elif defined(__GNUC__)

#define ARRAY_1D(TYPE, output_array_name, input_array_name) \
    TYPE* output_array_name = (TYPE*)(input_array_name)

#define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \
    TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])(input_array_name)

#define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \
    TYPE(*output_array_name)[DIM_1][DIM_2] = \
            (TYPE(*)[DIM_1][DIM_2])(input_array_name)

#define ARRAY_4D( \
        TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \
    TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3] = \
            (TYPE(*)[DIM_1][DIM_2][DIM_3])(input_array_name)

#define ARRAY_5D( \
        TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \
    TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3][DIM_4] = \
            (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])(input_array_name)

#define VEC_ARRAY_1D(TYPE, output_array_name, input_array_name) \
    TYPE* output_array_name = (TYPE*)(input_array_name)

#define VEC_ARRAY_2D(TYPE, output_array_name, input_array_name, cols) \
    TYPE(*output_array_name) \
    [(cols) / (VECTOR_SIZE)] = \
            (TYPE(*)[(cols) / (VECTOR_SIZE)])(input_array_name)

#define VEC_ARRAY_3D(TYPE, output_array_name, input_array_name, rows, cols) \
    TYPE(*output_array_name) \
    [(rows)][(cols) / (VECTOR_SIZE)] = \
            (TYPE(*)[(rows)][(cols) / (VECTOR_SIZE)])(input_array_name)

#define VEC_ARRAY_4D( \
        TYPE, output_array_name, input_array_name, height, rows, cols) \
    TYPE(*output_array_name) \
    [(height)][(rows)][(cols) / (VECTOR_SIZE)] = \
            (TYPE(*)[(height)][(rows)][(cols) / (VECTOR_SIZE)])( \
                    input_array_name)

#endif
472 
473 
// Maximum / minimum helpers built on the ternary operator. These are macros,
// so an argument expression can be expanded (and therefore evaluated) more
// than once; do not pass expressions with side effects.
495 #define max2(A, B) (((A) > (B)) ? (A) : (B))
// max3 through max9 reduce their arguments pairwise through max2.
496 #define max3(e0, e1, e2) max2(max2(e0, e1), e2)
497 #define max4(e0, e1, e2, e3) max2(max2(e0, e1), max2(e2, e3))
498 #define max8(e0, e1, e2, e3, e4, e5, e6, e7) \
499  max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7))
500 #define max9(e0, e1, e2, e3, e4, e5, e6, e7, e8) \
501  max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8)
// Smaller of the two arguments; same multiple-evaluation caveat as max2.
502 #define min2(A, B) (((A) < (B)) ? (A) : (B))
503 
// Integer quotient A / B, plus one when the remainder is non-zero. For
// non-negative A and positive B this is the ceiling of A / B. Both arguments
// are expanded twice, so avoid side effects.
505 #define FRAC_CEIL(A, B) ((A) / (B) + ((A) % (B) != 0))
506 
// Expands to the always_inline function attribute when TRACE_MODE is defined
// and to nothing otherwise.
518 #ifdef TRACE_MODE
519 #define ALWAYS_INLINE __attribute__((__always_inline__))
520 #else
521 #define ALWAYS_INLINE
522 #endif
523 
// Assertion wrapper: forwards to assert() in normal builds and expands to
// nothing when TRACE_MODE is defined. Because the argument disappears
// entirely in TRACE_MODE builds, it must not have side effects.
// NOTE(review): no <assert.h>/<cassert> include is visible in this header;
// confirm that users of ASSERT include it themselves.
527 #ifdef TRACE_MODE
528 #define ASSERT(x)
529 #else
530 #define ASSERT(x) assert(x)
531 #endif
532 
/**
 * Tells the compiler that ptr satisfies the given alignment.
 *
 * In normal builds this forwards to __builtin_assume_aligned(ptr, args...),
 * where args is the alignment and, optionally, a misalignment offset. When
 * TRACE_MODE is defined the hint is dropped and the macro evaluates to ptr.
 *
 * Both variants accept the same variadic argument list, so a call that passes
 * the optional offset compiles in trace builds as well.
 */
#ifdef TRACE_MODE
#define ASSUME_ALIGNED(ptr, args...) (ptr)
#else
#define ASSUME_ALIGNED(ptr, args...) __builtin_assume_aligned((ptr), args)
#endif
542 
// Expands to the GCC/Clang __unused__ attribute, for declarations that may be
// intentionally left unused.
543 #define MAYBE_UNUSED __attribute__((__unused__))
544 
549 #endif
v16ph_t
fp16_t v16ph_t
16 packed 16-bit floating point values.
Definition: common.h:308
activation_type
enum _activation_type activation_type
The activation function to apply to an operator's output in hardware.
v4ph_t
fp16_t v4ph_t
4 packed 16-bit floating point values.
Definition: common.h:314
_SamplingInfo
Simulation sampling information maintained by the Operator and passed to the accelerated kernel.
Definition: common.h:262
ActivationInfo
Specifies an activation function and relevant parameters.
Definition: common.h:210
smaug::invokeKernelNoBlock
std::unique_ptr< volatile int > invokeKernelNoBlock(int accelIdx, unsigned reqCode, const Kernel &kernel, Args &&... args)
A generic non-blocking interface to accelerated kernel functions.
Definition: common.h:106
smaug::setArrayMemTypeIfSimulating
void setArrayMemTypeIfSimulating(unsigned reqCode, const char *arrayName, MemoryType memType)
Sets what memory access mechanism the accelerator will use when accessing this array.
Definition: common.cpp:21
_SamplingLevel
_SamplingLevel
Levels of simulation sampling to apply to certain accelerator kernels.
Definition: common.h:250
_SamplingInfo::num_sample_iterations
int num_sample_iterations
The requested number of iterations to run a sampled loop.
Definition: common.h:269
_activation_param_t
Parameters to the activation function hardware.
Definition: common.h:194
v8bl_t
uint8_t v8bl_t
8 packed 8-bit bool values.
Definition: common.h:325
_activation_type
_activation_type
The activation function to apply to an operator's output in hardware.
Definition: common.h:175
SamplingInfo
struct _SamplingInfo SamplingInfo
Simulation sampling information maintained by the Operator and passed to the accelerated kernel.
smaug::runningInSimulation
bool runningInSimulation
This is true if the user chooses to run the network in gem5 simulation.
Definition: globals.cpp:4
globals.h
SMAUG Global variables.
v8ph_t
fp16_t v8ph_t
8 packed 16-bit floating point values.
Definition: common.h:311
v8fp_t
fp_t v8fp_t
8 packed 32-bit floating point values.
Definition: common.h:301
SamplingLevel
enum _SamplingLevel SamplingLevel
Levels of simulation sampling to apply to certain accelerator kernels.
v4fp_t
fp_t v4fp_t
4 packed 32-bit floating point values.
Definition: common.h:304
smaug::getTraceName
std::string getTraceName(int accelIdx)
Return the name of the dynamic trace for this accelerator.
Definition: common.cpp:6
_SamplingInfo::level
SamplingLevel level
Qualitative level of sampling.
Definition: common.h:264
smaug
The smaug namespace is the parent namespace of all C++ code in SMAUG.
Definition: backend.cpp:38
VECTOR_SIZE
#define VECTOR_SIZE
Vector size used in SMV backends.
Definition: common.h:293
v16fp_t
fp16_t v16fp_t
16 packed 32-bit floating point values.
Definition: common.h:298
v8sfx_t
sfx_t v8sfx_t
8 packed 32-bit integer values.
Definition: common.h:318
next_multiple
size_t next_multiple(size_t request, size_t align)
Returns the smallest multiple of align that is >= request.
Definition: common.cpp:36
v4sfx_t
sfx_t v4sfx_t
4 packed 32-bit integer values.
Definition: common.h:321
activation_param_t
struct _activation_param_t activation_param_t
Parameters to the activation function hardware.
smaug::mapArrayToAccel
void mapArrayToAccel(unsigned reqCode, const char *arrayName, void *baseAddr, size_t size)
Maps an array of data to the accelerator.
Definition: common.cpp:12
smaug::invokeKernel
void invokeKernel(int accelIdx, unsigned reqCode, const Kernel &kernel, Args &&... args)
The generic blocking interface for all accelerator kernel functions.
Definition: common.h:72