SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
#ifndef _OPERATORS_COMMON_H_
#define _OPERATORS_COMMON_H_

// Standard headers needed by the declarations below.
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <string>
#include <utility>

#include "gem5/sampling_interface.h"
#include "gem5/dma_interface.h"
#include "gem5/aladdin_sys_connection.h"
#include "gem5/aladdin_sys_constants.h"
#include "gem5/systolic_array_connection.h"
#include "tracer/trace_logger_aladdin.h"
namespace smaug {

/** Return the name of the dynamic trace for this accelerator. */
std::string getTraceName(int accelIdx);

/**
 * The generic blocking interface for all accelerator kernel functions.
 *
 * In simulation, this invokes the accelerator identified by reqCode and
 * blocks until it finishes; otherwise it runs the software kernel directly.
 * runningInSimulation (declared elsewhere in SMAUG) is true when the network
 * is running under gem5 simulation.
 */
template <typename Kernel, typename... Args>
void invokeKernel(int accelIdx,
                  unsigned reqCode,
                  const Kernel& kernel,
                  Args&&... args) {
    if (runningInSimulation) {
        invokeAcceleratorAndBlock(reqCode);
    } else {
#ifdef TRACE_MODE
        llvmtracer_set_trace_name(getTraceName(accelIdx).c_str());
#endif
        kernel(std::forward<Args>(args)...);
    }
}
/** A blocking kernel invocation that defaults to accelerator index 0. */
template <typename Kernel, typename... Args>
void invokeKernel(unsigned reqCode, const Kernel& kernel, Args&&... args) {
    invokeKernel(0, reqCode, kernel, std::forward<Args>(args)...);
}
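/**
 * Usage sketch (illustrative; not part of the original header). The kernel
 * refEltwiseAdd and the request code kEltwiseAddHw below are hypothetical
 * placeholders for a real SMAUG reference kernel and its Aladdin request
 * code.
 *
 * @code
 * void refEltwiseAdd(float* in0, float* in1, float* out, int size) {
 *     for (int i = 0; i < size; i++)
 *         out[i] = in0[i] + in1[i];
 * }
 *
 * void runEltwiseAdd(unsigned kEltwiseAddHw,
 *                    float* in0, float* in1, float* out, int size) {
 *     // In simulation this blocks on the accelerator; otherwise it simply
 *     // calls refEltwiseAdd on the CPU.
 *     smaug::invokeKernel(kEltwiseAddHw, refEltwiseAdd, in0, in1, out, size);
 * }
 * @endcode
 */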
/**
 * A generic non-blocking interface to accelerated kernel functions.
 *
 * In simulation, this returns the accelerator's finish flag, which the caller
 * waits on before reading the results; otherwise the kernel runs to
 * completion and nullptr is returned.
 */
template <typename Kernel, typename... Args>
std::unique_ptr<volatile int> invokeKernelNoBlock(int accelIdx,
                                                  unsigned reqCode,
                                                  const Kernel& kernel,
                                                  Args&&... args) {
    if (runningInSimulation) {
        return std::unique_ptr<volatile int>(
                invokeAcceleratorAndReturn(reqCode));
    } else {
#ifdef TRACE_MODE
        llvmtracer_set_trace_name(getTraceName(accelIdx).c_str());
#endif
        kernel(std::forward<Args>(args)...);
        return nullptr;
    }
}
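/**
 * Usage sketch for the non-blocking interface (illustrative; not part of the
 * original header). refKernel and kMyReqCode are hypothetical placeholders.
 * In simulation the returned pointer is the accelerator's finish flag; the
 * caller must wait on it before reading the outputs. Outside of simulation
 * the kernel has already run and nullptr is returned.
 *
 * @code
 * auto finishFlag =
 *         smaug::invokeKernelNoBlock(0, kMyReqCode, refKernel, data, size);
 * // ... overlap independent CPU work here ...
 * if (finishFlag) {
 *     // In simulation: wait on *finishFlag (via the Aladdin runtime) before
 *     // reading the accelerator's results.
 * }
 * @endcode
 */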
/** Maps an array of data to the accelerator. */
void mapArrayToAccel(unsigned reqCode,
                     const char* arrayName,
                     void* baseAddr,
                     size_t size);

/**
 * Sets what memory access mechanism the accelerator will use when accessing
 * this array.
 */
void setArrayMemTypeIfSimulating(unsigned reqCode,
                                 const char* arrayName,
                                 MemoryType memType);

}  // namespace smaug
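/**
 * Usage sketch (illustrative; not part of the original header): before a
 * simulated invocation, each array the accelerator touches is registered with
 * mapArrayToAccel, and its access mechanism can be chosen with
 * setArrayMemTypeIfSimulating. The request code, array names, kernel, and the
 * use of dma as a MemoryType value are placeholder assumptions here; consult
 * the Aladdin headers for the real enumerators.
 *
 * @code
 * smaug::mapArrayToAccel(kMyReqCode, "inputs", inputs, inputsBytes);
 * smaug::mapArrayToAccel(kMyReqCode, "outputs", outputs, outputsBytes);
 * smaug::setArrayMemTypeIfSimulating(kMyReqCode, "inputs", dma);
 * smaug::invokeKernel(kMyReqCode, refKernel, inputs, outputs, size);
 * @endcode
 */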
// The definitions of activation_type (the activation function to apply to an
// operator's output in hardware) and activation_param_t (its parameters) are
// elided in this listing; see the member documentation at the end of this
// page.

/** Specifies an activation function and relevant parameters. */
struct ActivationInfo {
   public:
    ActivationInfo(activation_type _function) : function(_function) {
        // Use hardware default parameters when only the function is given.
        switch (_function) {
            case activation_type::LRELU:
                // Default slope (value elided in this listing).
                break;
            case activation_type::ELU:
                // Default alpha (value elided in this listing).
                break;
            case activation_type::SELU:
                params.alpha = 1.6733;
                params.lambda = 1.0507;
                break;
            case activation_type::HARD_TANH:
                // Default min/max range (values elided in this listing).
                break;
            default:
                break;
        }
    }
    ActivationInfo(activation_type _function, activation_param_t _params)
            : function(_function), params(_params) {}

    activation_type function;
    activation_param_t params;
};
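/**
 * Construction sketch (illustrative; not part of the original header):
 * passing only the activation type picks up the hardware defaults set in the
 * switch above (e.g. SELU's alpha/lambda), while the two-argument constructor
 * takes caller-supplied parameters.
 *
 * @code
 * ActivationInfo selu(activation_type::SELU);  // alpha = 1.6733, lambda = 1.0507
 *
 * activation_param_t customParams;
 * customParams.alpha = 1.0;                    // caller-chosen ELU alpha
 * ActivationInfo elu(activation_type::ELU, customParams);
 * @endcode
 */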
typedef float fp_t;
typedef int sfx_t;
typedef unsigned ufx_t;
typedef uint16_t fp16_t;
typedef uint16_t float16;

#define CACHELINE_SIZE 32
#define LOG_PAGE_SIZE 12

/** Vector size used in SMV backends. */
#define VECTOR_SIZE 8
/** 16 packed 32-bit floating point values. */
typedef fp_t v16fp_t
        __attribute__((__vector_size__(VECTOR_SIZE * 2 * sizeof(fp_t))));
/** 8 packed 32-bit floating point values. */
typedef fp_t v8fp_t
        __attribute__((__vector_size__(VECTOR_SIZE * sizeof(fp_t))));
/** 4 packed 32-bit floating point values. */
typedef fp_t v4fp_t
        __attribute__((__vector_size__(VECTOR_SIZE / 2 * sizeof(fp_t))));

/** 16 packed 16-bit floating point values. */
typedef fp16_t v16ph_t
        __attribute__((__vector_size__(VECTOR_SIZE * 2 * sizeof(fp16_t))));
/** 8 packed 16-bit floating point values. */
typedef fp16_t v8ph_t
        __attribute__((__vector_size__(VECTOR_SIZE * sizeof(fp16_t))));
/** 4 packed 16-bit floating point values. */
typedef fp16_t v4ph_t
        __attribute__((__vector_size__(VECTOR_SIZE / 2 * sizeof(fp16_t))));

/** 8 packed 32-bit integer values. */
typedef sfx_t v8sfx_t
        __attribute__((__vector_size__(VECTOR_SIZE * sizeof(sfx_t))));
/** 4 packed 32-bit integer values. */
typedef sfx_t v4sfx_t
        __attribute__((__vector_size__(VECTOR_SIZE / 2 * sizeof(sfx_t))));

/** 8 packed 8-bit bool values. */
typedef uint8_t v8bl_t
        __attribute__((__vector_size__(VECTOR_SIZE * sizeof(uint8_t))));
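/**
 * Usage sketch (illustrative; not part of the original header): these
 * typedefs use the GCC/Clang vector extensions, so elementwise arithmetic and
 * lane indexing work directly on them.
 *
 * @code
 * v8fp_t a = { 0, 1, 2, 3, 4, 5, 6, 7 };
 * v8fp_t b = { 7, 6, 5, 4, 3, 2, 1, 0 };
 * v8fp_t sum = a + b;   // elementwise add: every lane holds 7
 * fp_t lane0 = sum[0];  // lanes are indexable like array elements
 * @endcode
 */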
/** Mask selected lanes of a 128-bit (4 x fp32) vector via bitwise AND. */
#define VEC128_MASK(input, mask) ((v4fp_t)((v4sfx_t)input & mask))
/** Mask selected lanes of a 256-bit (8 x fp32) vector via bitwise AND. */
#define VEC256_MASK(input, mask) ((v8fp_t)((v8sfx_t)input & mask))
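/**
 * Usage sketch (illustrative; not part of the original header): the mask
 * macros bitwise-AND the raw bits of a packed float vector with an integer
 * mask vector, so a lane of all ones (-1) is kept and a lane of zeros is
 * cleared to 0.0f.
 *
 * @code
 * v8fp_t data = { 1, 2, 3, 4, 5, 6, 7, 8 };
 * v8sfx_t keepFirstFour = { -1, -1, -1, -1, 0, 0, 0, 0 };
 * v8fp_t masked = VEC256_MASK(data, keepFirstFour);  // { 1, 2, 3, 4, 0, 0, 0, 0 }
 * @endcode
 */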
// Multidimensional array casting macros: reinterpret a flat pointer as an
// N-dimensional array so kernel code can use natural subscript syntax.
#if defined(__clang__)

#define TO_TYPE(output_array_name, input_array_name) \
    output_array_name##_t output_array_name = \
            (output_array_name##_t)(input_array_name)

#define ARRAY_1D(TYPE, output_array_name, input_array_name) \
    TYPE* output_array_name = (TYPE*)input_array_name

#define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \
    typedef TYPE(*output_array_name##_t)[DIM_1]; \
    TO_TYPE(output_array_name, input_array_name)

#define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \
    typedef TYPE(*output_array_name##_t)[DIM_1][DIM_2]; \
    TO_TYPE(output_array_name, input_array_name)

#define ARRAY_4D( \
        TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \
    typedef TYPE(*output_array_name##_t)[DIM_1][DIM_2][DIM_3]; \
    TO_TYPE(output_array_name, input_array_name)

#define ARRAY_5D( \
        TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \
    typedef TYPE(*output_array_name##_t)[DIM_1][DIM_2][DIM_3][DIM_4]; \
    TO_TYPE(output_array_name, input_array_name)

#define VEC_ARRAY_1D(TYPE, output_array_name, input_array_name) \
    TYPE* output_array_name = (TYPE*)input_array_name

#define VEC_ARRAY_2D(TYPE, output_array_name, input_array_name, cols) \
    typedef TYPE(*output_array_name##_t)[(cols) / VECTOR_SIZE]; \
    TO_TYPE(output_array_name, input_array_name)

#define VEC_ARRAY_3D(TYPE, output_array_name, input_array_name, rows, cols) \
    typedef TYPE(*output_array_name##_t)[(rows)][(cols) / VECTOR_SIZE]; \
    TO_TYPE(output_array_name, input_array_name)

#define VEC_ARRAY_4D( \
        TYPE, output_array_name, input_array_name, height, rows, cols) \
    typedef TYPE( \
            *output_array_name##_t)[(height)][(rows)][(cols) / VECTOR_SIZE]; \
    TO_TYPE(output_array_name, input_array_name)
#elif defined(__GNUC__)

#define ARRAY_1D(TYPE, output_array_name, input_array_name) \
    TYPE* output_array_name = (TYPE*)input_array_name

#define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \
    TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name

#define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \
    TYPE(*output_array_name)[DIM_1][DIM_2] = \
            (TYPE(*)[DIM_1][DIM_2])input_array_name

#define ARRAY_4D( \
        TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \
    TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3] = \
            (TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name

#define ARRAY_5D( \
        TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \
    TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3][DIM_4] = \
            (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name

#define VEC_ARRAY_1D(TYPE, output_array_name, input_array_name) \
    TYPE* output_array_name = (TYPE*)(input_array_name)

#define VEC_ARRAY_2D(TYPE, output_array_name, input_array_name, cols) \
    TYPE(*output_array_name) \
    [(cols) / (VECTOR_SIZE)] = \
            (TYPE(*)[(cols) / (VECTOR_SIZE)]) input_array_name

#define VEC_ARRAY_3D(TYPE, output_array_name, input_array_name, rows, cols) \
    TYPE(*output_array_name) \
    [(rows)][(cols) / (VECTOR_SIZE)] = \
            (TYPE(*)[(rows)][(cols) / (VECTOR_SIZE)]) input_array_name

#define VEC_ARRAY_4D( \
        TYPE, output_array_name, input_array_name, height, rows, cols) \
    TYPE(*output_array_name) \
    [(height)][(rows)][(cols) / (VECTOR_SIZE)] = \
            (TYPE(*)[(height)][(rows)][(cols) / (VECTOR_SIZE)]) \
                    input_array_name

#endif
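/**
 * Usage sketch (illustrative; not part of the original header): the ARRAY_*
 * macros reinterpret a flat buffer as a multidimensional array so kernel code
 * can use natural subscripts instead of manual index arithmetic. The function
 * below is a hypothetical example.
 *
 * @code
 * float sumAll(float* inputs, int chans, int rows, int cols) {
 *     ARRAY_3D(float, _inputs, inputs, rows, cols);
 *     float acc = 0;
 *     for (int c = 0; c < chans; c++)
 *         for (int r = 0; r < rows; r++)
 *             for (int w = 0; w < cols; w++)
 *                 // Same element as inputs[(c * rows + r) * cols + w].
 *                 acc += _inputs[c][r][w];
 *     return acc;
 * }
 * @endcode
 */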
#define max2(A, B) (((A) > (B)) ? (A) : (B))
#define max3(e0, e1, e2) max2(max2(e0, e1), e2)
#define max4(e0, e1, e2, e3) max2(max2(e0, e1), max2(e2, e3))
#define max8(e0, e1, e2, e3, e4, e5, e6, e7) \
    max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7))
#define max9(e0, e1, e2, e3, e4, e5, e6, e7, e8) \
    max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8)
#define min2(A, B) (((A) < (B)) ? (A) : (B))

/** Integer division of A by B, rounded up. */
#define FRAC_CEIL(A, B) ((A) / (B) + ((A) % (B) != 0))
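/**
 * Usage sketch (illustrative; not part of the original header): FRAC_CEIL is
 * integer division rounded up, the usual way to count tiles when a dimension
 * does not divide evenly.
 *
 * @code
 * int numTiles = FRAC_CEIL(4096, 1000);  // 4096 / 1000 rounded up = 5
 * int largest = max3(numTiles, 2, 3);    // 5
 * @endcode
 */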
// When building for LLVM-Tracer instrumentation (TRACE_MODE), force kernels
// to be inlined into the traced top-level function and drop asserts and the
// compiler alignment builtin; otherwise use the normal definitions.
#ifdef TRACE_MODE
#define ALWAYS_INLINE __attribute__((__always_inline__))
#define ASSERT(x)
#define ASSUME_ALIGNED(ptr, alignment) (ptr)
#else
#define ALWAYS_INLINE
#define ASSERT(x) assert(x)
#define ASSUME_ALIGNED(ptr, args...) __builtin_assume_aligned((ptr), args)
#endif

#define MAYBE_UNUSED __attribute__((__unused__))

#endif  // _OPERATORS_COMMON_H_
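/**
 * Usage sketch (illustrative; not part of the original header): a kernel
 * annotated so that instrumented (TRACE_MODE) builds inline it and skip the
 * alignment builtin, while native builds keep the assert and the alignment
 * hint for vectorization.
 *
 * @code
 * ALWAYS_INLINE
 * void refScale(float* inputs, float* outputs, int size, float factor) {
 *     float* in = (float*)ASSUME_ALIGNED(inputs, CACHELINE_SIZE);
 *     float* out = (float*)ASSUME_ALIGNED(outputs, CACHELINE_SIZE);
 *     ASSERT(size % VECTOR_SIZE == 0);
 *     for (int i = 0; i < size; i++)
 *         out[i] = in[i] * factor;
 * }
 * @endcode
 */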
Referenced members and symbols (from the Doxygen cross-references for this file):

The smaug namespace is the parent namespace of all C++ code in SMAUG.

std::string getTraceName(int accelIdx)
    Return the name of the dynamic trace for this accelerator.
bool runningInSimulation
    True if the user chooses to run the network in gem5 simulation.
void invokeKernel(int accelIdx, unsigned reqCode, const Kernel &kernel, Args &&... args)
    The generic blocking interface for all accelerator kernel functions.
std::unique_ptr<volatile int> invokeKernelNoBlock(int accelIdx, unsigned reqCode, const Kernel &kernel, Args &&... args)
    A generic non-blocking interface to accelerated kernel functions.
void mapArrayToAccel(unsigned reqCode, const char *arrayName, void *baseAddr, size_t size)
    Maps an array of data to the accelerator.
void setArrayMemTypeIfSimulating(unsigned reqCode, const char *arrayName, MemoryType memType)
    Sets what memory access mechanism the accelerator will use when accessing this array.
size_t next_multiple(size_t request, size_t align)
    Returns the smallest multiple of align that is >= request (see the sketch after this list).

enum _activation_type / activation_type
    The activation function to apply to an operator's output in hardware.
struct _activation_param_t / activation_param_t
    Parameters to the activation function hardware.
ActivationInfo
    Specifies an activation function and relevant parameters.

enum _SamplingLevel / SamplingLevel
    Levels of simulation sampling to apply to certain accelerator kernels.
struct _SamplingInfo / SamplingInfo
    Simulation sampling information maintained by the Operator and passed to the accelerated kernel.
SamplingLevel level
    Qualitative level of sampling.
int num_sample_iterations
    The requested number of iterations to run a sampled loop.

#define VECTOR_SIZE
    Vector size used in SMV backends.
fp_t v16fp_t
    16 packed 32-bit floating point values.
fp_t v8fp_t
    8 packed 32-bit floating point values.
fp_t v4fp_t
    4 packed 32-bit floating point values.
fp16_t v16ph_t
    16 packed 16-bit floating point values.
fp16_t v8ph_t
    8 packed 16-bit floating point values.
fp16_t v4ph_t
    4 packed 16-bit floating point values.
sfx_t v8sfx_t
    8 packed 32-bit integer values.
sfx_t v4sfx_t
    4 packed 32-bit integer values.
uint8_t v8bl_t
    8 packed 8-bit bool values.