SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
Aladdin Kernel Functions

Kernel functions meant to be run under Aladdin.

Functions

float batch_norm_op (float input, float mean, float recip_sqrt_var, float gamma, float beta)
 
void ref_batch_norm_post_fc (float *inputs, float *mean, float *variance, float *gamma, float *beta, float *result, int input_nums, int input_size, int input_pad, activation_type act_function, activation_param_t act_params)
 
void ref_batch_norm_nchw_post_conv (float *inputs, float *mean, float *variance, float *gamma, float *beta, float *result, int img_nums, int img_chans, int img_rows, int img_cols, int img_pad, int wgt_pad, activation_type act_function, activation_param_t act_params)
 
void ref_batch_norm_nhwc_post_conv (float *inputs, float *mean, float *variance, float *gamma, float *beta, float *result, int img_nums, int img_rows, int img_cols, int img_chans, int img_pad, int wgt_pad, activation_type act_function, activation_param_t act_params)
 
void ref_conv3d_nchw_valid_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_num, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad, activation_type act_function, activation_param_t act_params)
 
void ref_conv3d_nchw_same_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_num, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad, activation_type act_function, activation_param_t act_params)
 
void ref_conv3d_nhwc_valid_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_num, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad, activation_type act_function, activation_param_t act_params)
 
void ref_conv3d_nhwc_same_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_num, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad, activation_type act_function, activation_param_t act_params)
 
void ref_conv2d_nchw_valid_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad)
 
void ref_conv2d_nchw_same_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad)
 
void ref_eltwise_add (float *input0, float *input1, float *results, int input_size)
 
void ref_eltwise_mul (float *input0, float *input1, float *results, int input_size)
 
void ref_greater (float *input0, float *input1, bool *results, int input_size)
 
void ref_greater_equal (float *input0, float *input1, bool *results, int input_size)
 
void ref_inner_product_ab_times_bc (float *a, float *b, float *c, int a_height, int a_width, int b_width, int a_pad, int b_pad, int c_pad, activation_type act_function, activation_param_t act_params)
 
void ref_inner_product_ab_times_cb (float *a, float *b, float *c, int a_height, int b_width, int b_height, int a_pad, int b_pad, int c_pad, activation_type act_function, activation_param_t act_params)
 
void ref_less (float *input0, float *input1, bool *results, int input_size)
 
void ref_less_equal (float *input0, float *input1, bool *results, int input_size)
 
void ref_max_pooling_nchw_treemax (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)
 
void ref_max_pooling_nhwc_treemax (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)
 
void ref_max_pooling_nchw_itermax (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)
 
void ref_max_pooling_nhwc_itermax (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)
 
void ref_avg_pooling_nchw (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)
 
void ref_avg_pooling_nhwc (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)
 
void ref_softmax_nc (float *inputs, float *results, int input_num, int input_size, int input_pad)
 
void smv_activation_fun_nc_vec_fxp (float16 *host_inputs, float16 *host_results, float *inputs, float *results, int inputs_size, activation_type function, activation_param_t params)
 
void smv_softmax_nc_vec_fxp (float16 *host_inputs, float16 *host_results, float *inputs, float *results, int input_num, int input_size, int input_pad)
 
v8fp_t batch_norm_simd_op (v8fp_t input, v8fp_t mean, v8fp_t recip_sqrt_var, v8fp_t gamma, v8fp_t beta)
 
void smv_batch_norm_post_fc_nc_vec_fxp (float16 *host_inputs, float16 *host_weights, float16 *host_results, float *inputs, float *weights, float *results, int inputs_dims[2], int weights_acts, int inputs_pad, int inputs_start, int send_results, activation_type act_function, activation_param_t act_params)
 
void smv_batch_norm_post_conv_nchw_vec_fxp (float16 *host_inputs, float16 *host_weights, float16 *host_results, float *inputs, float *weights, float *results, int inputs_dims[4], int weights_chans, int inputs_pad, int weights_pad, int weights_start, activation_type act_function, activation_param_t act_params)
 
void smv_batch_norm_post_conv_nhwc_vec_fxp (float16 *host_inputs, float16 *host_weights, float16 *host_results, float *inputs, float *weights, float *results, int inputs_dims[4], int weights_chans, int inputs_pad, int weights_pad, int weights_start, activation_type act_function, activation_param_t act_params, SamplingInfo *sampling)
 
void smv_less_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)
 
void smv_less_equal_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)
 
void smv_greater_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)
 
void smv_greater_equal_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)
 
void smv_conv3d_nhwc_vec_fxp (float16 *host_inputs, float16 *host_weights, float16 *host_results, float *inputs, float *weights, float *results, int inputs_dims[4], int weights_dims[4], int results_dims[4], int inputs_align_pad, int weights_pad, int results_pad, int inputs_halo_pad[4], int row_stride, int col_stride, int ifmap_start, int kern_start, bool accumulate, bool read_inputs, bool read_weights, bool send_results, activation_type act_function, activation_param_t act_params, SamplingInfo *sampling)
 
void smv_eltwise_add_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, float16 *host_results, float *inputs0, float *inputs1, float *results, int inputs_size)
 
void smv_eltwise_mul_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, float16 *host_results, float *inputs0, float *inputs1, float *results, int inputs_size)
 
void host_load_fp16 (float *local_data, float16 *remote_data, int num_elems, int local_offset, int remote_offset)
 
void host_store_fp16 (float *local_data, float16 *remote_data, int num_elems, int local_offset, int remote_offset)
 
void smv_matrix_multiply_transpose_nc_vec_fxp (float16 *host_a, float16 *host_b, float16 *host_results, float *a, float *b, float *results, int a_dims[2], int b_dims[2], int results_dims[2], int a_pad, int b_pad, int results_pad, int a_start, int result_start, bool accumulate, bool read_inputs, bool send_results, activation_type act_function, activation_param_t act_params, SamplingInfo *sampling)
 
void smv_maxpooling_nhwc_vec_fxp (float16 *host_inputs, float16 *host_results, float *inputs, float *results, int inputs_dims[4], int results_dims[4], int inputs_pad, int results_pad, int pool_rows, int pool_cols, int row_stride, int col_stride, int ofmap_start, SamplingInfo *sampling)
 
void smv_avgpooling_nhwc_vec_fxp (float16 *host_inputs, float16 *host_results, float *inputs, float *results, int inputs_dims[4], int results_dims[4], int inputs_pad, int results_pad, int pool_rows, int pool_cols, int row_stride, int col_stride, int ofmap_start, SamplingInfo *sampling)
 
void ref_activation_fun_nc (float *inputs, float *results, int inputs_size, activation_type function, activation_param_t params)
 Top level entry point for all Reference activation functions.
 

Detailed Description

Kernel functions meant to be run under Aladdin.

Function Documentation

◆ batch_norm_op()

float batch_norm_op ( float  input,
float  mean,
float  recip_sqrt_var,
float  gamma,
float  beta 
)

Batch normalizes one input value.

Parameters
input: Input activation.
mean: Batch mean.
recip_sqrt_var: 1/sqrt(var + eps), precomputed to avoid having to run a sqrt and division in the ASIC.
gamma: Gamma parameter.
beta: Beta parameter.

Definition at line 23 of file ref_batch_norm_op.cpp.
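Once recip_sqrt_var is precomputed, the normalization reduces to a multiply-add chain. A minimal scalar sketch of the formula (illustrative, not the exact source):

    /* Batch norm of one value, assuming recip_sqrt_var = 1/sqrt(var + eps)
     * was precomputed on the host. */
    float batch_norm_sketch(float input, float mean, float recip_sqrt_var,
                            float gamma, float beta) {
        return (input - mean) * recip_sqrt_var * gamma + beta;
    }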

◆ batch_norm_simd_op()

v8fp_t batch_norm_simd_op ( v8fp_t  input,
v8fp_t  mean,
v8fp_t  recip_sqrt_var,
v8fp_t  gamma,
v8fp_t  beta 
)

Batch normalizes a vector of eight input values.

Parameters
input: Input activations.
mean: Batch means.
recip_sqrt_var: 1/sqrt(var + eps), precomputed to avoid having to run a sqrt and division in the ASIC.
gamma: Gamma parameters.
beta: Beta parameters.

Definition at line 25 of file batch_norm.c.

◆ host_load_fp16()

void host_load_fp16 ( float *  local_data,
float16 *  remote_data,
int  num_elems,
int  local_offset,
int  remote_offset 
)

Loads half-precision fp data from the host and converts it into single-precision data locally on the accelerator.

The transfer operation is pipelined so it can be overlapped with the conversion operation. Each transfer is at most one page in size (4KB), which is converted into 8KB of data. The conversion is done in-place, so no additional SRAM is required to buffer the FP16 data.

Parameters
local_data: Single-precision accelerator-local scratchpad.
remote_data: Half-precision host memory address.
num_elems: Number of elements to copy.
local_offset: Offset into the local array to start copying data to, in elements.
remote_offset: Offset into remote memory to start copying data from, in elements.

Definition at line 7 of file load_store_fp16_data.c.
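Functionally, the load amounts to a widening element copy. An unpipelined scalar equivalent, assuming a compiler-supported _Float16 storage type (the real kernel overlaps page-sized DMA transfers with in-place conversion):

    /* Copy num_elems half-precision values from host memory into the local
     * scratchpad, widening each to single precision. */
    void host_load_fp16_sketch(float* local_data, const _Float16* remote_data,
                               int num_elems, int local_offset,
                               int remote_offset) {
        for (int i = 0; i < num_elems; i++)
            local_data[local_offset + i] =
                    (float)remote_data[remote_offset + i];
    }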

◆ host_store_fp16()

void host_store_fp16 ( float *  local_data,
float16 *  remote_data,
int  num_elems,
int  local_offset,
int  remote_offset 
)

Converts single-precision fp data from the accelerator into half-precision data and copies it to the host.

The transfer operation is pipelined so it can be overlapped with the conversion operation. Each transfer is at most one page in size (4KB), which is converted from 8KB of data. The conversion is done in-place, so no additional SRAM is required to buffer the FP16 data.

Parameters
local_data: Single-precision accelerator-local scratchpad.
remote_data: Half-precision host memory address.
num_elems: Number of elements to copy.
local_offset: Offset into the local array to start copying data from, in elements.
remote_offset: Offset into remote memory to start copying data to, in elements.

Definition at line 45 of file load_store_fp16_data.c.

◆ ref_avg_pooling_nchw()

void ref_avg_pooling_nchw ( float *  input,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  res_rows,
int  res_cols,
int  res_pad,
int  pool_row_size,
int  pool_col_size,
int  pool_row_stride,
int  pool_col_stride 
)

A Reference implementation of AvgPoolingOp on NCHW data.

Definition at line 256 of file ref_pooling_op.cpp.

◆ ref_avg_pooling_nhwc()

void ref_avg_pooling_nhwc ( float *  input,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  res_rows,
int  res_cols,
int  res_pad,
int  pool_row_size,
int  pool_col_size,
int  pool_row_stride,
int  pool_col_stride 
)

A Reference implementation of AvgPoolingOp on NHWC data.

Definition at line 308 of file ref_pooling_op.cpp.

◆ ref_batch_norm_nchw_post_conv()

void ref_batch_norm_nchw_post_conv ( float *  inputs,
float *  mean,
float *  variance,
float *  gamma,
float *  beta,
float *  result,
int  img_nums,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  wgt_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of batch normalization following a convolutional/pooling layer on NCHW data.

After conv/pooling, we only have a gamma/beta per output feature map, not per activation.

Definition at line 85 of file ref_batch_norm_op.cpp.

◆ ref_batch_norm_nhwc_post_conv()

void ref_batch_norm_nhwc_post_conv ( float *  inputs,
float *  mean,
float *  variance,
float *  gamma,
float *  beta,
float *  result,
int  img_nums,
int  img_rows,
int  img_cols,
int  img_chans,
int  img_pad,
int  wgt_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of batch normalization following a convolutional/pooling layer on NHWC data.

After conv/pooling, we only have a gamma/beta per output feature map, not per activation.

Definition at line 147 of file ref_batch_norm_op.cpp.

◆ ref_batch_norm_post_fc()

void ref_batch_norm_post_fc ( float *  inputs,
float *  mean,
float *  variance,
float *  gamma,
float *  beta,
float *  result,
int  input_nums,
int  input_size,
int  input_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of batch normalization following a fully-connected layer.

In this case, we have one pair of gamma/beta weights per activation.

Definition at line 40 of file ref_batch_norm_op.cpp.

◆ ref_conv2d_nchw_same_padding()

void ref_conv2d_nchw_same_padding ( float *  input,
float *  kernels,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  k_rows,
int  k_cols,
int  k_pad,
int  k_row_stride,
int  k_col_stride,
int  res_rows,
int  res_cols,
int  res_pad 
)

A Reference implementation of a depthwise convolution on NCHW data with same padding.

Definition at line 83 of file ref_depthwise_convolution_op.cpp.

◆ ref_conv2d_nchw_valid_padding()

void ref_conv2d_nchw_valid_padding ( float *  input,
float *  kernels,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  k_rows,
int  k_cols,
int  k_pad,
int  k_row_stride,
int  k_col_stride,
int  res_rows,
int  res_cols,
int  res_pad 
)

A Reference implementation of a depthwise convolution on NCHW data with valid padding. The Reference backend requires no alignment padding, so all _pad parameters can be zero.

Definition at line 15 of file ref_depthwise_convolution_op.cpp.

◆ ref_conv3d_nchw_same_padding()

void ref_conv3d_nchw_same_padding ( float *  input,
float *  kernels,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  k_num,
int  k_rows,
int  k_cols,
int  k_pad,
int  k_row_stride,
int  k_col_stride,
int  res_rows,
int  res_cols,
int  res_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of a 3D convolution on NCHW data with same padding.

Definition at line 94 of file ref_convolution_op.cpp.

◆ ref_conv3d_nchw_valid_padding()

void ref_conv3d_nchw_valid_padding ( float *  input,
float *  kernels,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  k_num,
int  k_rows,
int  k_cols,
int  k_pad,
int  k_row_stride,
int  k_col_stride,
int  res_rows,
int  res_cols,
int  res_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of a 3D convolution on NCHW data with valid padding.

Definition at line 16 of file ref_convolution_op.cpp.
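The valid/same distinction determines the output spatial dimensions that res_rows and res_cols must match. A sketch of the standard formulas (these helpers are illustrative, not part of the SMAUG API):

    /* Valid padding: the kernel never extends past the image border. */
    static int output_dim_valid(int img_dim, int k_dim, int stride) {
        return (img_dim - k_dim) / stride + 1;
    }

    /* Same padding: the image is zero-padded so that, at stride 1, the
     * output has the same spatial size as the input. */
    static int output_dim_same(int img_dim, int stride) {
        return (img_dim + stride - 1) / stride;  /* ceil(img_dim / stride) */
    }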

◆ ref_conv3d_nhwc_same_padding()

void ref_conv3d_nhwc_same_padding ( float *  input,
float *  kernels,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  k_num,
int  k_rows,
int  k_cols,
int  k_pad,
int  k_row_stride,
int  k_col_stride,
int  res_rows,
int  res_cols,
int  res_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of a 3D convolution on NHWC data with same padding.

Definition at line 266 of file ref_convolution_op.cpp.

◆ ref_conv3d_nhwc_valid_padding()

void ref_conv3d_nhwc_valid_padding ( float *  input,
float *  kernels,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  k_num,
int  k_rows,
int  k_cols,
int  k_pad,
int  k_row_stride,
int  k_col_stride,
int  res_rows,
int  res_cols,
int  res_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of a 3D convolution on NHWC data with valid padding.

Definition at line 188 of file ref_convolution_op.cpp.

◆ ref_eltwise_add()

void ref_eltwise_add ( float *  input0,
float *  input1,
float *  results,
int  input_size 
)

A Reference implementation of elementwise addition.

Definition at line 13 of file ref_eltwise_add_op.cpp.

◆ ref_eltwise_mul()

void ref_eltwise_mul ( float *  input0,
float *  input1,
float *  results,
int  input_size 
)

A Reference implementation of elementwise multiplication.

Definition at line 13 of file ref_eltwise_mul_op.cpp.

◆ ref_greater()

void ref_greater ( float *  input0,
float *  input1,
bool *  results,
int  input_size 
)

A Reference implementation of elementwise greater-than.

Definition at line 13 of file ref_greater_op.cpp.

◆ ref_greater_equal()

void ref_greater_equal ( float *  input0,
float *  input1,
bool *  results,
int  input_size 
)

A Reference implementation of elementwise greater-than-or-equal-to.

Definition at line 27 of file ref_greater_op.cpp.

◆ ref_inner_product_ab_times_bc()

void ref_inner_product_ab_times_bc ( float *  a,
float *  b,
float *  c,
int  a_height,
int  a_width,
int  b_width,
int  a_pad,
int  b_pad,
int  c_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of an inner product operator: C = A x B.

Parameters
a: A matrix of dimensions a_height x a_width.
b: A matrix of dimensions a_width x b_width.
c: A matrix of dimensions a_height x b_width.
a_height: Number of rows in A.
a_width: Number of columns in A.
b_width: Number of columns in B.
a_pad: Additional alignment zero-padding on a.
b_pad: Additional alignment zero-padding on b.
c_pad: Additional alignment zero-padding on c.
act_function: The activation function to apply to the result of the inner product.
act_params: Parameters to the activation function.

Definition at line 28 of file ref_inner_product_op.cpp.
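Stripped of padding and the activation function, the computation is a plain row-major matrix multiply. A minimal sketch (illustrative only):

    /* C = A x B, with A of size a_height x a_width and B of size
     * a_width x b_width, all stored row-major with no padding. */
    void inner_product_sketch(const float* a, const float* b, float* c,
                              int a_height, int a_width, int b_width) {
        for (int i = 0; i < a_height; i++) {
            for (int j = 0; j < b_width; j++) {
                float acc = 0;
                for (int k = 0; k < a_width; k++)
                    acc += a[i * a_width + k] * b[k * b_width + j];
                c[i * b_width + j] = acc;
            }
        }
    }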

◆ ref_inner_product_ab_times_cb()

void ref_inner_product_ab_times_cb ( float *  a,
float *  b,
float *  c,
int  a_height,
int  b_width,
int  b_height,
int  a_pad,
int  b_pad,
int  c_pad,
activation_type  act_function,
activation_param_t  act_params 
)

A Reference implementation of an inner product operator: C = A x B_transpose.

Parameters
a: A matrix of dimensions a_height x b_width.
b: A matrix of dimensions b_height x b_width.
c: A matrix of dimensions a_height x b_height.
a_height: Number of rows in A.
b_width: Number of columns in B.
b_height: Number of rows in B.
a_pad: Additional alignment zero-padding on a.
b_pad: Additional alignment zero-padding on b.
c_pad: Additional alignment zero-padding on c.
act_function: The activation function to apply to the result of the inner product.
act_params: Parameters to the activation function.

Definition at line 87 of file ref_inner_product_op.cpp.

◆ ref_less()

void ref_less ( float *  input0,
float *  input1,
bool *  results,
int  input_size 
)

A Reference implementation of elementwise less-than.

Definition at line 13 of file ref_less_op.cpp.

◆ ref_less_equal()

void ref_less_equal ( float *  input0,
float *  input1,
bool *  results,
int  input_size 
)

A Reference implementation of elementwise less-than-or-equal-to.

Definition at line 27 of file ref_less_op.cpp.

◆ ref_max_pooling_nchw_itermax()

void ref_max_pooling_nchw_itermax ( float *  input,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  res_rows,
int  res_cols,
int  res_pad,
int  pool_row_size,
int  pool_col_size,
int  pool_row_stride,
int  pool_col_stride 
)

A Reference implementation of MaxPoolingOp on NCHW data, using a loop-based maximum function.

Definition at line 151 of file ref_pooling_op.cpp.

◆ ref_max_pooling_nchw_treemax()

void ref_max_pooling_nchw_treemax ( float *  input,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  res_rows,
int  res_cols,
int  res_pad,
int  pool_row_size,
int  pool_col_size,
int  pool_row_stride,
int  pool_col_stride 
)

A Reference implementation of MaxPoolingOp on NCHW data, using a tree-based maximum function.

Definition at line 15 of file ref_pooling_op.cpp.
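The tree-based variant reduces the pooling window with a balanced tree of pairwise maxes instead of a sequential scan, which shortens the dependence chain. A sketch for an illustrative 2x2 window (these helpers are not part of the SMAUG API):

    static float max2(float a, float b) { return a > b ? a : b; }

    /* Two independent pairwise maxes followed by one combining max: depth 2,
     * versus the depth-3 chain an iterative scan of 4 values produces. */
    static float treemax4(float w0, float w1, float w2, float w3) {
        return max2(max2(w0, w1), max2(w2, w3));
    }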

◆ ref_max_pooling_nhwc_itermax()

void ref_max_pooling_nhwc_itermax ( float *  input,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  res_rows,
int  res_cols,
int  res_pad,
int  pool_row_size,
int  pool_col_size,
int  pool_row_stride,
int  pool_col_stride 
)

A Reference implementation of MaxPoolingOp on NHWC data, using a loop-based maximum function.

Definition at line 204 of file ref_pooling_op.cpp.

◆ ref_max_pooling_nhwc_treemax()

void ref_max_pooling_nhwc_treemax ( float *  input,
float *  result,
int  img_num,
int  img_chans,
int  img_rows,
int  img_cols,
int  img_pad,
int  res_rows,
int  res_cols,
int  res_pad,
int  pool_row_size,
int  pool_col_size,
int  pool_row_stride,
int  pool_col_stride 
)

A Reference implementation of MaxPoolingOp on NHWC data, using a tree-based maximum function.

Definition at line 83 of file ref_pooling_op.cpp.

◆ ref_softmax_nc()

void ref_softmax_nc ( float *  inputs,
float *  results,
int  input_num,
int  input_size,
int  input_pad 
)

A Reference implementation of the softmax function.

The softmax function exponentiates each element and then normalizes each row to sum to 1. To improve numerical stability, we use the max trick: the maximum value of each input row is subtracted from every element before exponentiation.

Parameters
inputs: Matrix of size input_num x input_size, stored row-major.
results: Output array.
input_num: Batch size.
input_size: Number of activations per input.
input_pad: Alignment padding.

Definition at line 27 of file ref_softmax_op.cpp.
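A minimal sketch of one row of this computation, with padding omitted (illustrative only):

    #include <math.h>

    /* Numerically stable softmax over one row of input_size elements. */
    void softmax_row_sketch(const float* input, float* result, int input_size) {
        float max_val = input[0];
        for (int i = 1; i < input_size; i++)
            if (input[i] > max_val) max_val = input[i];
        float sum = 0;
        for (int i = 0; i < input_size; i++) {
            result[i] = expf(input[i] - max_val);  /* exponents are <= 0 */
            sum += result[i];
        }
        for (int i = 0; i < input_size; i++)
            result[i] /= sum;  /* each row now sums to 1 */
    }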

◆ smv_activation_fun_nc_vec_fxp()

void smv_activation_fun_nc_vec_fxp ( float16 *  host_inputs,
float16 *  host_results,
float *  inputs,
float *  results,
int  inputs_size,
activation_type  function,
activation_param_t  params 
)

Top level entry point for all unary SMV activation functions.

Definition at line 13 of file activation_functions_simd.c.

◆ smv_avgpooling_nhwc_vec_fxp()

void smv_avgpooling_nhwc_vec_fxp ( float16 *  host_inputs,
float16 *  host_results,
float *  inputs,
float *  results,
int  inputs_dims[4],
int  results_dims[4],
int  inputs_pad,
int  results_pad,
int  pool_rows,
int  pool_cols,
int  row_stride,
int  col_stride,
int  ofmap_start,
SamplingInfo *  sampling 
)

An average-pooling operation on SMV with NHWC format. This is the vectorized implementation.

This requires a blocked channel data format (GNHWC), where G = channels/8 and the last dimension holds exactly 8 channels. Arbitrary pooling sizes and strides are supported.

Parameters
host_inputs: Host inputs buffer in NHWC.
host_results: Host results buffer in NHWC.
inputs: Local inputs buffer in NHWC.
results: Local results buffer in NHWC.
inputs_dims: Dimensions of the inputs.
results_dims: Dimensions of the results.
inputs_pad: Alignment padding size on the channel dimension of the inputs.
results_pad: Alignment padding size on the channel dimension of the results.
pool_rows: Row size of the pooling function.
pool_cols: Column size of the pooling function.
row_stride: Stride size on the row dimension.
col_stride: Stride size on the col dimension.
ofmap_start: If the results contain more channels than the inputs, start from this one. Otherwise this should always be zero.
sampling: Simulation sampling settings.

Definition at line 166 of file pooling.c.
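Taking the GNHWC name at face value (dimension order G, N, H, W, then an 8-wide channel block), a flat index could be computed as below. This helper is an illustrative assumption, not part of the SMAUG API:

    #define VECTOR_SIZE 8

    /* Flat offset of element (n, h, w, c) in a [G][N][H][W][8] layout,
     * where G = C / VECTOR_SIZE and C must be a multiple of 8. */
    static int gnhwc_idx(int n, int h, int w, int c, int N, int H, int W) {
        int g = c / VECTOR_SIZE;  /* channel group */
        int v = c % VECTOR_SIZE;  /* lane within the 8-wide block */
        return (((g * N + n) * H + h) * W + w) * VECTOR_SIZE + v;
    }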

◆ smv_batch_norm_post_conv_nchw_vec_fxp()

void smv_batch_norm_post_conv_nchw_vec_fxp ( float16 *  host_inputs,
float16 *  host_weights,
float16 *  host_results,
float *  inputs,
float *  weights,
float *  results,
int  inputs_dims[4],
int  weights_chans,
int  inputs_pad,
int  weights_pad,
int  weights_start,
activation_type  act_function,
activation_param_t  act_params 
)

SMV implementation of batch normalization following a convolutional/pooling layer on NCHW data.

After conv/pooling, we only have a gamma/beta per output feature map, not per activation.

Definition at line 100 of file batch_norm.c.

◆ smv_batch_norm_post_conv_nhwc_vec_fxp()

void smv_batch_norm_post_conv_nhwc_vec_fxp ( float16 *  host_inputs,
float16 *  host_weights,
float16 *  host_results,
float *  inputs,
float *  weights,
float *  results,
int  inputs_dims[4],
int  weights_chans,
int  inputs_pad,
int  weights_pad,
int  weights_start,
activation_type  act_function,
activation_param_t  act_params,
SamplingInfo *  sampling 
)

SMV implementation of batch normalization following a convolutional/pooling layer on NHWC data.

After conv/pooling, we only have a gamma/beta per output feature map, not per activation.

Definition at line 196 of file batch_norm.c.

◆ smv_batch_norm_post_fc_nc_vec_fxp()

void smv_batch_norm_post_fc_nc_vec_fxp ( float16 *  host_inputs,
float16 *  host_weights,
float16 *  host_results,
float *  inputs,
float *  weights,
float *  results,
int  inputs_dims[2],
int  weights_acts,
int  inputs_pad,
int  inputs_start,
int  send_results,
activation_type  act_function,
activation_param_t  act_params 
)

SMV implementation of batch normalization following a fully-connected layer.

In this case, we have one pair of gamma/beta weights per activation.

Definition at line 41 of file batch_norm.c.

◆ smv_conv3d_nhwc_vec_fxp()

void smv_conv3d_nhwc_vec_fxp ( float16 *  host_inputs,
float16 *  host_weights,
float16 *  host_results,
float *  inputs,
float *  weights,
float *  results,
int  inputs_dims[4],
int  weights_dims[4],
int  results_dims[4],
int  inputs_align_pad,
int  weights_pad,
int  results_pad,
int  inputs_halo_pad[4],
int  row_stride,
int  col_stride,
int  ifmap_start,
int  kern_start,
bool  accumulate,
bool  read_inputs,
bool  read_weights,
bool  send_results,
activation_type  act_function,
activation_param_t  act_params,
SamplingInfo *  sampling 
)

Perform a 3D convolution with one kernel on an image, with reduction in NHWC format. This is the vectorized implementation.

Parameters
host_inputs: Host inputs buffer in NHWC.
host_weights: Host weights buffer in NHWC.
host_results: Host results buffer in NHWC.
inputs: Local inputs buffer in NHWC.
weights: Local weights buffer in NHWC.
results: Local results buffer in NHWC.
inputs_dims: Dimensions of the inputs.
weights_dims: Dimensions of the weights.
results_dims: Dimensions of the results.
inputs_align_pad: Alignment padding size on the channel dimension of the inputs.
weights_pad: Alignment padding size on the channel dimension of the weights.
results_pad: Alignment padding size on the channel dimension of the results.
inputs_halo_pad: Padding sizes on the top, bottom, left, and right of the input 2D feature maps.
row_stride: Stride size on the row dimension.
col_stride: Stride size on the col dimension.
ifmap_start: If the input contains more channels than the weights, start from this one. Otherwise this should always be zero.
kern_start: If the weights contain more kernels than the results buffer can fit, start from this one. Otherwise this should always be zero.
accumulate: If the original weight tensor is tiled channelwise, set this to true to avoid resetting the result buffer for non-first weight tiles.
read_inputs: Load inputs from the host. Set to false if the input activations can be reused from the last invocation.
read_weights: Load weights from the host. Set to false if the weights can be reused from the last invocation.
send_results: Send the results to host memory if this is true.
act_function: Activation function the operator runs.
act_params: Parameters for the activation function.
sampling: Simulation sampling settings.

Definition at line 53 of file convolution_simd.c.
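The accumulate/read/send flags exist to sequence multiple invocations over tiles of a large tensor. A hedged sketch of how a host might drive channelwise weight tiles (num_tiles and launch_conv_tile are hypothetical stand-ins, not SMAUG API):

    #include <stdbool.h>

    /* Stand-in for a fully parameterized smv_conv3d_nhwc_vec_fxp call. */
    void launch_conv_tile(int tile, bool accumulate, bool read_inputs,
                          bool read_weights, bool send_results);

    void run_channelwise_tiles(int num_tiles) {
        for (int t = 0; t < num_tiles; t++) {
            bool accumulate = (t > 0);   /* keep partial sums after tile 0 */
            bool read_inputs = true;     /* each tile covers new channels */
            bool read_weights = true;    /* each tile brings new weights */
            bool send_results = (t == num_tiles - 1);  /* flush once */
            launch_conv_tile(t, accumulate, read_inputs, read_weights,
                             send_results);
        }
    }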

◆ smv_eltwise_add_nc_vec_fxp()

void smv_eltwise_add_nc_vec_fxp ( float16 *  host_inputs0,
float16 *  host_inputs1,
float16 *  host_results,
float *  inputs0,
float *  inputs1,
float *  results,
int  inputs_size 
)

SMV implementation of elementwise addition.

Definition at line 13 of file eltwise_add.c.

◆ smv_eltwise_mul_nc_vec_fxp()

void smv_eltwise_mul_nc_vec_fxp ( float16 *  host_inputs0,
float16 *  host_inputs1,
float16 *  host_results,
float *  inputs0,
float *  inputs1,
float *  results,
int  inputs_size 
)

SMV implementation of elementwise multiplication.

Definition at line 13 of file eltwise_mul.c.

◆ smv_greater_equal_nc_vec_fxp()

void smv_greater_equal_nc_vec_fxp ( float16 *  host_inputs0,
float16 *  host_inputs1,
bool *  host_results,
float *  inputs0,
float *  inputs1,
bool *  results,
int  inputs_size 
)

SMV implementation of elementwise greater-than-or-equal-to.

Definition at line 106 of file compare.c.

◆ smv_greater_nc_vec_fxp()

void smv_greater_nc_vec_fxp ( float16 *  host_inputs0,
float16 *  host_inputs1,
bool *  host_results,
float *  inputs0,
float *  inputs1,
bool *  results,
int  inputs_size 
)

SMV implementation of elementwise greater-than.

Definition at line 77 of file compare.c.

◆ smv_less_equal_nc_vec_fxp()

void smv_less_equal_nc_vec_fxp ( float16 *  host_inputs0,
float16 *  host_inputs1,
bool *  host_results,
float *  inputs0,
float *  inputs1,
bool *  results,
int  inputs_size 
)

SMV implementation of elementwise less-than-or-equal-to.

Definition at line 48 of file compare.c.

◆ smv_less_nc_vec_fxp()

void smv_less_nc_vec_fxp ( float16 *  host_inputs0,
float16 *  host_inputs1,
bool *  host_results,
float *  inputs0,
float *  inputs1,
bool *  results,
int  inputs_size 
)

SMV implementation of elementwise less-than.

Definition at line 19 of file compare.c.

◆ smv_matrix_multiply_transpose_nc_vec_fxp()

void smv_matrix_multiply_transpose_nc_vec_fxp ( float16 *  host_a,
float16 *  host_b,
float16 *  host_results,
float *  a,
float *  b,
float *  results,
int  a_dims[2],
int  b_dims[2],
int  results_dims[2],
int  a_pad,
int  b_pad,
int  results_pad,
int  a_start,
int  result_start,
bool  accumulate,
bool  read_inputs,
bool  send_results,
activation_type  act_function,
activation_param_t  act_params,
SamplingInfo *  sampling 
)

Matrix b after transposition:

           cols (originally rows) --->
  rows    [[---][---][---]]
   |      [[---][---][---]]
          [[---][---][---]]
   v      [[---][---][---]]

Each [---] represents an 8-wide vector. This inner product executes a 32-way MACC (four such 8-wide vectors) per PE, across 8 PEs, where each PE is assigned a row of the transposed matrix. It continues across each row of b until the complete output pixel is finished (output stationary).

No biases are added.

Parameters
host_a: Host buffer for a in NC.
host_b: Host buffer for b in NC.
host_results: Host results buffer in NC.
a: Local buffer for a in NC.
b: Local buffer for b in NC.
results: Local results buffer in NC.
a_dims: Dimensions of a.
b_dims: Dimensions of b.
results_dims: Dimensions of the results.
a_pad: Alignment padding size on the channel dimension of a.
b_pad: Alignment padding size on the channel dimension of b.
results_pad: Alignment padding size on the channel dimension of the results.
a_start: If a contains more activations than b, start from this one. Otherwise this should always be zero.
result_start: If the results contain more neurons than b, start writing results from this one. Otherwise this should always be zero.
accumulate: If the original b tensor is tiled on activations, set this to true to avoid resetting the result buffer for non-first b tiles.
read_inputs: Load inputs from the host. Set to false if the input activations can be reused from the last invocation.
send_results: Send the results to host memory if this is true.
act_function: Activation function the operator runs.
act_params: Parameters for the activation function.
sampling: Simulation sampling settings.

Definition at line 59 of file matrix_multiply.c.
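Ignoring vectorization, padding, tiling, and the activation function, the kernel computes C = A x B_transpose in an output-stationary order. A scalar sketch (illustrative only):

    /* A is a_height x b_width, B is b_height x b_width, C is
     * a_height x b_height; every output element reduces a row of A
     * against a row of B, all stored row-major. */
    void matmul_transpose_sketch(const float* a, const float* b, float* c,
                                 int a_height, int b_height, int b_width) {
        for (int i = 0; i < a_height; i++) {
            for (int j = 0; j < b_height; j++) {
                float acc = 0;  /* output stationary: finish c[i][j] fully */
                for (int k = 0; k < b_width; k++)
                    acc += a[i * b_width + k] * b[j * b_width + k];
                c[i * b_height + j] = acc;
            }
        }
    }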

◆ smv_maxpooling_nhwc_vec_fxp()

void smv_maxpooling_nhwc_vec_fxp ( float16 *  host_inputs,
float16 *  host_results,
float *  inputs,
float *  results,
int  inputs_dims[4],
int  results_dims[4],
int  inputs_pad,
int  results_pad,
int  pool_rows,
int  pool_cols,
int  row_stride,
int  col_stride,
int  ofmap_start,
SamplingInfo *  sampling 
)

A max-pooling operation on SMV with NHWC format. This is the vectorized implementation.

Parameters
host_inputs: Host inputs buffer in NHWC.
host_results: Host results buffer in NHWC.
inputs: Local inputs buffer in NHWC.
results: Local results buffer in NHWC.
inputs_dims: Dimensions of the inputs.
results_dims: Dimensions of the results.
inputs_pad: Alignment padding size on the channel dimension of the inputs.
results_pad: Alignment padding size on the channel dimension of the results.
pool_rows: Row size of the pooling function.
pool_cols: Column size of the pooling function.
row_stride: Stride size on the row dimension.
col_stride: Stride size on the col dimension.
ofmap_start: If the results contain more channels than the inputs, start from this one. Otherwise this should always be zero.
sampling: Simulation sampling settings.

Definition at line 36 of file pooling.c.

◆ smv_softmax_nc_vec_fxp()

void smv_softmax_nc_vec_fxp ( float16 *  host_inputs,
float16 *  host_results,
float *  inputs,
float *  results,
int  input_num,
int  input_size,
int  input_pad 
)

Top level function for softmax.

Definition at line 31 of file activation_functions_simd.c.