Kernel functions meant to be run under Aladdin. More...

Functions
float	batch_norm_op (float input, float mean, float recip_sqrt_var, float gamma, float beta)

void	ref_batch_norm_post_fc (float *inputs, float *mean, float *variance, float *gamma, float *beta, float *result, int input_nums, int input_size, int input_pad, activation_type act_function, activation_param_t act_params)

void	ref_batch_norm_nchw_post_conv (float *inputs, float *mean, float *variance, float *gamma, float *beta, float *result, int img_nums, int img_chans, int img_rows, int img_cols, int img_pad, int wgt_pad, activation_type act_function, activation_param_t act_params)

void	ref_batch_norm_nhwc_post_conv (float *inputs, float *mean, float *variance, float *gamma, float *beta, float *result, int img_nums, int img_rows, int img_cols, int img_chans, int img_pad, int wgt_pad, activation_type act_function, activation_param_t act_params)

void	ref_conv3d_nchw_valid_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_num, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad, activation_type act_function, activation_param_t act_params)

void	ref_conv3d_nchw_same_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_num, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad, activation_type act_function, activation_param_t act_params)

void	ref_conv3d_nhwc_valid_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_num, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad, activation_type act_function, activation_param_t act_params)

void	ref_conv3d_nhwc_same_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_num, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad, activation_type act_function, activation_param_t act_params)

void	ref_conv2d_nchw_valid_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad)

void	ref_conv2d_nchw_same_padding (float *input, float *kernels, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int k_rows, int k_cols, int k_pad, int k_row_stride, int k_col_stride, int res_rows, int res_cols, int res_pad)

void	ref_eltwise_add (float *input0, float *input1, float *results, int input_size)

void	ref_eltwise_mul (float *input0, float *input1, float *results, int input_size)

void	ref_greater (float *input0, float *input1, bool *results, int input_size)

void	ref_greater_equal (float *input0, float *input1, bool *results, int input_size)

void	ref_inner_product_ab_times_bc (float *a, float *b, float *c, int a_height, int a_width, int b_width, int a_pad, int b_pad, int c_pad, activation_type act_function, activation_param_t act_params)

void	ref_inner_product_ab_times_cb (float *a, float *b, float *c, int a_height, int b_width, int b_height, int a_pad, int b_pad, int c_pad, activation_type act_function, activation_param_t act_params)

void	ref_less (float *input0, float *input1, bool *results, int input_size)

void	ref_less_equal (float *input0, float *input1, bool *results, int input_size)

void	ref_max_pooling_nchw_treemax (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)

void	ref_max_pooling_nhwc_treemax (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)

void	ref_max_pooling_nchw_itermax (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)

void	ref_max_pooling_nhwc_itermax (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)

void	ref_avg_pooling_nchw (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)

void	ref_avg_pooling_nhwc (float *input, float *result, int img_num, int img_chans, int img_rows, int img_cols, int img_pad, int res_rows, int res_cols, int res_pad, int pool_row_size, int pool_col_size, int pool_row_stride, int pool_col_stride)

void	ref_softmax_nc (float *inputs, float *results, int input_num, int input_size, int input_pad)

void	smv_activation_fun_nc_vec_fxp (float16 *host_inputs, float16 *host_results, float *inputs, float *results, int inputs_size, activation_type function, activation_param_t params)

void	smv_softmax_nc_vec_fxp (float16 *host_inputs, float16 *host_results, float *inputs, float *results, int input_num, int input_size, int input_pad)

v8fp_t	batch_norm_simd_op (v8fp_t input, v8fp_t mean, v8fp_t recip_sqrt_var, v8fp_t gamma, v8fp_t beta)

void	smv_batch_norm_post_fc_nc_vec_fxp (float16 *host_inputs, float16 *host_weights, float16 *host_results, float *inputs, float *weights, float *results, int inputs_dims[2], int weights_acts, int inputs_pad, int inputs_start, int send_results, activation_type act_function, activation_param_t act_params)

void	smv_batch_norm_post_conv_nchw_vec_fxp (float16 *host_inputs, float16 *host_weights, float16 *host_results, float *inputs, float *weights, float *results, int inputs_dims[4], int weights_chans, int inputs_pad, int weights_pad, int weights_start, activation_type act_function, activation_param_t act_params)

void	smv_batch_norm_post_conv_nhwc_vec_fxp (float16 *host_inputs, float16 *host_weights, float16 *host_results, float *inputs, float *weights, float *results, int inputs_dims[4], int weights_chans, int inputs_pad, int weights_pad, int weights_start, activation_type act_function, activation_param_t act_params, SamplingInfo *sampling)

void	smv_less_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)

void	smv_less_equal_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)

void	smv_greater_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)

void	smv_greater_equal_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, bool *host_results, float *inputs0, float *inputs1, bool *results, int inputs_size)

void	smv_conv3d_nhwc_vec_fxp (float16 *host_inputs, float16 *host_weights, float16 *host_results, float *inputs, float *weights, float *results, int inputs_dims[4], int weights_dims[4], int results_dims[4], int inputs_align_pad, int weights_pad, int results_pad, int inputs_halo_pad[4], int row_stride, int col_stride, int ifmap_start, int kern_start, bool accumulate, bool read_inputs, bool read_weights, bool send_results, activation_type act_function, activation_param_t act_params, SamplingInfo *sampling)

void	smv_eltwise_add_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, float16 *host_results, float *inputs0, float *inputs1, float *results, int inputs_size)

void	smv_eltwise_mul_nc_vec_fxp (float16 *host_inputs0, float16 *host_inputs1, float16 *host_results, float *inputs0, float *inputs1, float *results, int inputs_size)

void	host_load_fp16 (float *local_data, float16 *remote_data, int num_elems, int local_offset, int remote_offset)

void	host_store_fp16 (float *local_data, float16 *remote_data, int num_elems, int local_offset, int remote_offset)

void	smv_matrix_multiply_transpose_nc_vec_fxp (float16 *host_a, float16 *host_b, float16 *host_results, float *a, float *b, float *results, int a_dims[2], int b_dims[2], int results_dims[2], int a_pad, int b_pad, int results_pad, int a_start, int result_start, bool accumulate, bool read_inputs, bool send_results, activation_type act_function, activation_param_t act_params, SamplingInfo *sampling)

void	smv_maxpooling_nhwc_vec_fxp (float16 *host_inputs, float16 *host_results, float *inputs, float *results, int inputs_dims[4], int results_dims[4], int inputs_pad, int results_pad, int pool_rows, int pool_cols, int row_stride, int col_stride, int ofmap_start, SamplingInfo *sampling)

void	smv_avgpooling_nhwc_vec_fxp (float16 *host_inputs, float16 *host_results, float *inputs, float *results, int inputs_dims[4], int results_dims[4], int inputs_pad, int results_pad, int pool_rows, int pool_cols, int row_stride, int col_stride, int ofmap_start, SamplingInfo *sampling)

void	ref_activation_fun_nc (float *inputs, float *results, int inputs_size, activation_type function, activation_param_t params)
	Top level entry point for all Reference activation functions.

Detailed Description

Kernel functions meant to be run under Aladdin.

Function Documentation

◆ batch_norm_op()

float batch_norm_op	(	float	input,
		float	mean,
		float	recip_sqrt_var,
		float	gamma,
		float	beta
	)

Batch normalizes one input value.

Parameters

input	Input activation.
mean	Batch mean
recip_sqrt_var	1/sqrt(var + eps), which is precomputed to avoid having to run a sqrt and division in the ASIC.
gamma	Gamma parameter.
beta	Beta parameter.

Definition at line 23 of file ref_batch_norm_op.cpp.

◆ batch_norm_simd_op()

v8fp_t batch_norm_simd_op	(	v8fp_t	input,
		v8fp_t	mean,
		v8fp_t	recip_sqrt_var,
		v8fp_t	gamma,
		v8fp_t	beta
	)

Batch normalizes one input value.

Parameters

input	Input activation.
mean	Batch mean
recip_sqrt_var	1/sqrt(var + eps), which is precomputed to avoid having to run a sqrt and division in the ASIC.
gamma	Gamma parameter.
beta	Beta parameter.

Definition at line 25 of file batch_norm.c.

◆ host_load_fp16()

void host_load_fp16	(	float *	local_data,
		float16 *	remote_data,
		int	num_elems,
		int	local_offset,
		int	remote_offset
	)

Loads half-precision fp data from the host and converts it into single-precision data locally on the accelerator.

The transfer operation is pipelined so it can be overlapped with the conversion operation. Each transfer is at most one page in size (4KB), which is converted into 8KB of data. The conversion is done in-place, so no additional SRAM is required to buffer the FP16 data.

Parameters

local_data	Single-precision accelerator-local scratchpad.
remote_data	Half-precision host memory address.
num_elems	Number of elements to copy.
local_offset	Offset into local array to start copying data to in elements.
remote_offset	Offset into remote memory to start copying data from in elements.

Definition at line 7 of file load_store_fp16_data.c.

◆ host_store_fp16()

void host_store_fp16	(	float *	local_data,
		float16 *	remote_data,
		int	num_elems,
		int	local_offset,
		int	remote_offset
	)

Converts single-precision fp data from the accelerator into half-precision data and copies it to the host.

The transfer operation is pipelined so it can be overlapped with the conversion operation. Each transfer is at most one page in size (4KB), which is converted from 8KB of data. The conversion is done in-place, so no additional SRAM is required to buffer the FP16 data.

Parameters

local_data	Single-precision accelerator-local scratchpad.
remote_data	Half-precision host memory address.
num_elems	Number of elements to copy.
local_offset	Offset into local array to start copying data from in elements.
remote_offset	Offset into remote memory to start copying data to in elements.

Definition at line 45 of file load_store_fp16_data.c.

◆ ref_avg_pooling_nchw()

void ref_avg_pooling_nchw	(	float *	input,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		int	pool_row_size,
		int	pool_col_size,
		int	pool_row_stride,
		int	pool_col_stride
	)

A Reference implementation of AvgPoolingOp on NCHW data.

Definition at line 256 of file ref_pooling_op.cpp.

◆ ref_avg_pooling_nhwc()

void ref_avg_pooling_nhwc	(	float *	input,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		int	pool_row_size,
		int	pool_col_size,
		int	pool_row_stride,
		int	pool_col_stride
	)

A Reference implementation of AvgPoolingOp on NHWC data.

Definition at line 308 of file ref_pooling_op.cpp.

◆ ref_batch_norm_nchw_post_conv()

void ref_batch_norm_nchw_post_conv	(	float *	inputs,
		float *	mean,
		float *	variance,
		float *	gamma,
		float *	beta,
		float *	result,
		int	img_nums,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	wgt_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of batch normalization following a convolutional/pooling layer on NCHW data.

After conv/pooling, we only have a gamma/beta per output feature map, not per activation.

Definition at line 85 of file ref_batch_norm_op.cpp.

◆ ref_batch_norm_nhwc_post_conv()

void ref_batch_norm_nhwc_post_conv	(	float *	inputs,
		float *	mean,
		float *	variance,
		float *	gamma,
		float *	beta,
		float *	result,
		int	img_nums,
		int	img_rows,
		int	img_cols,
		int	img_chans,
		int	img_pad,
		int	wgt_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of batch normalization following a convolutional/pooling layer on NHWC data.

After conv/pooling, we only have a gamma/beta per output feature map, not per activation.

Definition at line 147 of file ref_batch_norm_op.cpp.

◆ ref_batch_norm_post_fc()

void ref_batch_norm_post_fc	(	float *	inputs,
		float *	mean,
		float *	variance,
		float *	gamma,
		float *	beta,
		float *	result,
		int	input_nums,
		int	input_size,
		int	input_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of batch normalization following a fully-connected layer.

In this case, we have one pair of gamma/beta weights per activation.

Definition at line 40 of file ref_batch_norm_op.cpp.

◆ ref_conv2d_nchw_same_padding()

void ref_conv2d_nchw_same_padding	(	float *	input,
		float *	kernels,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	k_rows,
		int	k_cols,
		int	k_pad,
		int	k_row_stride,
		int	k_col_stride,
		int	res_rows,
		int	res_cols,
		int	res_pad
	)

A Reference implementation of a depthwise convolution on NCHW data with same padding.

Definition at line 83 of file ref_depthwise_convolution_op.cpp.

◆ ref_conv2d_nchw_valid_padding()

void ref_conv2d_nchw_valid_padding	(	float *	input,
		float *	kernels,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	k_rows,
		int	k_cols,
		int	k_pad,
		int	k_row_stride,
		int	k_col_stride,
		int	res_rows,
		int	res_cols,
		int	res_pad
	)

A Reference implementation of a depthwise convolution on NCHW data with valid padding. The Reference backend requires no alignment padding so all _pad parameters can be zero.

Definition at line 15 of file ref_depthwise_convolution_op.cpp.

◆ ref_conv3d_nchw_same_padding()

void ref_conv3d_nchw_same_padding	(	float *	input,
		float *	kernels,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	k_num,
		int	k_rows,
		int	k_cols,
		int	k_pad,
		int	k_row_stride,
		int	k_col_stride,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of a 3D convolution on NCHW data with same padding.

Definition at line 94 of file ref_convolution_op.cpp.

◆ ref_conv3d_nchw_valid_padding()

void ref_conv3d_nchw_valid_padding	(	float *	input,
		float *	kernels,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	k_num,
		int	k_rows,
		int	k_cols,
		int	k_pad,
		int	k_row_stride,
		int	k_col_stride,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of a 3D convolution on NCHW data with valid padding.

Definition at line 16 of file ref_convolution_op.cpp.

◆ ref_conv3d_nhwc_same_padding()

void ref_conv3d_nhwc_same_padding	(	float *	input,
		float *	kernels,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	k_num,
		int	k_rows,
		int	k_cols,
		int	k_pad,
		int	k_row_stride,
		int	k_col_stride,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of a 3D convolution on NHWC data with same padding.

Definition at line 266 of file ref_convolution_op.cpp.

◆ ref_conv3d_nhwc_valid_padding()

void ref_conv3d_nhwc_valid_padding	(	float *	input,
		float *	kernels,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	k_num,
		int	k_rows,
		int	k_cols,
		int	k_pad,
		int	k_row_stride,
		int	k_col_stride,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of a 3D convolution on NHWC data with valid padding.

Definition at line 188 of file ref_convolution_op.cpp.

◆ ref_eltwise_add()

void ref_eltwise_add	(	float *	input0,
		float *	input1,
		float *	results,
		int	input_size
	)

A Reference implementation of elementwise addition.

Definition at line 13 of file ref_eltwise_add_op.cpp.

◆ ref_eltwise_mul()

void ref_eltwise_mul	(	float *	input0,
		float *	input1,
		float *	results,
		int	input_size
	)

A Reference implementation of elementwise multiplication.

Definition at line 13 of file ref_eltwise_mul_op.cpp.

◆ ref_greater()

void ref_greater	(	float *	input0,
		float *	input1,
		bool *	results,
		int	input_size
	)

A Reference implementation of elementwise greater-than.

Definition at line 13 of file ref_greater_op.cpp.

◆ ref_greater_equal()

void ref_greater_equal	(	float *	input0,
		float *	input1,
		bool *	results,
		int	input_size
	)

A Reference implementation of elementwise greater-than-or-equal-to.

Definition at line 27 of file ref_greater_op.cpp.

◆ ref_inner_product_ab_times_bc()

void ref_inner_product_ab_times_bc	(	float *	a,
		float *	b,
		float *	c,
		int	a_height,
		int	a_width,
		int	b_width,
		int	a_pad,
		int	b_pad,
		int	c_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of an inner product operator: C = A x B.

Parameters

a	A matrix of dimensions a_height x a_width
b	A matrix of dimensions a_width x b_width
c	A matrix of dimensions a_height x b_width
a_height	Number of rows in A
a_width	Number of columns in A
b_width	Number of columns in B
a_pad	Additional alignment zero-padding on a.
b_pad	Additional alignment zero-padding on b.
c_pad	Additional alignment zero-padding on c.
act_function	The activation function to apply on the result of the inner product.
act_params	Parameters to the activation function.

Definition at line 28 of file ref_inner_product_op.cpp.

◆ ref_inner_product_ab_times_cb()

void ref_inner_product_ab_times_cb	(	float *	a,
		float *	b,
		float *	c,
		int	a_height,
		int	b_width,
		int	b_height,
		int	a_pad,
		int	b_pad,
		int	c_pad,
		activation_type	act_function,
		activation_param_t	act_params
	)

A Reference implementation of an inner product operator: C = A x B_transpose.

Parameters

a	A matrix of dimensions a_height x b_width
b	A matrix of dimensions b_height x b_width
c	A matrix of dimensions a_height x b_height
a_height	Number of rows in A
b_width	Number of columns in B
b_height	Number of rows in B
a_pad	Additional alignment zero-padding on a.
b_pad	Additional alignment zero-padding on b.
c_pad	Additional alignment zero-padding on c.
act_function	The activation function to apply on the result of the inner product.
act_params	Parameters to the activation function.

Definition at line 87 of file ref_inner_product_op.cpp.

◆ ref_less()

void ref_less	(	float *	input0,
		float *	input1,
		bool *	results,
		int	input_size
	)

A Reference implementation of less-than.

Definition at line 13 of file ref_less_op.cpp.

◆ ref_less_equal()

void ref_less_equal	(	float *	input0,
		float *	input1,
		bool *	results,
		int	input_size
	)

A Reference implementation of less-than-or-equal-to.

Definition at line 27 of file ref_less_op.cpp.

◆ ref_max_pooling_nchw_itermax()

void ref_max_pooling_nchw_itermax	(	float *	input,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		int	pool_row_size,
		int	pool_col_size,
		int	pool_row_stride,
		int	pool_col_stride
	)

A Reference implementation of MaxPoolingOp on NCHW data, using a loop-based maximum function.

Definition at line 151 of file ref_pooling_op.cpp.

◆ ref_max_pooling_nchw_treemax()

void ref_max_pooling_nchw_treemax	(	float *	input,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		int	pool_row_size,
		int	pool_col_size,
		int	pool_row_stride,
		int	pool_col_stride
	)

A Reference implementation of MaxPoolingOp on NCHW data, using a tree-based maximum function.

Definition at line 15 of file ref_pooling_op.cpp.

◆ ref_max_pooling_nhwc_itermax()

void ref_max_pooling_nhwc_itermax	(	float *	input,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		int	pool_row_size,
		int	pool_col_size,
		int	pool_row_stride,
		int	pool_col_stride
	)

A Reference implementation of MaxPoolingOp on NHWC data, using a loop-based maximum function.

Definition at line 204 of file ref_pooling_op.cpp.

◆ ref_max_pooling_nhwc_treemax()

void ref_max_pooling_nhwc_treemax	(	float *	input,
		float *	result,
		int	img_num,
		int	img_chans,
		int	img_rows,
		int	img_cols,
		int	img_pad,
		int	res_rows,
		int	res_cols,
		int	res_pad,
		int	pool_row_size,
		int	pool_col_size,
		int	pool_row_stride,
		int	pool_col_stride
	)

A Reference implementation of MaxPoolingOp on NHWC data, using a tree-based maximum function.

Definition at line 83 of file ref_pooling_op.cpp.

◆ ref_softmax_nc()

void ref_softmax_nc	(	float *	inputs,
		float *	results,
		int	input_num,
		int	input_size,
		int	input_pad
	)

A Reference implementation of the softmax function.

The softmax function exponentiates each element and then normalizes each row to sum to 1. To improve numerical stability, we use the max trick: all elements are first subtracted by the maximum value in each input before being exponentiated.

Parameters

inputs	Matrix of size input_num x input_size, stored row-major. This contains both inputs and the outputs.
results	Output array.
input_num	Batch size.
input_size	Number of activations per input.
input_pad	Alignment padding.

Definition at line 27 of file ref_softmax_op.cpp.

◆ smv_activation_fun_nc_vec_fxp()

void smv_activation_fun_nc_vec_fxp	(	float16 *	host_inputs,
		float16 *	host_results,
		float *	inputs,
		float *	results,
		int	inputs_size,
		activation_type	function,
		activation_param_t	params
	)

Top level function entry for all unary SMV activation functions.

Definition at line 13 of file activation_functions_simd.c.

◆ smv_avgpooling_nhwc_vec_fxp()

void smv_avgpooling_nhwc_vec_fxp	(	float16 *	host_inputs,
		float16 *	host_results,
		float *	inputs,
		float *	results,
		int	inputs_dims[4],
		int	results_dims[4],
		int	inputs_pad,
		int	results_pad,
		int	pool_rows,
		int	pool_cols,
		int	row_stride,
		int	col_stride,
		int	ofmap_start,
		SamplingInfo *	sampling
	)

An average-pooling operation on SMV with NHWC format. This is the vectorized implementation.

This requires a blocked channel data format (GNHWC), where G = channels/8, and the last dimension = chans = 8. The last dimension MUST be 8. This supports arbitrary pooling sizes and strides.

Parameters

host_inputs	Host inputs buffer in NHWC.
host_results	Host results buffer in NHWC.
inputs	Local inputs buffer in NHWC.
results	Local results buffer in NHWC.
inputs_dims	Dimensions of the inputs.
results_dims	Dimensions of the results.
inputs_pad	Align padding size on the channel dimension of the inputs.
results_pad	Align padding size on the channel dimension of the results.
pool_rows	Row size of the pooling function.
pool_cols	Column size of the pooling function.
row_stride	Stride size on the row dimension.
col_stride	Stride size on the col dimension.
ofmap_start	If the results contains more channels than the inputs, start from this one. Otherwise this should always be zero.
sampling	Simulation sampling settings.

Definition at line 166 of file pooling.c.

◆ smv_batch_norm_post_conv_nchw_vec_fxp()

void smv_batch_norm_post_conv_nchw_vec_fxp	(	float16 *	host_inputs,
		float16 *	host_weights,
		float16 *	host_results,
		float *	inputs,
		float *	weights,
		float *	results,
		int	inputs_dims[4],
		int	weights_chans,
		int	inputs_pad,
		int	weights_pad,
		int	weights_start,
		activation_type	act_function,
		activation_param_t	act_params
	)

SMV implementation of batch normalization following a convolutional/pooling layer on NCHW data.

After conv/pooling, we only have a gamma/beta per output feature map, not per activation.

Definition at line 100 of file batch_norm.c.

◆ smv_batch_norm_post_conv_nhwc_vec_fxp()

void smv_batch_norm_post_conv_nhwc_vec_fxp	(	float16 *	host_inputs,
		float16 *	host_weights,
		float16 *	host_results,
		float *	inputs,
		float *	weights,
		float *	results,
		int	inputs_dims[4],
		int	weights_chans,
		int	inputs_pad,
		int	weights_pad,
		int	weights_start,
		activation_type	act_function,
		activation_param_t	act_params,
		SamplingInfo *	sampling
	)

SMV implementation of batch normalization following a convolutional/pooling layer on NHWC data.

After conv/pooling, we only have a gamma/beta per output feature map, not per activation.

Definition at line 196 of file batch_norm.c.

◆ smv_batch_norm_post_fc_nc_vec_fxp()

void smv_batch_norm_post_fc_nc_vec_fxp	(	float16 *	host_inputs,
		float16 *	host_weights,
		float16 *	host_results,
		float *	inputs,
		float *	weights,
		float *	results,
		int	inputs_dims[2],
		int	weights_acts,
		int	inputs_pad,
		int	inputs_start,
		int	send_results,
		activation_type	act_function,
		activation_param_t	act_params
	)

SMV implementation of batch normalization following a fully-connected layer.

In this case, we have one pair of gamma/beta weights per activation.

Definition at line 41 of file batch_norm.c.

◆ smv_conv3d_nhwc_vec_fxp()

void smv_conv3d_nhwc_vec_fxp	(	float16 *	host_inputs,
		float16 *	host_weights,
		float16 *	host_results,
		float *	inputs,
		float *	weights,
		float *	results,
		int	inputs_dims[4],
		int	weights_dims[4],
		int	results_dims[4],
		int	inputs_align_pad,
		int	weights_pad,
		int	results_pad,
		int	inputs_halo_pad[4],
		int	row_stride,
		int	col_stride,
		int	ifmap_start,
		int	kern_start,
		bool	accumulate,
		bool	read_inputs,
		bool	read_weights,
		bool	send_results,
		activation_type	act_function,
		activation_param_t	act_params,
		SamplingInfo *	sampling
	)

Perform a 3D convolution with one kernel on an image, with reduction in NHWC format. This is the vectorized implementation.

Parameters

host_inputs	Host inputs buffer in NHWC.
host_weights	Host weights buffer in NHWC.
host_results	Host results buffer in NHWC.
inputs	Local inputs buffer in NHWC.
weights	Local weights buffer in NHWC.
results	Local results buffer in NHWC.
inputs_dims	Dimensions of the inputs.
weights_dims	Dimensions of the weights.
results_dims	Dimensions of the results.
inputs_align_pad	Alignment padding size on the channel dimension of the inputs.
weights_pad	Alignment padding size on the channel dimension of the weights.
results_pad	Alignment padding size on the channel dimension of the results.
inputs_halo_pad	Padding sizes on top, bottom, left and right of the input 2D feature maps.
row_stride	Stride size on the row dimension.
col_stride	Stride size on the col dimension.
ifmap_start	If the input contains more channels than the weights, start from this one. Otherwise this should always be zero.
kern_start	If the weights contain more kernels than the results buffer can fit, start from this one. Otherwise this should always be zero.
accumulate	If the original weight tensor is tiled channelwise, this should be set to true in order to avoid resetting the result buffer for non-first weight tiles.
read_inputs	Load inputs from the host. Set to false if the input activations can be reused from the last invocation.
read_weights	Load weights from the host. Set to false if the weights can be reused from the last invocation.
send_results	Send the results to the host memory if this is true.
act_function	Activation function the operator runs.
act_params	Parameters for the activation function.
sampling	Simulation sampling settings.

Definition at line 53 of file convolution_simd.c.

◆ smv_eltwise_add_nc_vec_fxp()

void smv_eltwise_add_nc_vec_fxp	(	float16 *	host_inputs0,
		float16 *	host_inputs1,
		float16 *	host_results,
		float *	inputs0,
		float *	inputs1,
		float *	results,
		int	inputs_size
	)

SMV implementation of elementwise addition.

Definition at line 13 of file eltwise_add.c.

◆ smv_eltwise_mul_nc_vec_fxp()

void smv_eltwise_mul_nc_vec_fxp	(	float16 *	host_inputs0,
		float16 *	host_inputs1,
		float16 *	host_results,
		float *	inputs0,
		float *	inputs1,
		float *	results,
		int	inputs_size
	)

SMV implementation of elementwise multiplication.

Definition at line 13 of file eltwise_mul.c.

◆ smv_greater_equal_nc_vec_fxp()

void smv_greater_equal_nc_vec_fxp	(	float16 *	host_inputs0,
		float16 *	host_inputs1,
		bool *	host_results,
		float *	inputs0,
		float *	inputs1,
		bool *	results,
		int	inputs_size
	)

SMV implementation of elementwise greater-than-or-equal-to.

Definition at line 106 of file compare.c.

◆ smv_greater_nc_vec_fxp()

void smv_greater_nc_vec_fxp	(	float16 *	host_inputs0,
		float16 *	host_inputs1,
		bool *	host_results,
		float *	inputs0,
		float *	inputs1,
		bool *	results,
		int	inputs_size
	)

SMV implementation of elementwise greater-than.

Definition at line 77 of file compare.c.

◆ smv_less_equal_nc_vec_fxp()

void smv_less_equal_nc_vec_fxp	(	float16 *	host_inputs0,
		float16 *	host_inputs1,
		bool *	host_results,
		float *	inputs0,
		float *	inputs1,
		bool *	results,
		int	inputs_size
	)

SMV implementation of elementwise less-than-or-equal-to.

Definition at line 48 of file compare.c.

◆ smv_less_nc_vec_fxp()

void smv_less_nc_vec_fxp	(	float16 *	host_inputs0,
		float16 *	host_inputs1,
		bool *	host_results,
		float *	inputs0,
		float *	inputs1,
		bool *	results,
		int	inputs_size
	)

SMV implementation of elementwise less-than.

Definition at line 19 of file compare.c.

◆ smv_matrix_multiply_transpose_nc_vec_fxp()

void smv_matrix_multiply_transpose_nc_vec_fxp	(	float16 *	host_a,
		float16 *	host_b,
		float16 *	host_results,
		float *	a,
		float *	b,
		float *	results,
		int	a_dims[2],
		int	b_dims[2],
		int	results_dims[2],
		int	a_pad,
		int	b_pad,
		int	results_pad,
		int	a_start,
		int	result_start,
		bool	accumulate,
		bool	read_inputs,
		bool	send_results,
		activation_type	act_function,
		activation_param_t	act_params,
		SamplingInfo *	sampling
	)

Matrix b after transposition:

cols (originally rows) —>

rows  [[—][—][—]]
 |    [[—][—][—]]
      [[—][—][—]]
 v    [[—][—][—]]

Each [—] represents an 8-wide vector. This inner product executes a 32-way MACC – 4 such 8-wide vectors – per PE, and 8 PEs, where each PE is assigned a row in the transposed matrix. It continues across each row of b until the complete output pixel is finished (output stationary).

No biases are added.

Args:

Parameters

host_a	Host buffer for a in NC.
host_b	Host buffer for b in NC.
host_results	Host results buffer in NC.
a	Local buffer for a in NC.
b	Local buffer for b in NC.
results	Local results buffer in NC.
a_dims	Dimensions of a.
b_dims	Dimensions of b.
results_dims	Dimensions of the results.
a_pad	Align padding size on the channel dimension of a.
b_pad	Align padding size on the channel dimension of b.
results_pad	Align padding size on the channel dimension of the results.
a_start	If a contains more activations than b, start from this one. Otherwise this should always be zero.
result_start	If the results contain more neurons than b, start writing results from this one. Otherwise this should always be zero.
accumulate	If the original b tensor is tiled on activations, this should be set to true in order to avoid resetting the result buffer for non-first b tiles.
read_inputs	Load inputs from the host. Set to false if the input activations can be reused from the last invocation.
send_results	Send the results to the host memory if this is true.
act_function	Activation function the operator runs.
act_params	Parameters for the activation function.
sampling	Simulation sampling settings.

Definition at line 59 of file matrix_multiply.c.

◆ smv_maxpooling_nhwc_vec_fxp()

void smv_maxpooling_nhwc_vec_fxp	(	float16 *	host_inputs,
		float16 *	host_results,
		float *	inputs,
		float *	results,
		int	inputs_dims[4],
		int	results_dims[4],
		int	inputs_pad,
		int	results_pad,
		int	pool_rows,
		int	pool_cols,
		int	row_stride,
		int	col_stride,
		int	ofmap_start,
		SamplingInfo *	sampling
	)

A max-pooling operation on SMV with NHWC format. This is the vectorized implementation.

Args:

Parameters

host_inputs	Host inputs buffer in NHWC.
host_results	Host results buffer in NHWC.
inputs	Local inputs buffer in NHWC.
results	Local results buffer in NHWC.
inputs_dims	Dimensions of the inputs.
results_dims	Dimensions of the results.
inputs_pad	Align padding size on the channel dimension of the inputs.
results_pad	Align padding size on the channel dimension of the results.
pool_rows	Row size of the pooling function.
pool_cols	Column size of the pooling function.
row_stride	Stride size on the row dimension.
col_stride	Stride size on the col dimension.
ofmap_start	If the results contain more channels than the inputs, start from this one. Otherwise this should always be zero.
sampling	Simulation sampling settings.

Definition at line 36 of file pooling.c.

◆ smv_softmax_nc_vec_fxp()

void smv_softmax_nc_vec_fxp	(	float16 *	host_inputs,
		float16 *	host_results,
		float *	inputs,
		float *	results,
		int	input_num,
		int	input_size,
		int	input_pad
	)

Top level function for softmax.

Definition at line 31 of file activation_functions_simd.c.

Functions

Detailed Description

Function Documentation

◆ batch_norm_op()

◆ batch_norm_simd_op()

◆ host_load_fp16()

◆ host_store_fp16()

◆ ref_avg_pooling_nchw()

◆ ref_avg_pooling_nhwc()

◆ ref_batch_norm_nchw_post_conv()

◆ ref_batch_norm_nhwc_post_conv()

◆ ref_batch_norm_post_fc()

◆ ref_conv2d_nchw_same_padding()

◆ ref_conv2d_nchw_valid_padding()

◆ ref_conv3d_nchw_same_padding()

◆ ref_conv3d_nchw_valid_padding()

◆ ref_conv3d_nhwc_same_padding()

◆ ref_conv3d_nhwc_valid_padding()

◆ ref_eltwise_add()

◆ ref_eltwise_mul()

◆ ref_greater()

◆ ref_greater_equal()

◆ ref_inner_product_ab_times_bc()

◆ ref_inner_product_ab_times_cb()

◆ ref_less()

◆ ref_less_equal()

◆ ref_max_pooling_nchw_itermax()

◆ ref_max_pooling_nchw_treemax()

◆ ref_max_pooling_nhwc_itermax()

◆ ref_max_pooling_nhwc_treemax()

◆ ref_softmax_nc()

◆ smv_activation_fun_nc_vec_fxp()

◆ smv_avgpooling_nhwc_vec_fxp()

◆ smv_batch_norm_post_conv_nchw_vec_fxp()

◆ smv_batch_norm_post_conv_nhwc_vec_fxp()

◆ smv_batch_norm_post_fc_nc_vec_fxp()

◆ smv_conv3d_nhwc_vec_fxp()

◆ smv_eltwise_add_nc_vec_fxp()

◆ smv_eltwise_mul_nc_vec_fxp()

◆ smv_greater_equal_nc_vec_fxp()

◆ smv_greater_nc_vec_fxp()

◆ smv_less_equal_nc_vec_fxp()

◆ smv_less_nc_vec_fxp()

◆ smv_matrix_multiply_transpose_nc_vec_fxp()

◆ smv_maxpooling_nhwc_vec_fxp()

◆ smv_softmax_nc_vec_fxp()