SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
ref_batch_norm_op.cpp
#include "smaug/core/backend.h"
#include "smaug/operators/common.h"
#include "smaug/operators/batch_norm_op.h"
#include "smaug/operators/ref/ref_activation_fun_op.h"
#include "smaug/utility/debug_stream.h"

#ifdef __cplusplus
extern "C" {
#endif
// Normalizes one value. To keep the per-element work down to a single
// multiply-add, recip_sqrt_var is expected to hold the precomputed value
// 1/sqrt(variance + eps) rather than the raw variance.
ALWAYS_INLINE
float batch_norm_op(float input,
                    float mean,
                    float recip_sqrt_var,
                    float gamma,
                    float beta) {
    float scale = recip_sqrt_var * gamma;
    float shift = input - mean;
    return shift * scale + beta;
}
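This is the textbook transformation y = gamma * (x - mean) / sqrt(var + eps) + beta with the reciprocal square root hoisted out of the kernel. A minimal sketch of the host-side precomputation, assuming an illustrative epsilon (this file does not define one):

#include <math.h>

// Precompute 1/sqrt(var + eps) once per parameter so the kernel only does
// one multiply-add per element. The epsilon is an assumed stabilizer, not
// a value taken from this file.
void precompute_recip_sqrt_var(const float* variance,
                               float* recip_sqrt_var,
                               int size) {
    const float eps = 1e-5f;
    for (int i = 0; i < size; i++)
        recip_sqrt_var[i] = 1.0f / sqrtf(variance[i] + eps);
}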

// Batch normalization following a fully-connected layer.
//
// The parameter arrays (mean, variance, gamma, beta) are sized like the
// input and indexed along the feature dimension; variance holds the
// precomputed values 1/sqrt(variance + eps) (see batch_norm_op).
void ref_batch_norm_post_fc(float* inputs,
                            float* mean,
                            float* variance,
                            float* gamma,
                            float* beta,
                            float* result,
                            int input_nums,
                            int input_size,
                            int input_pad,
                            activation_type act_function,
                            activation_param_t act_params) {
    int inputs_size = input_nums * (input_size + input_pad);
    int kernel_size = inputs_size;
    int result_size = inputs_size;
    dmaLoad(inputs, inputs, inputs_size * sizeof(float));
    dmaLoad(mean, mean, kernel_size * sizeof(float));
    dmaLoad(variance, variance, kernel_size * sizeof(float));
    dmaLoad(gamma, gamma, kernel_size * sizeof(float));
    dmaLoad(beta, beta, kernel_size * sizeof(float));

    ARRAY_2D(float, _inputs, inputs, input_size + input_pad);
    ARRAY_2D(float, _result, result, input_size + input_pad);

    bn_batch:
    for (int i = 0; i < input_nums; i++) {
        bn_input:
        for (int j = 0; j < input_size; j++) {
            _result[i][j] = batch_norm_op(
                    _inputs[i][j], mean[j], variance[j], gamma[j], beta[j]);
        }
    }
    if (act_function != NO_ACTIVATION) {
        activation_fun(result, result, result_size, act_function, act_params);
    }
    dmaStore(result, result, result_size * sizeof(float));
}
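A minimal calling sketch for the post-FC kernel, assuming SMAUG's native (non-simulation) build, where dmaLoad/dmaStore reduce to plain memory copies (and are effectively no-ops here, since source and destination alias). Sizes and contents are illustrative:

// Illustrative driver (not part of the source): a batch of two 4-wide
// activation rows, no alignment padding, no fused activation.
float inputs[2 * 4] = { 0 };          // fill with real activations
float mean[2 * 4] = { 0 };            // parameter arrays are sized like
float recip_sqrt_var[2 * 4] = { 0 };  // the input (kernel_size above)
float gamma[2 * 4] = { 0 };
float beta[2 * 4] = { 0 };
float result[2 * 4];
activation_param_t params = { 0 };    // ignored with NO_ACTIVATION
ref_batch_norm_post_fc(inputs, mean, recip_sqrt_var, gamma, beta, result,
                       /* input_nums = */ 2, /* input_size = */ 4,
                       /* input_pad = */ 0, NO_ACTIVATION, params);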

// Batch normalization following a convolutional layer, with inputs in NCHW
// layout. The parameters are per-channel, so kernel_size is img_chans;
// variance holds the precomputed values 1/sqrt(variance + eps).
void ref_batch_norm_nchw_post_conv(float* inputs,
                                   float* mean,
                                   float* variance,
                                   float* gamma,
                                   float* beta,
                                   float* result,
                                   int img_nums,
                                   int img_chans,
                                   int img_rows,
                                   int img_cols,
                                   int img_pad,
                                   int wgt_pad,
                                   activation_type act_function,
                                   activation_param_t act_params) {
    int input_size = img_nums * img_chans * img_rows * (img_cols + img_pad);
    int kernel_size = img_chans;
    int result_size = input_size;
    dmaLoad(inputs, inputs, input_size * sizeof(float));
    dmaLoad(mean, mean, kernel_size * sizeof(float));
    dmaLoad(variance, variance, kernel_size * sizeof(float));
    dmaLoad(gamma, gamma, kernel_size * sizeof(float));
    dmaLoad(beta, beta, kernel_size * sizeof(float));

    ARRAY_4D(float, _inputs, inputs, img_chans, img_rows, img_cols + img_pad);
    ARRAY_4D(float, _result, result, img_chans, img_rows, img_cols + img_pad);

    bn_batch:
    for (int i = 0; i < img_nums; i++) {
        bn_chan:
        for (int h = 0; h < img_chans; h++) {
            // Hoist the per-channel parameters out of the spatial loops.
            float mean_val = mean[h];
            float recip_sqrt_var_val = variance[h];
            float gamma_val = gamma[h];
            float beta_val = beta[h];

            bn_row:
            for (int r = 0; r < img_rows; r++) {
                bn_col:
                for (int c = 0; c < img_cols; c++) {
                    _result[i][h][r][c] = batch_norm_op(_inputs[i][h][r][c],
                                                        mean_val,
                                                        recip_sqrt_var_val,
                                                        gamma_val,
                                                        beta_val);
                }
            }
        }
    }
    if (act_function != NO_ACTIVATION) {
        activation_fun(result, result, result_size, act_function, act_params);
    }
    dmaStore(result, result, result_size * sizeof(float));
}
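The kernel already hoists the four per-channel parameters out of the spatial loops. Because the normalization is affine, they could be folded further into a single scale/shift pair per channel; a sketch of that refactoring (not in the source):

// y = (x - mean) * (recip_sqrt_var * gamma) + beta  =  x * scale + shift
float scale = recip_sqrt_var_val * gamma_val;
float shift = beta_val - mean_val * scale;
// ...the inner loops would then reduce to:
//     _result[i][h][r][c] = _inputs[i][h][r][c] * scale + shift;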

// Batch normalization following a convolutional layer, with inputs in NHWC
// layout. Identical to the NCHW version except that the channel index (and
// the alignment padding) moves to the innermost dimension.
void ref_batch_norm_nhwc_post_conv(float* inputs,
                                   float* mean,
                                   float* variance,
                                   float* gamma,
                                   float* beta,
                                   float* result,
                                   int img_nums,
                                   int img_rows,
                                   int img_cols,
                                   int img_chans,
                                   int img_pad,
                                   int wgt_pad,
                                   activation_type act_function,
                                   activation_param_t act_params) {
    int input_size = img_nums * img_rows * img_cols * (img_chans + img_pad);
    int kernel_size = img_chans;
    int result_size = input_size;
    dmaLoad(inputs, inputs, input_size * sizeof(float));
    dmaLoad(mean, mean, kernel_size * sizeof(float));
    dmaLoad(variance, variance, kernel_size * sizeof(float));
    dmaLoad(gamma, gamma, kernel_size * sizeof(float));
    dmaLoad(beta, beta, kernel_size * sizeof(float));

    ARRAY_4D(float, _inputs, inputs, img_rows, img_cols, img_chans + img_pad);
    ARRAY_4D(float, _result, result, img_rows, img_cols, img_chans + img_pad);

    bn_batch:
    for (int i = 0; i < img_nums; i++) {
        bn_chan:
        for (int h = 0; h < img_chans; h++) {
            float mean_val = mean[h];
            float recip_sqrt_var_val = variance[h];
            float gamma_val = gamma[h];
            float beta_val = beta[h];
            bn_row:
            for (int r = 0; r < img_rows; r++) {
                bn_col:
                for (int c = 0; c < img_cols; c++) {
                    _result[i][r][c][h] = batch_norm_op(_inputs[i][r][c][h],
                                                        mean_val,
                                                        recip_sqrt_var_val,
                                                        gamma_val,
                                                        beta_val);
                }
            }
        }
    }
    if (act_function != NO_ACTIVATION) {
        activation_fun(result, result, result_size, act_function, act_params);
    }
    dmaStore(result, result, result_size * sizeof(float));
}
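The layout difference shows up only in the flattened offsets that the ARRAY_4D macros compute. For image i, channel h, row r, column c:

// NCHW: ((i * img_chans + h) * img_rows + r) * (img_cols  + img_pad) + c
// NHWC: ((i * img_rows  + r) * img_cols + c) * (img_chans + img_pad) + h

With the channel loop outermost, the NHWC walk strides through memory by img_chans + img_pad elements per step; the reference kernels favor a uniform loop structure over access-pattern tuning.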

#ifdef __cplusplus
}  // extern "C"
#endif

namespace smaug {

template <>
void BatchNormOp<ReferenceBackend>::run() {
    auto input = getInput(Inputs);
    auto mean = getInput(Mean);
    auto variance = getInput(Variance);
    auto gamma = getInput(Gamma);
    auto beta = getInput(Beta);
    auto output = getOutput(Outputs);
    const TensorShape& inputShape = input->getShape();
    const TensorShape& kernelShape = mean->getShape();
    const TensorShape& outputShape = output->getShape();
    // A 4D input means this batch norm follows a convolution; otherwise it
    // follows a fully-connected layer.
    bool isPostConv = (input->ndims() == 4);
    dout(2) << *mean << "\n";
    dout(2) << *variance << "\n";
    dout(2) << *gamma << "\n";
    dout(2) << *beta << "\n";

    float* inputData = input->data<float>();
    float* meanData = mean->data<float>();
    float* varianceData = variance->data<float>();
    float* gammaData = gamma->data<float>();
    float* betaData = beta->data<float>();
    float* outputData = output->data<float>();
    mapArrayToAccel(ref::kBatchNormHw, "inputs", inputData,
                    inputShape.storageSize() * sizeof(float));
    mapArrayToAccel(ref::kBatchNormHw, "mean", meanData,
                    kernelShape.storageSize() * sizeof(float));
    mapArrayToAccel(ref::kBatchNormHw, "variance", varianceData,
                    kernelShape.storageSize() * sizeof(float));
    mapArrayToAccel(ref::kBatchNormHw, "gamma", gammaData,
                    kernelShape.storageSize() * sizeof(float));
    mapArrayToAccel(ref::kBatchNormHw, "beta", betaData,
                    kernelShape.storageSize() * sizeof(float));
    mapArrayToAccel(ref::kBatchNormHw, "result", outputData,
                    outputShape.storageSize() * sizeof(float));
    if (isPostConv) {
        bool isNCHW = input->getShape().getLayout() == NCHW;
        auto func = isNCHW ? ref_batch_norm_nchw_post_conv
                           : ref_batch_norm_nhwc_post_conv;
        invokeKernel(ref::kBatchNormHw, func, inputData, meanData,
                     varianceData, gammaData, betaData, outputData,
                     inputShape[0], inputShape[1], inputShape[2],
                     inputShape[3], inputShape.getPadding(3),
                     kernelShape.getPadding(3), actInfo.function,
                     actInfo.params);
    } else {
        assert(inputShape.getLayout() == DataLayout::NC);
        assert(outputShape.getLayout() == DataLayout::NC);
        invokeKernel(ref::kBatchNormHw, ref_batch_norm_post_fc, inputData,
                     meanData, varianceData, gammaData, betaData, outputData,
                     inputShape[0], inputShape[1], inputShape.getPadding(1),
                     actInfo.function, actInfo.params);
    }
}

}  // namespace smaug
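In simulation, invokeKernel hands the mapped arrays to the gem5-Aladdin accelerator model; in a native build it essentially reduces to a direct call of the kernel function. A consistency check one could run natively against the post-FC kernel, assuming input_pad == 0 and NO_ACTIVATION (illustrative, not part of SMAUG):

#include <cassert>
#include <cmath>

// After ref_batch_norm_post_fc(x, mu, rsv, g, b, y, n, size, 0,
// NO_ACTIVATION, params), each output should equal the unfolded affine
// form, where rsv holds the precomputed 1/sqrt(var + eps) values.
void check_post_fc(const float* x, const float* mu, const float* rsv,
                   const float* g, const float* b, const float* y,
                   int input_nums, int input_size) {
    for (int i = 0; i < input_nums; i++) {
        for (int j = 0; j < input_size; j++) {
            float expected =
                    g[j] * (x[i * input_size + j] - mu[j]) * rsv[j] + b[j];
            assert(std::fabs(y[i * input_size + j] - expected) < 1e-5f);
        }
    }
}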