1 #include "smaug/core/backend.h"
3 #include "smaug/operators/inner_product_op.h"
4 #include "smaug/operators/ref/ref_activation_fun_op.h"
5 #include "smaug/utility/debug_stream.h"
39 int input_size = a_height * (a_width + a_pad);
40 int weight_size = a_width * (b_width + b_pad);
41 int result_size = a_height * (b_width + c_pad);
42 dmaLoad(a, a, input_size *
sizeof(
float));
43 dmaLoad(b, b, weight_size *
sizeof(
float));
45 ARRAY_2D(
float, _a, a, a_width + a_pad);
46 ARRAY_2D(
float, _b, b, b_width + b_pad);
47 ARRAY_2D(
float, _c, c, b_width + c_pad);
50 for (
int i = 0; i < a_height; i++) {
52 for (
int j = 0; j < b_width; j++) {
55 for (
int k = 0; k < a_width; k++) {
56 float a_val = _a[i][k];
57 float b_val = _b[k][j];
58 result += a_val * b_val;
63 if (act_function != NO_ACTIVATION) {
64 activation_fun(c, c, result_size, act_function, act_params);
66 dmaLoad(c, c, result_size *
sizeof(
float));
98 int a_width = b_width;
99 int input_size = a_height * (a_width + a_pad);
100 int weight_size = b_height * (b_width + b_pad);
101 int result_size = a_height * (b_height + c_pad);
102 dmaLoad(a, a, input_size *
sizeof(
float));
103 dmaLoad(b, b, weight_size *
sizeof(
float));
105 ARRAY_2D(
float, _a, a, a_width);
106 ARRAY_2D(
float, _b, b, b_width);
107 ARRAY_2D(
float, _c, c, b_height);
110 for (
int i = 0; i < a_height; i++) {
112 for (
int j = 0; j < b_height; j++) {
115 for (
int k = 0; k < a_width; k++) {
116 float a_val = _a[i][k];
117 float b_val = _b[j][k];
118 result += a_val * b_val;
123 if (act_function != NO_ACTIVATION) {
124 activation_fun(c, c, result_size, act_function, act_params);
126 dmaLoad(c, c, result_size *
sizeof(
float));
136 void InnerProductOp<ReferenceBackend>::run() {
137 auto input = getInput(Inputs);
138 auto weights = getInput(Weights);
139 auto output = getOutput(Outputs);
140 const TensorShape& inputShape = input->getShape();
141 const TensorShape& weightShape = weights->getShape();
142 const TensorShape& outputShape = output->getShape();
143 assert(inputShape.getLayout() == DataLayout::NC);
144 assert(weightShape.getLayout() == DataLayout::NC ||
145 weightShape.getLayout() == DataLayout::CN);
146 assert(outputShape.getLayout() == DataLayout::NC);
147 dout(2) << *weights <<
"\n";
149 float* inputData = input->data<
float>();
150 float* weightData = weights->data<
float>();
151 float* outputData = output->data<
float>();
153 inputShape.storageSize() *
sizeof(
float));
155 weightShape.storageSize() *
sizeof(
float));
157 outputShape.storageSize() *
sizeof(
float));
158 bool weightsTransposed = weightShape.getLayout() == DataLayout::NC;
161 int actIdx = weightsTransposed ? 1 : 0;
162 int neuronIdx = weightsTransposed ? 0 : 1;
163 invokeKernel(ref::kInnerProductHw, func, inputData, weightData, outputData,
164 inputShape[0], weightShape[actIdx], weightShape[neuronIdx],
165 inputShape.getPadding(1), weightShape.getPadding(1),
166 outputShape.getPadding(1), actInfo.function, actInfo.params);