1 #include "smaug/core/backend.h" 
    3 #include "smaug/operators/inner_product_op.h" 
    4 #include "smaug/operators/ref/ref_activation_fun_op.h" 
    5 #include "smaug/utility/debug_stream.h" 
   39     int input_size = a_height * (a_width + a_pad);
 
   40     int weight_size = a_width * (b_width + b_pad);
 
   41     int result_size = a_height * (b_width + c_pad);
 
   42     dmaLoad(a, a, input_size * 
sizeof(
float));
 
   43     dmaLoad(b, b, weight_size * 
sizeof(
float));
 
   45     ARRAY_2D(
float, _a, a, a_width + a_pad);
 
   46     ARRAY_2D(
float, _b, b, b_width + b_pad);
 
   47     ARRAY_2D(
float, _c, c, b_width + c_pad);
 
   50     for (
int i = 0; i < a_height; i++) {
 
   52         for (
int j = 0; j < b_width; j++) {
 
   55             for (
int k = 0; k < a_width; k++) {
 
   56                 float a_val = _a[i][k];
 
   57                 float b_val = _b[k][j];
 
   58                 result += a_val * b_val;
 
   63     if (act_function != NO_ACTIVATION) {
 
   64         activation_fun(c, c, result_size, act_function, act_params);
 
   66     dmaLoad(c, c, result_size * 
sizeof(
float));
 
   98     int a_width = b_width;
 
   99     int input_size = a_height * (a_width + a_pad);
 
  100     int weight_size = b_height * (b_width + b_pad);
 
  101     int result_size = a_height * (b_height + c_pad);
 
  102     dmaLoad(a, a, input_size * 
sizeof(
float));
 
  103     dmaLoad(b, b, weight_size * 
sizeof(
float));
 
  105     ARRAY_2D(
float, _a, a, a_width);
 
  106     ARRAY_2D(
float, _b, b, b_width);
 
  107     ARRAY_2D(
float, _c, c, b_height);
 
  110     for (
int i = 0; i < a_height; i++) {
 
  112         for (
int j = 0; j < b_height; j++) {
 
  115             for (
int k = 0; k < a_width; k++) {
 
  116                 float a_val = _a[i][k];
 
  117                 float b_val = _b[j][k];
 
  118                 result += a_val * b_val;
 
  123     if (act_function != NO_ACTIVATION) {
 
  124         activation_fun(c, c, result_size, act_function, act_params);
 
  126     dmaLoad(c, c, result_size * 
sizeof(
float));
 
  136 void InnerProductOp<ReferenceBackend>::run() {
 
  137     auto input = getInput(Inputs);
 
  138     auto weights = getInput(Weights);
 
  139     auto output = getOutput(Outputs);
 
  140     const TensorShape& inputShape = input->getShape();
 
  141     const TensorShape& weightShape = weights->getShape();
 
  142     const TensorShape& outputShape = output->getShape();
 
  143     assert(inputShape.getLayout() == DataLayout::NC);
 
  144     assert(weightShape.getLayout() == DataLayout::NC ||
 
  145            weightShape.getLayout() == DataLayout::CN);
 
  146     assert(outputShape.getLayout() == DataLayout::NC);
 
  147     dout(2) << *weights << 
"\n";
 
  149     float* inputData = input->data<
float>();
 
  150     float* weightData = weights->data<
float>();
 
  151     float* outputData = output->data<
float>();
 
  153                     inputShape.storageSize() * 
sizeof(
float));
 
  155                     weightShape.storageSize() * 
sizeof(
float));
 
  157                     outputShape.storageSize() * 
sizeof(
float));
 
  158     bool weightsTransposed = weightShape.getLayout() == DataLayout::NC;
 
  161     int actIdx = weightsTransposed ? 1 : 0;
 
  162     int neuronIdx = weightsTransposed ? 0 : 1;
 
  163     invokeKernel(ref::kInnerProductHw, func, inputData, weightData, outputData,
 
  164                  inputShape[0], weightShape[actIdx], weightShape[neuronIdx],
 
  165                  inputShape.getPadding(1), weightShape.getPadding(1),
 
  166                  outputShape.getPadding(1), actInfo.function, actInfo.params);