1// Copyright 2019 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6$assert BATCH_TILE >= 1
7$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
8$assert OP in ["ADD", "DIV", "RDIV", "MAX", "MIN", "MUL", "SUB", "RSUB", "SQRDIFF"]
9$assert ACTIVATION in ["LINEAR", "MINMAX", "RELU"]
10#include <assert.h>
11
12#include <xnnpack/common.h>
13#include <xnnpack/math.h>
14#include <xnnpack/vbinary.h>
15
16
17$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32"
18$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32"
19$OP_FUNC = {
20$  "ADD": lambda x: "%s + vb" % x,
21$  "DIV": lambda x: "%s / vb" % x,
22$  "RDIV": lambda x: "vb / %s" % x,
23$  "MAX": lambda x: "%s(%s, vb)" % (MAX_F32, x),
24$  "MIN": lambda x: "%s(%s, vb)" % (MIN_F32, x),
25$  "MUL": lambda x: "%s * vb" % x,
26$  "SUB": lambda x: "%s - vb" % x,
27$  "RSUB": lambda x: "vb - %s" % x,
28$  "SQRDIFF": lambda x: "%s - vb" % x,
29$}[OP]
30$SUFFIX = {"LINEAR": "", "RELU": "_relu", "MINMAX": "_minmax"}[ACTIVATION]
31$PARAMS = {"LINEAR": "xnn_f32_default_params", "RELU": "xnn_f32_relu_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION]
32void xnn_f32_v${OP.lower()}c${SUFFIX}_ukernel__${"wasm" if WASM else "scalar"}_x${BATCH_TILE}(
33    size_t n,
34    const float* a,
35    const float* b,
36    float* y,
37    const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)])
38{
39  assert(n != 0);
40  assert(n % sizeof(float) == 0);
41  assert(a != NULL);
42  assert(b != NULL);
43  assert(y != NULL);
44
45  $if ACTIVATION == "MINMAX":
46    const float vy_min = params->scalar.min;
47    const float vy_max = params->scalar.max;
48
49  const float vb = *b;
50  $if BATCH_TILE > 1:
51    for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
52      $for N in range(BATCH_TILE):
53        const float va${ABC[N]} = a[${N}];
54      a += ${BATCH_TILE};
55
56      $for N in range(BATCH_TILE):
57        float vy${ABC[N]} = ${OP_FUNC("va" + ABC[N])};
58
59      $if OP == "SQRDIFF":
60        $for N in range(BATCH_TILE):
61          vy${ABC[N]} = vy${ABC[N]} * vy${ABC[N]};
62
63      $if ACTIVATION == "MINMAX":
64        $for N in range(BATCH_TILE):
65          vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, vy_min);
66
67        $for N in range(BATCH_TILE):
68          vy${ABC[N]} = ${MIN_F32}(vy${ABC[N]}, vy_max);
69      $elif ACTIVATION == "RELU":
70        $for N in range(BATCH_TILE):
71          vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, 0.0f);
72
73      $for N in range(BATCH_TILE):
74        y[${N}] = vy${ABC[N]};
75      y += ${BATCH_TILE};
76    }
77    if XNN_UNLIKELY(n != 0) {
78      $if BATCH_TILE > 2:
79        do {
80          const float va = *a++;
81          float vy = ${OP_FUNC("va")};
82          $if OP == "SQRDIFF":
83            vy = vy * vy;
84          $if ACTIVATION == "MINMAX":
85            vy = ${MAX_F32}(vy, vy_min);
86            vy = ${MIN_F32}(vy, vy_max);
87          $elif ACTIVATION == "RELU":
88            vy = ${MAX_F32}(vy, 0.0f);
89          *y++ = vy;
90          n -= sizeof(float);
91        } while (n != 0);
92      $else:
93        const float va = *a;
94        float vy = ${OP_FUNC("va")};
95        $if OP == "SQRDIFF":
96          vy = vy * vy;
97        $if ACTIVATION == "MINMAX":
98          vy = ${MAX_F32}(vy, vy_min);
99          vy = ${MIN_F32}(vy, vy_max);
100        $elif ACTIVATION == "RELU":
101          vy = ${MAX_F32}(vy, 0.0f);
102        *y = vy;
103    }
104  $else:
105    for (; n >= sizeof(float); n -= sizeof(float)) {
106      const float va = *a++;
107      float vy = ${OP_FUNC("va")};
108      $if OP == "SQRDIFF":
109        vy = vy * vy;
110      $if ACTIVATION == "MINMAX":
111        vy = ${MAX_F32}(vy, vy_min);
112        vy = ${MIN_F32}(vy, vy_max);
113      $elif ACTIVATION == "RELU":
114        vy = ${MAX_F32}(vy, 0.0f);
115      *y++ = vy;
116    }
117}
118