// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6$assert PIXEL_TILE >= 1
7$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
8#include <assert.h>
9
10#include <xnnpack/ibilinear.h>
11
12
13void xnn_f32_ibilinear_chw_ukernel__scalar_p${PIXEL_TILE}(
14    size_t output_pixels,
15    size_t channels,
16    const float**restrict input,
17    size_t input_offset,
18    const float*restrict weights,
19    float*restrict output,
20    size_t input_increment)
21{
22  assert(output_pixels != 0);
23  assert(channels != 0);
24  assert(input_increment % sizeof(float) == 0);
25
26  size_t c = channels;
27  do {
28    const float** i = input;
29    const float* w = weights;
30
31    size_t p = output_pixels;
32    $if PIXEL_TILE > 1:
33      for (; p >= ${PIXEL_TILE}; p -= ${PIXEL_TILE}) {
34        $for P in range(PIXEL_TILE):
35          const float* itl${P} = (const float*) ((uintptr_t) i[${P * 2}] + input_offset);
36          const float* ibl${P} = (const float*) ((uintptr_t) i[${P * 2 + 1}] + input_offset);
37        i += ${PIXEL_TILE} * 2;
38
39        $for P in range(PIXEL_TILE):
40           const float valphah${ABC[P]} = w[${P * 2}];
41           const float valphav${ABC[P]} = w[${P * 2 + 1}];
42        w += ${PIXEL_TILE} * 2;
43
44        $for P in range(PIXEL_TILE):
45          const float vtl${ABC[P]} = itl${P}[0];
46          const float vtr${ABC[P]} = itl${P}[1];
47          const float vbl${ABC[P]} = ibl${P}[0];
48          const float vbr${ABC[P]} = ibl${P}[1];
49
50        $for P in range(PIXEL_TILE):
51          const float vtd${ABC[P]} = vtr${ABC[P]} - vtl${ABC[P]};
52          const float vbd${ABC[P]} = vbr${ABC[P]} - vbl${ABC[P]};
53
54        $for P in range(PIXEL_TILE):
55          const float vt${ABC[P]} = vtl${ABC[P]} + vtd${ABC[P]} * valphah${ABC[P]};
56          const float vb${ABC[P]} = vbl${ABC[P]} + vbd${ABC[P]} * valphah${ABC[P]};
57
58        $for P in range(PIXEL_TILE):
59          const float vd${ABC[P]} = vb${ABC[P]} - vt${ABC[P]};
60
61        $for P in range(PIXEL_TILE):
62          const float vo${ABC[P]} = vt${ABC[P]} + vd${ABC[P]} * valphav${ABC[P]};
63
64        $for P in range(PIXEL_TILE):
65          output[${P}] = vo${ABC[P]};
66        output += ${PIXEL_TILE};
67      }
68
69      for (; p >= 1; p -= 1) {
70        const float* itl = (const float*) ((uintptr_t) i[0] + input_offset);
71        const float* ibl = (const float*) ((uintptr_t) i[1] + input_offset);
72        i += 2;
73
74        const float valphah = w[0];
75        const float valphav = w[1];
76        w += 2;
77
78        const float vtl = itl[0];
79        const float vtr = itl[1];
80        const float vbl = ibl[0];
81        const float vbr = ibl[1];
82
83        const float vtd = vtr - vtl;
84        const float vbd = vbr - vbl;
85
86        const float vt = vtl + vtd * valphah;
87        const float vb = vbl + vbd * valphah;
88
89        const float vd = vb - vt;
90
91        const float vo = vt + vd * valphav;
92
93        *output++ = vo;
94      }
95    $else:
96      do {
97        const float* itl = (const float*) ((uintptr_t) i[0] + input_offset);
98        const float* ibl = (const float*) ((uintptr_t) i[1] + input_offset);
99        i += 2;
100
101        const float valphah = w[0];
102        const float valphav = w[1];
103        w += 2;
104
105        const float vtl = itl[0];
106        const float vtr = itl[1];
107        const float vbl = ibl[0];
108        const float vbr = ibl[1];
109
110        const float vtd = vtr - vtl;
111        const float vbd = vbr - vbl;
112
113        const float vt = vtl + vtd * valphah;
114        const float vb = vbl + vbd * valphah;
115
116        const float vd = vb - vt;
117
118        const float vo = vt + vd * valphav;
119
120        *output++ = vo;
121      } while (--p != 0);
122
123    input_offset += input_increment;
124
125    c--;
126  } while (c != 0);
127}
128