// Copyright 2020 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. $assert PIXEL_TILE >= 1 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" #include #include void xnn_f32_ibilinear_chw_ukernel__scalar_p${PIXEL_TILE}( size_t output_pixels, size_t channels, const float**restrict input, size_t input_offset, const float*restrict weights, float*restrict output, size_t input_increment) { assert(output_pixels != 0); assert(channels != 0); assert(input_increment % sizeof(float) == 0); size_t c = channels; do { const float** i = input; const float* w = weights; size_t p = output_pixels; $if PIXEL_TILE > 1: for (; p >= ${PIXEL_TILE}; p -= ${PIXEL_TILE}) { $for P in range(PIXEL_TILE): const float* itl${P} = (const float*) ((uintptr_t) i[${P * 2}] + input_offset); const float* ibl${P} = (const float*) ((uintptr_t) i[${P * 2 + 1}] + input_offset); i += ${PIXEL_TILE} * 2; $for P in range(PIXEL_TILE): const float valphah${ABC[P]} = w[${P * 2}]; const float valphav${ABC[P]} = w[${P * 2 + 1}]; w += ${PIXEL_TILE} * 2; $for P in range(PIXEL_TILE): const float vtl${ABC[P]} = itl${P}[0]; const float vtr${ABC[P]} = itl${P}[1]; const float vbl${ABC[P]} = ibl${P}[0]; const float vbr${ABC[P]} = ibl${P}[1]; $for P in range(PIXEL_TILE): const float vtd${ABC[P]} = vtr${ABC[P]} - vtl${ABC[P]}; const float vbd${ABC[P]} = vbr${ABC[P]} - vbl${ABC[P]}; $for P in range(PIXEL_TILE): const float vt${ABC[P]} = vtl${ABC[P]} + vtd${ABC[P]} * valphah${ABC[P]}; const float vb${ABC[P]} = vbl${ABC[P]} + vbd${ABC[P]} * valphah${ABC[P]}; $for P in range(PIXEL_TILE): const float vd${ABC[P]} = vb${ABC[P]} - vt${ABC[P]}; $for P in range(PIXEL_TILE): const float vo${ABC[P]} = vt${ABC[P]} + vd${ABC[P]} * valphav${ABC[P]}; $for P in range(PIXEL_TILE): output[${P}] = vo${ABC[P]}; output += ${PIXEL_TILE}; } for (; p >= 1; p -= 1) { const float* itl = (const float*) ((uintptr_t) i[0] + input_offset); const float* ibl = (const float*) ((uintptr_t) i[1] + input_offset); i += 2; const float valphah = w[0]; const float valphav = w[1]; w += 2; const float vtl = itl[0]; const float vtr = itl[1]; const float vbl = ibl[0]; const float vbr = ibl[1]; const float vtd = vtr - vtl; const float vbd = vbr - vbl; const float vt = vtl + vtd * valphah; const float vb = vbl + vbd * valphah; const float vd = vb - vt; const float vo = vt + vd * valphav; *output++ = vo; } $else: do { const float* itl = (const float*) ((uintptr_t) i[0] + input_offset); const float* ibl = (const float*) ((uintptr_t) i[1] + input_offset); i += 2; const float valphah = w[0]; const float valphav = w[1]; w += 2; const float vtl = itl[0]; const float vtr = itl[1]; const float vbl = ibl[0]; const float vbr = ibl[1]; const float vtd = vtr - vtl; const float vbd = vbr - vbl; const float vt = vtl + vtd * valphah; const float vb = vbl + vbd * valphah; const float vd = vb - vt; const float vo = vt + vd * valphav; *output++ = vo; } while (--p != 0); input_offset += input_increment; c--; } while (c != 0); }