/external/tensorflow/tensorflow/lite/kernels/ |
D | cpu_backend_gemm_gemmlowp.h | 41 using Type = gemmlowp::OutputStageSaturatingCastToUint8; 46 using Type = gemmlowp::OutputStageSaturatingCastToInt8; 51 using Type = gemmlowp::OutputStageSaturatingCastToInt16; 59 using Type = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; 64 using Type = gemmlowp::SignedL8R8WithLhsNonzeroBitDepthParams; 88 gemmlowp::MatrixMap<const SrcScalar, gemmlowp::MapOrder::RowMajor> 90 gemmlowp::MatrixMap<const SrcScalar, gemmlowp::MapOrder::ColMajor> 92 gemmlowp::MatrixMap<DstScalar, gemmlowp::MapOrder::ColMajor> gemmlowp_dst( 96 gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>; 97 gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent scale_stage; [all …]
|
/external/tensorflow/tensorflow/core/kernels/ |
D | meta_support.cc | 79 gemmlowp::WorkersPool* GetWorkersPool() { in GetWorkersPool() 80 static gemmlowp::WorkersPool* pool = new gemmlowp::WorkersPool(); in GetWorkersPool() 96 typedef gemmlowp::meta::SimpleContext<gemmlowp::WorkersPool> LocalContext; 101 gemmlowp::meta::MultiThreadGemm< in MultiThreadGemm() 102 Context, gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>, Params, 1, in MultiThreadGemm() 106 gemmlowp::meta::MultiThreadGemm< in MultiThreadGemm() 107 Context, gemmlowp::meta::GemmExecutorPackRHSCacheFriendly<>, Params, in MultiThreadGemm() 110 gemmlowp::meta::MultiThreadGemm< in MultiThreadGemm() 111 Context, gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>, Params, in MultiThreadGemm() 122 typedef gemmlowp::meta::GemmParams< in QuantizedGemmImpl() [all …]
|
D | quantized_matmul_op.cc | 41 static const gemmlowp::MapOrder ResultOrder = in GemmlowpMultiply() 42 !TransposeC ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; in GemmlowpMultiply() 43 static const gemmlowp::MapOrder LhsOrder = in GemmlowpMultiply() 44 !TransposeA ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; in GemmlowpMultiply() 45 static const gemmlowp::MapOrder RhsOrder = in GemmlowpMultiply() 46 !TransposeB ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; in GemmlowpMultiply() 47 gemmlowp::MatrixMap<const std::uint8_t, LhsOrder> lhs(a_data_as_uint8, m, k, in GemmlowpMultiply() 49 gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs(b_data_as_uint8, k, n, in GemmlowpMultiply() 51 gemmlowp::MatrixMap<std::int32_t, ResultOrder> result(c_data_as_int32, m, n, in GemmlowpMultiply() 58 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, in GemmlowpMultiply() [all …]
|
D | quantized_conv_ops.cc | 409 static const gemmlowp::MapOrder ResultOrder = in operator ()() 410 !transpose_c ? gemmlowp::MapOrder::RowMajor in operator ()() 411 : gemmlowp::MapOrder::ColMajor; in operator ()() 412 static const gemmlowp::MapOrder LhsOrder = in operator ()() 413 !transpose_a ? gemmlowp::MapOrder::RowMajor in operator ()() 414 : gemmlowp::MapOrder::ColMajor; in operator ()() 415 static const gemmlowp::MapOrder RhsOrder = in operator ()() 416 !transpose_b ? gemmlowp::MapOrder::RowMajor in operator ()() 417 : gemmlowp::MapOrder::ColMajor; in operator ()() 418 gemmlowp::MatrixMap<const std::uint8_t, LhsOrder> lhs( in operator ()() [all …]
|
/external/gemmlowp/doc/ |
D | quantization_example.cc | 26 template <typename tScalar, gemmlowp::MapOrder tOrder> 28 const gemmlowp::MatrixMap<tScalar, tOrder>& m) { in operator <<() 42 template <gemmlowp::MapOrder tOrder> 43 void FindMinMax(const gemmlowp::MatrixMap<float, tOrder>& m, float* min, in FindMinMax() 114 template <gemmlowp::MapOrder tLhsOrder, gemmlowp::MapOrder tRhsOrder, 115 gemmlowp::MapOrder tResultOrder> 117 const gemmlowp::MatrixMap<const float, tLhsOrder>& lhs, in FloatMatrixMultiplication() 118 const gemmlowp::MatrixMap<const float, tRhsOrder>& rhs, in FloatMatrixMultiplication() 119 gemmlowp::MatrixMap<float, tResultOrder>* result) { in FloatMatrixMultiplication() 153 template <typename tScalar, gemmlowp::MapOrder tOrder> [all …]
|
D | output.md | 1 # Output pipelines in gemmlowp 3 In gemmlowp, the "output pipeline" is the process that takes a final `int32` 20 and activation function. gemmlowp's output pipelines allow implementing that: 26 The gemmlowp entry point allowing to use an arbitrary output pipeline is 27 `GemmWithOutputPipeline` in [public/gemmlowp.h](../public/gemmlowp.h). 50 Separately, a self-contained example showing how to use gemmlowp to compute a
|
D | public.md | 3 gemmlowp's public interface is defined in 4 [public/gemmlowp.h](../public/gemmlowp.h). 46 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t, 47 gemmlowp::DefaultL8R8BitDepthParams>( 63 non-deprecated valid value is `gemmlowp::DefaultL8R8BitDepthParams`. See 78 must be `gemmlowp::GemmContext`. 84 * `context`: The `gemmlowp::GemmContext` object holding state and resources to 85 be used for this gemmlowp call. 92 will be performed by gemmlowp for the destination buffer. See 108 gemmlowp supports arbitrary combinations of storage orders for the LHS, RHS and [all …]
|
D | design.md | 1 # Overview of gemmlowp design 5 gemmlowp, like most GEMMs, implements the straightforward matrix multiplication 57 ## Impact of low-precision computation on gemmlowp design 60 low-precision-computation paradigm and how it's implemented in gemmlowp. 70 stage is needed in gemmlowp, which we call "unpack". Thus we arrive at the 71 3-stage computation scheme that gemmlowp uses: 77 The pseudo-code overview of gemmlowp now looks like: 102 ## Exploring gemmlowp code 104 The design outlined above can be readily matched to gemmlowp source code, in
|
/external/tensorflow/tensorflow/lite/kernels/internal/reference/ |
D | tanh.h | 63 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; in Tanh() 65 using F3 = gemmlowp::FixedPoint<std::int16_t, 3>; in Tanh() 70 F0 output = gemmlowp::tanh(input); in Tanh() 76 gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i])); in Tanh() 77 F0 output = gemmlowp::tanh(input); in Tanh() 106 using FixedPoint4 = gemmlowp::FixedPoint<int32_t, 4>; in Tanh() 107 using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>; in Tanh() 109 const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4); in Tanh() 111 using gemmlowp::RoundingDivideByPOT; in Tanh()
|
D | hard_swish.h | 78 gemmlowp::SaturatingRoundingDoublingHighMul( in HardSwish() 115 reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul( in HardSwish() 126 reluish_value = gemmlowp::RoundingDivideByPOT( in HardSwish() 152 int16_t output_value = gemmlowp::RoundingDivideByPOT( in HardSwish()
|
D | logistic.h | 77 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; in Logistic() 79 using F3 = gemmlowp::FixedPoint<std::int16_t, 3>; in Logistic() 82 F0 output = gemmlowp::logistic(input); in Logistic()
|
/external/tensorflow/tensorflow/lite/kernels/internal/ |
D | common.h | 143 using gemmlowp::RoundingDivideByPOT; in MultiplyByQuantizedMultiplierSmallerThanOneExp() 144 using gemmlowp::SaturatingRoundingDoublingHighMul; in MultiplyByQuantizedMultiplierSmallerThanOneExp() 151 using gemmlowp::SaturatingRoundingDoublingHighMul; in MultiplyByQuantizedMultiplierGreaterThanOne() 159 using gemmlowp::RoundingDivideByPOT; in MultiplyByQuantizedMultiplier() 160 using gemmlowp::SaturatingRoundingDoublingHighMul; in MultiplyByQuantizedMultiplier() 411 gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp( in SaturatingAddNonGemmlowp() 412 gemmlowp::FixedPoint<tRawType, tIntegerBits> a, in SaturatingAddNonGemmlowp() 413 gemmlowp::FixedPoint<tRawType, tIntegerBits> b) { in SaturatingAddNonGemmlowp() 414 return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw( in SaturatingAddNonGemmlowp() 447 gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub( in SaturatingSub() [all …]
|
/external/gemmlowp/test/ |
D | benchmark_all_sizes.cc | 47 namespace gemmlowp { namespace 114 using namespace gemmlowp; in benchmark_8bit() 132 gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint in benchmark_8bit() 137 gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; in benchmark_8bit() 142 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t, BitDepthParams>( in benchmark_8bit() 152 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t, in benchmark_8bit() 166 using namespace gemmlowp; in benchmark_8bit_to_32bit() 184 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, BitDepthParams>( in benchmark_8bit_to_32bit() 194 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, in benchmark_8bit_to_32bit() 237 return benchmark_8bit<gemmlowp::L8R8WithLhsNonzeroBitDepthParams>( in benchmark() [all …]
|
D | benchmark.cc | 53 namespace gemmlowp { namespace 176 gemmlowp::RegisterCurrentThreadForProfiling(); in benchmark() 177 gemmlowp::StartProfiling(); in benchmark() 197 gemmlowp::FinishProfiling(); in benchmark() 223 gemmlowp::RegisterCurrentThreadForProfiling(); in benchmark_gemm_sizes() 224 gemmlowp::StartProfiling(); in benchmark_gemm_sizes() 234 gemmlowp::FinishProfiling(); in benchmark_gemm_sizes() 339 gemmlowp::GemmContext context; in benchmark_all() 341 gemmlowp::benchmark_small_model(&context); in benchmark_all() 345 gemmlowp::GemmContext context; in benchmark_all() [all …]
|
D | test_fixedpoint.cc | 29 namespace gemmlowp { namespace 583 gemmlowp::TestFixedPoint<std::int32_t>().RunTests("Scalar int32"); in main() 584 gemmlowp::TestFixedPoint<std::int16_t>().RunTests("Scalar int16"); in main() 586 gemmlowp::TestFixedPoint<__m128i>().RunTests("SSE4 __m128i = int32x4"); in main() 587 gemmlowp::TestFixedPoint<gemmlowp::int16x8_m128i>().RunTests( in main() 591 gemmlowp::TestFixedPoint<int32x4_t>().RunTests("NEON int32x4_t"); in main() 592 gemmlowp::TestFixedPoint<int16x8_t>().RunTests("NEON int16x8_t"); in main() 595 gemmlowp::TestFixedPoint<v4i32>().RunTests("MSA v4i32"); in main() 596 gemmlowp::TestFixedPoint<v8i16>().RunTests("MSA v8i16"); in main() 599 gemmlowp::TestFixedPoint<__m256i>().RunTests("AVX __m256i"); in main() [all …]
|
D | correctness_meta_gemm.cc | 178 gemmlowp::WorkersPool* pool, std::int32_t pool_size) { in test() 183 gemmlowp::meta::multi_thread_gemm_q8(pool, pool_size, scratch, lhs, rhs, m, n, in test() 190 gemmlowp::WorkersPool* pool, std::int32_t pool_size) { in test_f() 196 gemmlowp::meta::multi_thread_gemm_f(pool, pool_size, scratch, lhs, rhs, m, n, in test_f() 203 std::int32_t* result, gemmlowp::WorkersPool* pool, in test_i32() 209 gemmlowp::meta::multi_thread_gemm_i32(pool, pool_size, scratch, lhs, rhs, m, in test_i32() 217 gemmlowp::WorkersPool* pool, int t) { in q_suite() 230 std::uint8_t* right, float* result, gemmlowp::WorkersPool* pool, in f_suite() 245 gemmlowp::WorkersPool* pool, int t) { in i32_suite() 278 gemmlowp::WorkersPool pool; in main()
|
/external/tensorflow/tensorflow/lite/tools/cmake/modules/ |
D | gemmlowp.cmake | 16 if(TARGET gemmlowp OR gemmlowp_POPULATED) 23 gemmlowp 24 GIT_REPOSITORY https://github.com/google/gemmlowp 32 SOURCE_DIR "${CMAKE_BINARY_DIR}/gemmlowp" 35 OverridableFetchContent_GetProperties(gemmlowp) 37 OverridableFetchContent_Populate(gemmlowp) 42 "${CMAKE_CURRENT_LIST_DIR}/gemmlowp"
|
D | Findgemmlowp.cmake | 18 include(gemmlowp) 21 get_target_property(GEMMLOWP_INCLUDE_DIRS gemmlowp INTERFACE_DIRECTORIES) 23 gemmlowp
|
/external/tensorflow/tensorflow/lite/tools/cmake/modules/gemmlowp/ |
D | CMakeLists.txt | 16 project(gemmlowp CXX) project 19 "Whether to add sources to gemmlowp's interface library targets. 24 "Directory that contains the gemmlowp project" 30 # gemmlowp doesn't have a CMake project so this is transcribed from 31 # gemmlowp/BUILD. 55 add_library(gemmlowp INTERFACE) target 57 target_sources(gemmlowp INTERFACE ${GEMMLOWP_PUBLIC_HEADERS}) 59 target_include_directories(gemmlowp INTERFACE "${GEMMLOWP_SOURCE_DIR}/public") 60 target_link_libraries(gemmlowp INTERFACE gemmlowp_private)
|
/external/XNNPACK/bench/ |
D | qu8-gemm.cc | 112 typedef gemmlowp::VectorMap<const int32_t, gemmlowp::VectorShape::Col> ColVectorMap; 114 gemmlowp::OutputStageBiasAddition<ColVectorMap>, 115 gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint, 116 gemmlowp::OutputStageClamp, 117 gemmlowp::OutputStageSaturatingCastToUint8> 130 gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage; in Make() 132 gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint quantize_down_stage; in Make() 136 gemmlowp::OutputStageClamp clamp_stage; in Make() 139 gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; in Make() 172 gemmlowp::MultiThreadGemmContext threadingContext; in GemmlowpBenchmark() [all …]
|
/external/gemmlowp/ |
D | README.md | 1 # gemmlowp: a small self-contained low-precision GEMM library 3 …[Build Status](https://secure.travis-ci.org/google/gemmlowp.png)](http://travis-ci.org/google/gemm… 22 gemmlowp-related discussion, about either development or usage, is welcome on 25 https://groups.google.com/forum/#!forum/gemmlowp 60 otherwise gemmlowp will use slow reference code. Bazel users can compile by 61 running `bazel build --copt=-msse4.1 //gemmlowp:all`. The compiled binary should 69 to run `bazel build --config=opt //gemmlowp:all` instead. 71 Details of what it takes to make an efficient port of gemmlowp, namely writing a 77 ### The gemmlowp public interface 79 gemmlowp's main public interface is in the `public/` subdirectory. [all …]
|
D | CONTRIBUTING | 25 Getting in touch with the gemmlowp community 28 The central point of communication around gemmlowp is the mailing list, 29 https://groups.google.com/forum/#!forum/gemmlowp 37 guidance. The gemmlowp mailing list is a good place for that. 46 https://github.com/google/gemmlowp
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | legacy_optimized_ops.h | 381 struct LegacyDepthwiseConvWorkerTask : public gemmlowp::Task { 442 uint8* output_data, gemmlowp::GemmContext* gemmlowp_context = nullptr) { 474 std::vector<gemmlowp::Task*> tasks(thread_count); 490 struct LegacyPerChannelDepthwiseConvWorkerTask : public gemmlowp::Task { 544 gemmlowp::GemmContext* gemmlowp_context = nullptr) { 578 std::vector<gemmlowp::Task*> tasks(thread_count); 715 typedef gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col> 717 typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>, 718 gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent, 719 gemmlowp::OutputStageClamp, [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/reference/integer_ops/ |
D | log_softmax.h | 40 using F5 = gemmlowp::FixedPoint<int32, kInputIntegerBits>; in LogSoftmax() 41 using F12 = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>; in LogSoftmax() 61 gemmlowp::Rescale<kAccumulationIntegerBits>( in LogSoftmax() 91 gemmlowp::RoundingDivideByPOT( in LogSoftmax()
|
/external/gemmlowp/internal/ |
D | kernel_default.h | 26 namespace gemmlowp { 60 namespace gemmlowp { \ 103 namespace gemmlowp {
|