Home
last modified time | relevance | path

Searched refs:gemmlowp (Results 1 – 25 of 147) sorted by relevance

123456

/external/tensorflow/tensorflow/lite/kernels/
Dcpu_backend_gemm_gemmlowp.h41 using Type = gemmlowp::OutputStageSaturatingCastToUint8;
46 using Type = gemmlowp::OutputStageSaturatingCastToInt8;
51 using Type = gemmlowp::OutputStageSaturatingCastToInt16;
59 using Type = gemmlowp::L8R8WithLhsNonzeroBitDepthParams;
64 using Type = gemmlowp::SignedL8R8WithLhsNonzeroBitDepthParams;
88 gemmlowp::MatrixMap<const SrcScalar, gemmlowp::MapOrder::RowMajor>
90 gemmlowp::MatrixMap<const SrcScalar, gemmlowp::MapOrder::ColMajor>
92 gemmlowp::MatrixMap<DstScalar, gemmlowp::MapOrder::ColMajor> gemmlowp_dst(
96 gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>;
97 gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent scale_stage;
[all …]
/external/tensorflow/tensorflow/core/kernels/
Dmeta_support.cc79 gemmlowp::WorkersPool* GetWorkersPool() { in GetWorkersPool()
80 static gemmlowp::WorkersPool* pool = new gemmlowp::WorkersPool(); in GetWorkersPool()
96 typedef gemmlowp::meta::SimpleContext<gemmlowp::WorkersPool> LocalContext;
101 gemmlowp::meta::MultiThreadGemm< in MultiThreadGemm()
102 Context, gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>, Params, 1, in MultiThreadGemm()
106 gemmlowp::meta::MultiThreadGemm< in MultiThreadGemm()
107 Context, gemmlowp::meta::GemmExecutorPackRHSCacheFriendly<>, Params, in MultiThreadGemm()
110 gemmlowp::meta::MultiThreadGemm< in MultiThreadGemm()
111 Context, gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>, Params, in MultiThreadGemm()
122 typedef gemmlowp::meta::GemmParams< in QuantizedGemmImpl()
[all …]
Dquantized_matmul_op.cc41 static const gemmlowp::MapOrder ResultOrder = in GemmlowpMultiply()
42 !TransposeC ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; in GemmlowpMultiply()
43 static const gemmlowp::MapOrder LhsOrder = in GemmlowpMultiply()
44 !TransposeA ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; in GemmlowpMultiply()
45 static const gemmlowp::MapOrder RhsOrder = in GemmlowpMultiply()
46 !TransposeB ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; in GemmlowpMultiply()
47 gemmlowp::MatrixMap<const std::uint8_t, LhsOrder> lhs(a_data_as_uint8, m, k, in GemmlowpMultiply()
49 gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs(b_data_as_uint8, k, n, in GemmlowpMultiply()
51 gemmlowp::MatrixMap<std::int32_t, ResultOrder> result(c_data_as_int32, m, n, in GemmlowpMultiply()
58 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, in GemmlowpMultiply()
[all …]
Dquantized_conv_ops.cc409 static const gemmlowp::MapOrder ResultOrder = in operator ()()
410 !transpose_c ? gemmlowp::MapOrder::RowMajor in operator ()()
411 : gemmlowp::MapOrder::ColMajor; in operator ()()
412 static const gemmlowp::MapOrder LhsOrder = in operator ()()
413 !transpose_a ? gemmlowp::MapOrder::RowMajor in operator ()()
414 : gemmlowp::MapOrder::ColMajor; in operator ()()
415 static const gemmlowp::MapOrder RhsOrder = in operator ()()
416 !transpose_b ? gemmlowp::MapOrder::RowMajor in operator ()()
417 : gemmlowp::MapOrder::ColMajor; in operator ()()
418 gemmlowp::MatrixMap<const std::uint8_t, LhsOrder> lhs( in operator ()()
[all …]
/external/gemmlowp/doc/
Dquantization_example.cc26 template <typename tScalar, gemmlowp::MapOrder tOrder>
28 const gemmlowp::MatrixMap<tScalar, tOrder>& m) { in operator <<()
42 template <gemmlowp::MapOrder tOrder>
43 void FindMinMax(const gemmlowp::MatrixMap<float, tOrder>& m, float* min, in FindMinMax()
114 template <gemmlowp::MapOrder tLhsOrder, gemmlowp::MapOrder tRhsOrder,
115 gemmlowp::MapOrder tResultOrder>
117 const gemmlowp::MatrixMap<const float, tLhsOrder>& lhs, in FloatMatrixMultiplication()
118 const gemmlowp::MatrixMap<const float, tRhsOrder>& rhs, in FloatMatrixMultiplication()
119 gemmlowp::MatrixMap<float, tResultOrder>* result) { in FloatMatrixMultiplication()
153 template <typename tScalar, gemmlowp::MapOrder tOrder>
[all …]
Doutput.md1 # Output pipelines in gemmlowp
3 In gemmlowp, the "output pipeline" is the process that takes a final `int32`
20 and activation function. gemmlowp's output pipelines allow implementing that:
26 The gemmlowp entry point allowing to use an arbitrary output pipeline is
27 `GemmWithOutputPipeline` in [public/gemmlowp.h](../public/gemmlowp.h).
50 Separately, a self-contained example showing how to use gemmlowp to compute a
Dpublic.md3 gemmlowp's public interface is defined in
4 [public/gemmlowp.h](../public/gemmlowp.h).
46 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
47 gemmlowp::DefaultL8R8BitDepthParams>(
63 non-deprecated valid value is `gemmlowp::DefaultL8R8BitDepthParams`. See
78 must be `gemmlowp::GemmContext`.
84 * `context`: The `gemmlowp::GemmContext` object holding state and resources to
85 be used for this gemmlowp call.
92 will be performed by gemmlowp for the destination buffer. See
108 gemmlowp supports arbitrary combinations of storage orders for the LHS, RHS and
[all …]
Ddesign.md1 # Overview of gemmlowp design
5 gemmlowp, like most GEMMs, implements the straightforward matrix multiplication
57 ## Impact of low-precision computation on gemmlowp design
60 low-precision-computation paradigm and how it's implemented in gemmlowp.
70 stage is needed in gemmlowp, which we call "unpack". Thus we arrive at the
71 3-stage computation scheme that gemmlowp uses:
77 The pseudo-code overview of gemmlowp now looks like:
102 ## Exploring gemmlowp code
104 The design outlined above can be readily matched to gemmlowp source code, in
/external/tensorflow/tensorflow/lite/kernels/internal/reference/
Dtanh.h63 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; in Tanh()
65 using F3 = gemmlowp::FixedPoint<std::int16_t, 3>; in Tanh()
70 F0 output = gemmlowp::tanh(input); in Tanh()
76 gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i])); in Tanh()
77 F0 output = gemmlowp::tanh(input); in Tanh()
106 using FixedPoint4 = gemmlowp::FixedPoint<int32_t, 4>; in Tanh()
107 using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>; in Tanh()
109 const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4); in Tanh()
111 using gemmlowp::RoundingDivideByPOT; in Tanh()
Dhard_swish.h78 gemmlowp::SaturatingRoundingDoublingHighMul( in HardSwish()
115 reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul( in HardSwish()
126 reluish_value = gemmlowp::RoundingDivideByPOT( in HardSwish()
152 int16_t output_value = gemmlowp::RoundingDivideByPOT( in HardSwish()
Dlogistic.h77 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; in Logistic()
79 using F3 = gemmlowp::FixedPoint<std::int16_t, 3>; in Logistic()
82 F0 output = gemmlowp::logistic(input); in Logistic()
/external/tensorflow/tensorflow/lite/kernels/internal/
Dcommon.h143 using gemmlowp::RoundingDivideByPOT; in MultiplyByQuantizedMultiplierSmallerThanOneExp()
144 using gemmlowp::SaturatingRoundingDoublingHighMul; in MultiplyByQuantizedMultiplierSmallerThanOneExp()
151 using gemmlowp::SaturatingRoundingDoublingHighMul; in MultiplyByQuantizedMultiplierGreaterThanOne()
159 using gemmlowp::RoundingDivideByPOT; in MultiplyByQuantizedMultiplier()
160 using gemmlowp::SaturatingRoundingDoublingHighMul; in MultiplyByQuantizedMultiplier()
411 gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp( in SaturatingAddNonGemmlowp()
412 gemmlowp::FixedPoint<tRawType, tIntegerBits> a, in SaturatingAddNonGemmlowp()
413 gemmlowp::FixedPoint<tRawType, tIntegerBits> b) { in SaturatingAddNonGemmlowp()
414 return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw( in SaturatingAddNonGemmlowp()
447 gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub( in SaturatingSub()
[all …]
/external/gemmlowp/test/
Dbenchmark_all_sizes.cc47 namespace gemmlowp { namespace
114 using namespace gemmlowp; in benchmark_8bit()
132 gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint in benchmark_8bit()
137 gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; in benchmark_8bit()
142 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t, BitDepthParams>( in benchmark_8bit()
152 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t, in benchmark_8bit()
166 using namespace gemmlowp; in benchmark_8bit_to_32bit()
184 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, BitDepthParams>( in benchmark_8bit_to_32bit()
194 gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, in benchmark_8bit_to_32bit()
237 return benchmark_8bit<gemmlowp::L8R8WithLhsNonzeroBitDepthParams>( in benchmark()
[all …]
Dbenchmark.cc53 namespace gemmlowp { namespace
176 gemmlowp::RegisterCurrentThreadForProfiling(); in benchmark()
177 gemmlowp::StartProfiling(); in benchmark()
197 gemmlowp::FinishProfiling(); in benchmark()
223 gemmlowp::RegisterCurrentThreadForProfiling(); in benchmark_gemm_sizes()
224 gemmlowp::StartProfiling(); in benchmark_gemm_sizes()
234 gemmlowp::FinishProfiling(); in benchmark_gemm_sizes()
339 gemmlowp::GemmContext context; in benchmark_all()
341 gemmlowp::benchmark_small_model(&context); in benchmark_all()
345 gemmlowp::GemmContext context; in benchmark_all()
[all …]
Dtest_fixedpoint.cc29 namespace gemmlowp { namespace
583 gemmlowp::TestFixedPoint<std::int32_t>().RunTests("Scalar int32"); in main()
584 gemmlowp::TestFixedPoint<std::int16_t>().RunTests("Scalar int16"); in main()
586 gemmlowp::TestFixedPoint<__m128i>().RunTests("SSE4 __m128i = int32x4"); in main()
587 gemmlowp::TestFixedPoint<gemmlowp::int16x8_m128i>().RunTests( in main()
591 gemmlowp::TestFixedPoint<int32x4_t>().RunTests("NEON int32x4_t"); in main()
592 gemmlowp::TestFixedPoint<int16x8_t>().RunTests("NEON int16x8_t"); in main()
595 gemmlowp::TestFixedPoint<v4i32>().RunTests("MSA v4i32"); in main()
596 gemmlowp::TestFixedPoint<v8i16>().RunTests("MSA v8i16"); in main()
599 gemmlowp::TestFixedPoint<__m256i>().RunTests("AVX __m256i"); in main()
[all …]
Dcorrectness_meta_gemm.cc178 gemmlowp::WorkersPool* pool, std::int32_t pool_size) { in test()
183 gemmlowp::meta::multi_thread_gemm_q8(pool, pool_size, scratch, lhs, rhs, m, n, in test()
190 gemmlowp::WorkersPool* pool, std::int32_t pool_size) { in test_f()
196 gemmlowp::meta::multi_thread_gemm_f(pool, pool_size, scratch, lhs, rhs, m, n, in test_f()
203 std::int32_t* result, gemmlowp::WorkersPool* pool, in test_i32()
209 gemmlowp::meta::multi_thread_gemm_i32(pool, pool_size, scratch, lhs, rhs, m, in test_i32()
217 gemmlowp::WorkersPool* pool, int t) { in q_suite()
230 std::uint8_t* right, float* result, gemmlowp::WorkersPool* pool, in f_suite()
245 gemmlowp::WorkersPool* pool, int t) { in i32_suite()
278 gemmlowp::WorkersPool pool; in main()
/external/tensorflow/tensorflow/lite/tools/cmake/modules/
Dgemmlowp.cmake16 if(TARGET gemmlowp OR gemmlowp_POPULATED)
23 gemmlowp
24 GIT_REPOSITORY https://github.com/google/gemmlowp
32 SOURCE_DIR "${CMAKE_BINARY_DIR}/gemmlowp"
35 OverridableFetchContent_GetProperties(gemmlowp)
37 OverridableFetchContent_Populate(gemmlowp)
42 "${CMAKE_CURRENT_LIST_DIR}/gemmlowp"
DFindgemmlowp.cmake18 include(gemmlowp)
21 get_target_property(GEMMLOWP_INCLUDE_DIRS gemmlowp INTERFACE_DIRECTORIES)
23 gemmlowp
/external/tensorflow/tensorflow/lite/tools/cmake/modules/gemmlowp/
DCMakeLists.txt16 project(gemmlowp CXX) project
19 "Whether to add sources to gemmlowp's interface library targets.
24 "Directory that contains the gemmlowp project"
30 # gemmlowp doesn't have a CMake project so this is transcribed from
31 # gemmlowp/BUILD.
55 add_library(gemmlowp INTERFACE) target
57 target_sources(gemmlowp INTERFACE ${GEMMLOWP_PUBLIC_HEADERS})
59 target_include_directories(gemmlowp INTERFACE "${GEMMLOWP_SOURCE_DIR}/public")
60 target_link_libraries(gemmlowp INTERFACE gemmlowp_private)
/external/XNNPACK/bench/
Dqu8-gemm.cc112 typedef gemmlowp::VectorMap<const int32_t, gemmlowp::VectorShape::Col> ColVectorMap;
114 gemmlowp::OutputStageBiasAddition<ColVectorMap>,
115 gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint,
116 gemmlowp::OutputStageClamp,
117 gemmlowp::OutputStageSaturatingCastToUint8>
130 gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage; in Make()
132 gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint quantize_down_stage; in Make()
136 gemmlowp::OutputStageClamp clamp_stage; in Make()
139 gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; in Make()
172 gemmlowp::MultiThreadGemmContext threadingContext; in GemmlowpBenchmark()
[all …]
/external/gemmlowp/
DREADME.md1 # gemmlowp: a small self-contained low-precision GEMM library
3 …[Build Status](https://secure.travis-ci.org/google/gemmlowp.png)](http://travis-ci.org/google/gemm…
22 gemmlowp-related discussion, about either development or usage, is welcome on
25 https://groups.google.com/forum/#!forum/gemmlowp
60 otherwise gemmlowp will use slow reference code. Bazel users can compile by
61 running `bazel build --copt=-msse4.1 //gemmlowp:all`. The compiled binary should
69 to run `bazel build --config=opt //gemmlowp:all` instead.
71 Details of what it takes to make an efficient port of gemmlowp, namely writing a
77 ### The gemmlowp public interface
79 gemmlowp's main public interface is in the `public/` subdirectory.
[all …]
DCONTRIBUTING25 Getting in touch with the gemmlowp community
28 The central point of communication around gemmlowp is the mailing list,
29 https://groups.google.com/forum/#!forum/gemmlowp
37 guidance. The gemmlowp mailing list is a good place for that.
46 https://github.com/google/gemmlowp
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
Dlegacy_optimized_ops.h381 struct LegacyDepthwiseConvWorkerTask : public gemmlowp::Task {
442 uint8* output_data, gemmlowp::GemmContext* gemmlowp_context = nullptr) {
474 std::vector<gemmlowp::Task*> tasks(thread_count);
490 struct LegacyPerChannelDepthwiseConvWorkerTask : public gemmlowp::Task {
544 gemmlowp::GemmContext* gemmlowp_context = nullptr) {
578 std::vector<gemmlowp::Task*> tasks(thread_count);
715 typedef gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>
717 typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
718 gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
719 gemmlowp::OutputStageClamp,
[all …]
/external/tensorflow/tensorflow/lite/kernels/internal/reference/integer_ops/
Dlog_softmax.h40 using F5 = gemmlowp::FixedPoint<int32, kInputIntegerBits>; in LogSoftmax()
41 using F12 = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>; in LogSoftmax()
61 gemmlowp::Rescale<kAccumulationIntegerBits>( in LogSoftmax()
91 gemmlowp::RoundingDivideByPOT( in LogSoftmax()
/external/gemmlowp/internal/
Dkernel_default.h26 namespace gemmlowp {
60 namespace gemmlowp { \
103 namespace gemmlowp {

123456