1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // instrumentation.h: contains the definitions needed to
16 // instrument code for profiling:
17 //   ScopedProfilingLabel, RegisterCurrentThreadForProfiling.
18 //
19 // profiler.h is only needed to drive the profiler:
20 //   StartProfiling, FinishProfiling.
21 //
22 // See the usage example in profiler.h.
23 
24 #ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_
25 #define GEMMLOWP_PROFILING_INSTRUMENTATION_H_
26 
27 #include <cstdio>
28 
29 #ifndef GEMMLOWP_USE_STLPORT
30 #include <cstdint>
31 #else
32 #include <stdint.h>
33 namespace std {
34 using ::int16_t;
35 using ::int32_t;
36 using ::int8_t;
37 using ::size_t;
38 using ::uint16_t;
39 using ::uint32_t;
40 using ::uint8_t;
41 using ::uintptr_t;
42 }  // namespace std
43 #endif
44 
45 #include <algorithm>
46 #include <cassert>
47 #include <cstdlib>
48 
49 #ifdef GEMMLOWP_PROFILING
50 #include <cstring>
51 #include <set>
52 #endif
53 
54 #include "./pthread_everywhere.h"
55 
56 namespace gemmlowp {
57 
// Assertion that stays active in release builds.
//
// When `condition` is true this does nothing. Otherwise it prints `msg` to
// stderr, prefixed with "gemmlowp error: ", and aborts the process.
inline void ReleaseBuildAssertion(bool condition, const char* msg) {
  if (condition) {
    return;
  }
  fprintf(stderr, "gemmlowp error: %s\n", msg);
  abort();
}
64 
// Thin, non-copyable wrapper around a pthread mutex.
//
// The underlying pthread_mutex_t is initialized with default attributes in
// the constructor and destroyed in the destructor. Return codes of the
// pthread calls are not inspected.
class Mutex {
 public:
  Mutex() { pthread_mutex_init(&mutex_, nullptr); }
  ~Mutex() { pthread_mutex_destroy(&mutex_); }

  // Copying is disabled.
  Mutex(const Mutex&) = delete;
  Mutex& operator=(const Mutex&) = delete;

  // Acquires the mutex via pthread_mutex_lock.
  void Lock() { pthread_mutex_lock(&mutex_); }

  // Releases the mutex via pthread_mutex_unlock.
  void Unlock() { pthread_mutex_unlock(&mutex_); }

 private:
  pthread_mutex_t mutex_;
};
79 
80 class GlobalMutexes {
81  public:
Profiler()82   static Mutex* Profiler() {
83     static Mutex m;
84     return &m;
85   }
86 
EightBitIntGemm()87   static Mutex* EightBitIntGemm() {
88     static Mutex m;
89     return &m;
90   }
91 };
92 
93 // A very simple RAII helper to lock and unlock a Mutex
94 struct ScopedLock {
ScopedLockScopedLock95   ScopedLock(Mutex* m) : _m(m) { _m->Lock(); }
~ScopedLockScopedLock96   ~ScopedLock() { _m->Unlock(); }
97 
98  private:
99   Mutex* _m;
100 };
101 
102 // Profiling definitions. Two paths: when profiling is enabled,
103 // and when profiling is disabled.
104 #ifdef GEMMLOWP_PROFILING
105 // This code path is when profiling is enabled.
106 
107 // A pseudo-call-stack. Contrary to a real call-stack, this only
108 // contains pointers to literal strings that were manually entered
109 // in the instrumented code (see ScopedProfilingLabel).
110 struct ProfilingStack {
111   static const std::size_t kMaxSize = 30;
112   typedef const char* LabelsArrayType[kMaxSize];
113   LabelsArrayType labels;
114   std::size_t size;
115   Mutex* lock;
116 
ProfilingStackProfilingStack117   ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); }
~ProfilingStackProfilingStack118   ~ProfilingStack() { delete lock; }
119 
PushProfilingStack120   void Push(const char* label) {
121     ScopedLock sl(lock);
122     ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow");
123     labels[size] = label;
124     size++;
125   }
126 
PopProfilingStack127   void Pop() {
128     ScopedLock sl(lock);
129     ReleaseBuildAssertion(size > 0, "ProfilingStack underflow");
130     size--;
131   }
132 
UpdateTopProfilingStack133   void UpdateTop(const char* new_label) {
134     ScopedLock sl(lock);
135     assert(size);
136     labels[size - 1] = new_label;
137   }
138 
139   ProfilingStack& operator=(const ProfilingStack& other) {
140     memcpy(this, &other, sizeof(ProfilingStack));
141     return *this;
142   }
143 
144   bool operator==(const ProfilingStack& other) const {
145     return !memcmp(this, &other, sizeof(ProfilingStack));
146   }
147 };
148 
149 static_assert(
150     !(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)),
151     "ProfilingStack should have power-of-two size to fit in cache lines");
152 
153 struct ThreadInfo;
154 
155 // The global set of threads being profiled.
ThreadsUnderProfiling()156 inline std::set<ThreadInfo*>& ThreadsUnderProfiling() {
157   static std::set<ThreadInfo*> v;
158   return v;
159 }
160 
161 struct ThreadInfo {
162   pthread_key_t key;  // used only to get a callback at thread exit.
163   ProfilingStack stack;
164 
ThreadInfoThreadInfo165   ThreadInfo() {
166     pthread_key_create(&key, ThreadExitCallback);
167     pthread_setspecific(key, this);
168     stack.lock = new Mutex();
169   }
170 
ThreadExitCallbackThreadInfo171   static void ThreadExitCallback(void* ptr) {
172     ScopedLock sl(GlobalMutexes::Profiler());
173     ThreadInfo* self = static_cast<ThreadInfo*>(ptr);
174     ThreadsUnderProfiling().erase(self);
175   }
176 };
177 
ThreadLocalThreadInfo()178 inline ThreadInfo& ThreadLocalThreadInfo() {
179   static pthread_key_t key;
180   static auto DeleteThreadInfo = [](void* threadInfoPtr) {
181     ThreadInfo* threadInfo = static_cast<ThreadInfo*>(threadInfoPtr);
182     if (threadInfo) {
183       delete threadInfo;
184     }
185   };
186 
187   // key_result is unused. The purpose of this 'static' local object is
188   // to have its initializer (the pthread_key_create call) performed exactly
189   // once, in a way that is guaranteed (since C++11) to be reentrant.
190   static const int key_result = pthread_key_create(&key, DeleteThreadInfo);
191   (void)key_result;
192 
193   ThreadInfo* threadInfo = static_cast<ThreadInfo*>(pthread_getspecific(key));
194   if (!threadInfo) {
195     threadInfo = new ThreadInfo();
196     pthread_setspecific(key, threadInfo);
197   }
198   return *threadInfo;
199 }
200 
201 // ScopedProfilingLabel is how one instruments code for profiling
202 // with this profiler. Construct local ScopedProfilingLabel variables,
203 // passing a literal string describing the local code. Profile
204 // samples will then be annotated with this label, while it is in scope
205 // (whence the name --- also known as RAII).
206 // See the example in profiler.h.
207 class ScopedProfilingLabel {
208   ProfilingStack* profiling_stack_;
209 
210  public:
ScopedProfilingLabel(const char * label)211   explicit ScopedProfilingLabel(const char* label)
212       : profiling_stack_(&ThreadLocalThreadInfo().stack) {
213     profiling_stack_->Push(label);
214   }
215 
~ScopedProfilingLabel()216   ~ScopedProfilingLabel() { profiling_stack_->Pop(); }
217 
Update(const char * new_label)218   void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); }
219 };
220 
221 // To be called once on each thread to be profiled.
RegisterCurrentThreadForProfiling()222 inline void RegisterCurrentThreadForProfiling() {
223   ScopedLock sl(GlobalMutexes::Profiler());
224   ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo());
225 }
226 
227 #else  // not GEMMLOWP_PROFILING
228 // This code path is when profiling is disabled.
229 
// Stand-in for ScopedProfilingLabel when profiling is disabled. It has no
// data members and both of its functions have empty bodies, so instrumented
// code carries zero runtime overhead.
struct ScopedProfilingLabel {
  // Accepts and ignores the label.
  explicit ScopedProfilingLabel(const char* /*label*/) {}

  // Accepts and ignores the replacement label.
  void Update(const char* /*new_label*/) {}
};
236 
// Does nothing when profiling is disabled; present so that callers compile
// unchanged in both configurations.
inline void RegisterCurrentThreadForProfiling() {
}
238 
239 #endif
240 
241 }  // end namespace gemmlowp
242 
243 #endif  // GEMMLOWP_PROFILING_INSTRUMENTATION_H_
244