1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <atomic>
20 #include <condition_variable>
21 #include <deque>
22 #include <functional>
23 #include <map>
24 #include <memory>
25 #include <mutex>
26 #include <string>
27 #include <thread>
28 #include <vector>
29 
30 #include <android-base/thread_annotations.h>
31 
32 #include <mediautils/FixedString.h>
33 
34 namespace android::mediautils {
35 
36 /**
37  * A thread for deferred execution of tasks, with cancellation.
38  */
39 class TimerThread {
40   public:
41     // A Handle is a time_point that serves as a unique key to access a queued
42     // request to the TimerThread.
43     using Handle = std::chrono::steady_clock::time_point;
44 
45     // Duration is based on steady_clock (typically nanoseconds)
46     // vs the system_clock duration (typically microseconds).
47     using Duration = std::chrono::steady_clock::duration;
48 
49     static inline constexpr Handle INVALID_HANDLE =
50             std::chrono::steady_clock::time_point::min();
51 
52     // Handle implementation details:
53     // A Handle represents the timer expiration time based on std::chrono::steady_clock
54     // (clock monotonic).  This Handle is computed as now() + timeout.
55     //
56     // The lsb of the Handle time_point is adjusted to indicate whether there is
57     // a timeout action (1) or not (0).
58     //
59 
60     template <size_t COUNT>
61     static constexpr bool is_power_of_2_v = COUNT > 0 && (COUNT & (COUNT - 1)) == 0;
62 
63     template <size_t COUNT>
64     static constexpr size_t mask_from_count_v = COUNT - 1;
65 
66     static constexpr size_t HANDLE_TYPES = 2;
67     // HANDLE_TYPES must be a power of 2.
68     static_assert(is_power_of_2_v<HANDLE_TYPES>);
69 
70     // The handle types
71     enum class HANDLE_TYPE : size_t {
72         NO_TIMEOUT = 0,
73         TIMEOUT = 1,
74     };
75 
76     static constexpr size_t HANDLE_TYPE_MASK = mask_from_count_v<HANDLE_TYPES>;
77 
78     template <typename T>
enum_as_value(T x)79     static constexpr auto enum_as_value(T x) {
80         return static_cast<std::underlying_type_t<T>>(x);
81     }
82 
isNoTimeoutHandle(Handle handle)83     static inline bool isNoTimeoutHandle(Handle handle) {
84         return (handle.time_since_epoch().count() & HANDLE_TYPE_MASK) ==
85                 enum_as_value(HANDLE_TYPE::NO_TIMEOUT);
86     }
87 
isTimeoutHandle(Handle handle)88     static inline bool isTimeoutHandle(Handle handle) {
89         return (handle.time_since_epoch().count() & HANDLE_TYPE_MASK) ==
90                 enum_as_value(HANDLE_TYPE::TIMEOUT);
91     }
92 
93     // Returns a unique Handle that doesn't exist in the container.
94     template <size_t MAX_TYPED_HANDLES, size_t HANDLE_TYPE_AS_VALUE, typename C, typename T>
getUniqueHandleForHandleType_l(C container,T timeout)95     static Handle getUniqueHandleForHandleType_l(C container, T timeout) {
96         static_assert(MAX_TYPED_HANDLES > 0 && HANDLE_TYPE_AS_VALUE < MAX_TYPED_HANDLES
97                 && is_power_of_2_v<MAX_TYPED_HANDLES>,
98                 " handles must be power of two");
99 
100         // Our initial handle is the deadline as computed from steady_clock.
101         auto deadline = std::chrono::steady_clock::now() + timeout;
102 
103         // We adjust the lsbs by the minimum increment to have the correct
104         // HANDLE_TYPE in the least significant bits.
105         auto remainder = deadline.time_since_epoch().count() & HANDLE_TYPE_MASK;
106         size_t offset = HANDLE_TYPE_AS_VALUE > remainder ? HANDLE_TYPE_AS_VALUE - remainder :
107                      MAX_TYPED_HANDLES + HANDLE_TYPE_AS_VALUE - remainder;
108         deadline += std::chrono::steady_clock::duration(offset);
109 
110         // To avoid key collisions, advance the handle by MAX_TYPED_HANDLES (the modulus factor)
111         // until the key is unique.
112         while (container.find(deadline) != container.end()) {
113             deadline += std::chrono::steady_clock::duration(MAX_TYPED_HANDLES);
114         }
115         return deadline;
116     }
117 
118     // TimerCallback invoked on timeout or cancel.
119     using TimerCallback = std::function<void(Handle)>;
120 
121     /**
122      * Schedules a task to be executed in the future (`timeout` duration from now).
123      *
124      * \param tag     string associated with the task.  This need not be unique,
125      *                as the Handle returned is used for cancelling.
126      * \param func    callback function that is invoked at the timeout.
127      * \param timeoutDuration timeout duration which is converted to milliseconds with at
128      *                least 45 integer bits.
129      *                A timeout of 0 (or negative) means the timer never expires
130      *                so func() is never called. These tasks are stored internally
131      *                and reported in the toString() until manually cancelled.
132      * \returns       a handle that can be used for cancellation.
133      */
134     Handle scheduleTask(
135             std::string_view tag, TimerCallback&& func,
136             Duration timeoutDuration, Duration secondChanceDuration);
137 
138     /**
139      * Tracks a task that shows up on toString() until cancelled.
140      *
141      * \param tag     string associated with the task.
142      * \returns       a handle that can be used for cancellation.
143      */
144     Handle trackTask(std::string_view tag);
145 
146     /**
147      * Cancels a task previously scheduled with scheduleTask()
148      * or trackTask().
149      *
150      * \returns true if cancelled. If the task has already executed
151      *          or if the handle doesn't exist, this is a no-op
152      *          and returns false.
153      */
154     bool cancelTask(Handle handle);
155 
156     struct SnapshotAnalysis;
157     /**
158      * Take a snapshot of the current state of the TimerThread and determine the
159      * potential cause of a deadlock.
160      * \param retiredCount The number of successfully retired calls to capture
161      *                      (may be many).
162      * \return See below for a description of a SnapShotAnalysis object
163      */
164     SnapshotAnalysis getSnapshotAnalysis(size_t retiredCount = SIZE_MAX) const;
165 
166     /**
167      * Returns a string representation of the TimerThread queue.
168      *
169      * The queue is dumped in order of scheduling (not deadline).
170      */
171     std::string pendingToString() const;
172 
173     /**
174      * Returns a string representation of the last retired tasks.
175      *
176      * These tasks from trackTask() or scheduleTask() are
177      * cancelled.
178      *
179      * These are ordered when the task was retired.
180      *
181      * \param n is maximum number of tasks to dump.
182      */
183     std::string retiredToString(size_t n = SIZE_MAX) const;
184 
185 
186     /**
187      * Returns a string representation of the last timeout tasks.
188      *
189      * These tasks from scheduleTask() which have  timed-out.
190      *
191      * These are ordered when the task had timed-out.
192      *
193      * \param n is maximum number of tasks to dump.
194      */
195     std::string timeoutToString(size_t n = SIZE_MAX) const;
196 
197     /**
198      * Dumps a container with SmartPointer<Request> to a string.
199      *
200      * "{ Request1 } { Request2} ...{ RequestN }"
201      */
202     template <typename T>
requestsToString(const T & containerRequests)203     static std::string requestsToString(const T& containerRequests) {
204         std::string s;
205         // append seems to be faster than stringstream.
206         // https://stackoverflow.com/questions/18892281/most-optimized-way-of-concatenation-in-strings
207         for (const auto& request : containerRequests) {
208             s.append("{ ").append(request->toString()).append(" } ");
209         }
210         // If not empty, there's an extra space at the end, so we trim it off.
211         if (!s.empty()) s.pop_back();
212         return s;
213     }
214 
215     // To minimize movement of data, we pass around shared_ptrs to Requests.
216     // These are allocated and deallocated outside of the lock.
217     // TODO(b/243839867) consider options to merge Request with the
218     // TimeCheck::TimeCheckHandler struct.
219     struct Request {
RequestRequest220         Request(std::chrono::system_clock::time_point _scheduled,
221                 std::chrono::system_clock::time_point _deadline,
222                 Duration _secondChanceDuration,
223                 pid_t _tid,
224                 std::string_view _tag)
225             : scheduled(_scheduled)
226             , deadline(_deadline)
227             , secondChanceDuration(_secondChanceDuration)
228             , tid(_tid)
229             , tag(_tag)
230             {}
231 
232         const std::chrono::system_clock::time_point scheduled;
233         const std::chrono::system_clock::time_point deadline; // deadline := scheduled
234                                                               // + timeoutDuration
235                                                               // + secondChanceDuration
236                                                               // if deadline == scheduled, no
237                                                               // timeout, task not executed.
238         Duration secondChanceDuration;
239         const pid_t tid;
240         const FixedString62 tag;
241         std::string toString() const;
242     };
243 
244 
245     // SnapshotAnalysis contains info deduced by analysisTimeout().
246 
247     struct SnapshotAnalysis {
248         // If we were unable to determine any applicable thread ids,
249         // we leave their value as INVALID_PID.
250         // Note, we use the linux thread id (not pthread), so its type is pid_t.
251         static constexpr pid_t INVALID_PID = -1;
252         // Description of likely issue and/or blocked method.
253         // Empty if no actionable info.
254         std::string description;
255         // Tid of the (latest) monitored thread which has timed out.
256         // This is the thread which the suspect is deduced with respect to.
257         // Most often, this is the thread which an abort is being triggered
258         // from.
259         pid_t timeoutTid = INVALID_PID;
260         // Tid of the (HAL) thread which has likely halted progress, selected
261         // from pendingRequests. May be the same as timeoutTid, if the timed-out
262         // thread directly called into the HAL.
263         pid_t suspectTid = INVALID_PID;
264         // Number of second chances given by the timer thread
265         size_t secondChanceCount;
266         // List of pending requests
267         std::vector<std::shared_ptr<const Request>> pendingRequests;
268         // List of timed-out requests
269         std::vector<std::shared_ptr<const Request>> timeoutRequests;
270         // List of retired requests
271         std::vector<std::shared_ptr<const Request>> retiredRequests;
272         // mutex deadlock / wait detection information.
273         bool hasMutexCycle = false;
274         std::vector<std::pair<pid_t, std::string>> mutexWaitChain;
275         // Dumps the information contained above as well as additional call
276         // stacks where applicable.
277         std::string toString(bool showTimeoutStack = true) const;
278     };
279 
280   private:
281     // Deque of requests, in order of add().
282     // This class is thread-safe.
283     class RequestQueue {
284       public:
RequestQueue(size_t maxSize)285         explicit RequestQueue(size_t maxSize)
286             : mRequestQueueMax(maxSize) {}
287 
288         void add(std::shared_ptr<const Request>);
289 
290         // return up to the last "n" requests retired.
291         void copyRequests(std::vector<std::shared_ptr<const Request>>& requests,
292             size_t n = SIZE_MAX) const;
293 
294       private:
295         const size_t mRequestQueueMax;
296         mutable std::mutex mRQMutex;
297         std::deque<std::pair<std::chrono::system_clock::time_point,
298                              std::shared_ptr<const Request>>>
299                 mRequestQueue GUARDED_BY(mRQMutex);
300     };
301 
302     // A storage map of tasks without timeouts.  There is no TimerCallback
303     // required, it just tracks the tasks with the tag, scheduled time and the tid.
304     // These tasks show up on a pendingToString() until manually cancelled.
305     class NoTimeoutMap {
306         mutable std::mutex mNTMutex;
307         std::map<Handle, std::shared_ptr<const Request>> mMap GUARDED_BY(mNTMutex);
getUniqueHandle_l()308         Handle getUniqueHandle_l() REQUIRES(mNTMutex) {
309             return getUniqueHandleForHandleType_l<
310                     HANDLE_TYPES, enum_as_value(HANDLE_TYPE::NO_TIMEOUT)>(
311                 mMap, Duration{} /* timeout */);
312         }
313 
314       public:
315         bool isValidHandle(Handle handle) const; // lock free
316         Handle add(std::shared_ptr<const Request> request);
317         std::shared_ptr<const Request> remove(Handle handle);
318         void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
319     };
320 
321     // Monitor thread.
322     // This thread manages shared pointers to Requests and a function to
323     // call on timeout.
324     // This class is thread-safe.
325     class MonitorThread {
326         std::atomic<size_t> mSecondChanceCount{};
327         mutable std::mutex mMutex;
328         mutable std::condition_variable mCond GUARDED_BY(mMutex);
329 
330         // Ordered map of requests based on time of deadline.
331         //
332         std::map<Handle, std::pair<std::shared_ptr<const Request>, TimerCallback>>
333                 mMonitorRequests GUARDED_BY(mMutex);
334 
335         // Due to monotonic/steady clock inaccuracies during suspend,
336         // we allow an additional second chance waiting time to prevent
337         // false removal.
338 
339         // This mSecondChanceRequests queue is almost always empty.
340         // Using a pair with the original handle allows lookup and keeps
341         // the Key unique.
342         std::map<std::pair<Handle /* new */, Handle /* original */>,
343                 std::pair<std::shared_ptr<const Request>, TimerCallback>>
344                         mSecondChanceRequests GUARDED_BY(mMutex);
345 
346         RequestQueue& mTimeoutQueue GUARDED_BY(mMutex); // added to when request times out.
347 
348         // Worker thread variables
349         bool mShouldExit GUARDED_BY(mMutex) = false;
350 
351         // To avoid race with initialization,
352         // mThread should be initialized last as the thread is launched immediately.
353         std::thread mThread;
354 
355         void threadFunc();
getUniqueHandle_l(Duration timeout)356         Handle getUniqueHandle_l(Duration timeout) REQUIRES(mMutex) {
357             return getUniqueHandleForHandleType_l<
358                     HANDLE_TYPES, enum_as_value(HANDLE_TYPE::TIMEOUT)>(
359                 mMonitorRequests, timeout);
360         }
361 
362       public:
363         MonitorThread(RequestQueue &timeoutQueue);
364         ~MonitorThread();
365 
366         Handle add(std::shared_ptr<const Request> request, TimerCallback&& func,
367                 Duration timeout);
368         std::shared_ptr<const Request> remove(Handle handle);
369         void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
getSecondChanceCount()370         size_t getSecondChanceCount() const {
371             return mSecondChanceCount.load(std::memory_order_relaxed);
372         }
373     };
374 
375 
376     // A HAL method is where the substring "Hidl" is in the class name.
377     // The tag should look like: ... Hidl ... :: ...
378     static bool isRequestFromHal(const std::shared_ptr<const Request>& request);
379 
380     std::vector<std::shared_ptr<const Request>> getPendingRequests() const;
381 
382     static constexpr size_t kRetiredQueueMax = 16;
383     RequestQueue mRetiredQueue{kRetiredQueueMax};  // locked internally
384 
385     static constexpr size_t kTimeoutQueueMax = 16;
386     RequestQueue mTimeoutQueue{kTimeoutQueueMax};  // locked internally
387 
388     NoTimeoutMap mNoTimeoutMap;  // locked internally
389 
390     MonitorThread mMonitorThread{mTimeoutQueue};  // This should be initialized last because
391                                                   // the thread is launched immediately.
392                                                   // Locked internally.
393 };
394 
395 }  // namespace android::mediautils
396