/* * Copyright (C) 2021 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define LOG_TAG "TimerThread" #include #include #include #include #include #include #include #include #include #include using namespace std::chrono_literals; namespace android::mediautils { extern std::string formatTime(std::chrono::system_clock::time_point t); extern std::string_view timeSuffix(std::string_view time1, std::string_view time2); TimerThread::Handle TimerThread::scheduleTask( std::string_view tag, TimerCallback&& func, Duration timeoutDuration, Duration secondChanceDuration) { const auto now = std::chrono::system_clock::now(); auto request = std::make_shared(now, now + std::chrono::duration_cast(timeoutDuration), secondChanceDuration, getThreadIdWrapper(), tag); return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration); } TimerThread::Handle TimerThread::trackTask(std::string_view tag) { const auto now = std::chrono::system_clock::now(); auto request = std::make_shared(now, now, Duration{} /* secondChanceDuration */, getThreadIdWrapper(), tag); return mNoTimeoutMap.add(std::move(request)); } bool TimerThread::cancelTask(Handle handle) { std::shared_ptr request = isNoTimeoutHandle(handle) ? mNoTimeoutMap.remove(handle) : mMonitorThread.remove(handle); if (!request) return false; mRetiredQueue.add(std::move(request)); return true; } std::string TimerThread::SnapshotAnalysis::toString(bool showTimeoutStack) const { // Note: These request queues are snapshot very close together but // not at "identical" times as we don't use a class-wide lock. std::string analysisSummary = std::string("\nanalysis [ ").append(description).append(" ]"); std::string timeoutStack; std::string blockedStack; std::string mutexWaitChainStack; if (showTimeoutStack && timeoutTid != -1) { timeoutStack = std::string(suspectTid == timeoutTid ? "\ntimeout/blocked(" : "\ntimeout(") .append(std::to_string(timeoutTid)).append(") callstack [\n") .append(getCallStackStringForTid(timeoutTid)).append("]"); } if (suspectTid != -1 && suspectTid != timeoutTid) { blockedStack = std::string("\nblocked(") .append(std::to_string(suspectTid)).append(") callstack [\n") .append(getCallStackStringForTid(suspectTid)).append("]"); } if (!mutexWaitChain.empty()) { mutexWaitChainStack.append("\nmutex wait chain [\n"); // the wait chain omits the initial timeout tid (which is good as we don't // need to suppress it). for (size_t i = 0; i < mutexWaitChain.size(); ++i) { const auto& [tid, name] = mutexWaitChain[i]; mutexWaitChainStack.append("{ tid: ").append(std::to_string(tid)) .append(" (holding ").append(name).append(")"); if (tid == timeoutTid) { mutexWaitChainStack.append(" TIMEOUT_STACK }\n"); } else if (tid == suspectTid) { mutexWaitChainStack.append(" BLOCKED_STACK }\n"); } else if (hasMutexCycle && i == mutexWaitChain.size() - 1) { // for a cycle, the last pid in the chain is repeated. mutexWaitChainStack.append(" CYCLE_STACK }\n"); } else { mutexWaitChainStack.append(" callstack [\n") .append(getCallStackStringForTid(tid)).append("] }\n"); } } mutexWaitChainStack.append("]"); } return std::string("now ") .append(formatTime(std::chrono::system_clock::now())) .append("\nsecondChanceCount ") .append(std::to_string(secondChanceCount)) .append(analysisSummary) .append("\ntimeout [ ") .append(requestsToString(timeoutRequests)) .append(" ]\npending [ ") .append(requestsToString(pendingRequests)) .append(" ]\nretired [ ") .append(requestsToString(retiredRequests)) .append(" ]") .append(timeoutStack) .append(blockedStack) .append(mutexWaitChainStack); } // A HAL method is where the substring "Hidl" is in the class name. // The tag should look like: ... Hidl ... :: ... // When the audio HAL is updated to AIDL perhaps we will use instead // a global directory of HAL classes. // // See MethodStatistics.cpp: // mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL) // /* static */ bool TimerThread::isRequestFromHal(const std::shared_ptr& request) { for (const auto& s : {"Hidl", "Aidl"}) { const auto& tagSV = request->tag.asStringView(); const size_t halStrPos = tagSV.find(s); // should be a separator afterwards Hidl/Aidl which indicates the string was in the class. if (halStrPos != std::string::npos && tagSV.find("::", halStrPos) != std::string::npos) { return true; } } return false; } struct TimerThread::SnapshotAnalysis TimerThread::getSnapshotAnalysis(size_t retiredCount) const { struct SnapshotAnalysis analysis{}; // The following snapshot of the TimerThread state will be utilized for // analysis. Note, there is no lock around these calls, so there could be // a state update between them. mTimeoutQueue.copyRequests(analysis.timeoutRequests); mRetiredQueue.copyRequests(analysis.retiredRequests, retiredCount); analysis.pendingRequests = getPendingRequests(); analysis.secondChanceCount = mMonitorThread.getSecondChanceCount(); // No call has timed out, so there is no analysis to be done. if (analysis.timeoutRequests.empty()) { return analysis; } // for now look at last timeout (in our case, the only timeout) const std::shared_ptr timeout = analysis.timeoutRequests.back(); analysis.timeoutTid = timeout->tid; std::string& description = analysis.description; // audio mutex specific wait chain analysis auto deadlockInfo = audio_utils::mutex::deadlock_detection(analysis.timeoutTid); ALOGD("%s: deadlockInfo: %s", __func__, deadlockInfo.to_string().c_str()); if (!deadlockInfo.empty()) { if (!description.empty()) description.append("\n"); description.append(deadlockInfo.to_string()); } analysis.hasMutexCycle = deadlockInfo.has_cycle; analysis.mutexWaitChain = std::move(deadlockInfo.chain); // no pending requests, we don't attempt to use temporal correlation between a recent call. if (analysis.pendingRequests.empty()) { return analysis; } // pending Requests that are problematic. std::vector> pendingExact; std::vector> pendingPossible; // We look at pending requests that were scheduled no later than kPendingDuration // after the timeout request. This prevents false matches with calls // that naturally block for a short period of time // such as HAL write() and read(). // constexpr Duration kPendingDuration = 1000ms; for (const auto& pending : analysis.pendingRequests) { // If the pending tid is the same as timeout tid, problem identified. if (pending->tid == timeout->tid) { pendingExact.emplace_back(pending); continue; } // if the pending tid is scheduled within time limit if (pending->scheduled - timeout->scheduled < kPendingDuration) { pendingPossible.emplace_back(pending); } } if (!pendingExact.empty()) { const auto& request = pendingExact.front(); const bool hal = isRequestFromHal(request); if (hal) { if (!description.empty()) description.append("\n"); description.append("Blocked directly due to HAL call: ") .append(request->toString()); analysis.suspectTid = request->tid; } } if (description.empty() && !pendingPossible.empty()) { for (const auto& request : pendingPossible) { const bool hal = isRequestFromHal(request); if (hal) { // The first blocked call is the most likely one. // Recent calls might be temporarily blocked // calls such as write() or read() depending on kDuration. description = std::string("Blocked possibly due to HAL call: ") .append(request->toString()); analysis.suspectTid= request->tid; } } } return analysis; } std::vector> TimerThread::getPendingRequests() const { constexpr size_t kEstimatedPendingRequests = 8; // approx 128 byte alloc. std::vector> pendingRequests; pendingRequests.reserve(kEstimatedPendingRequests); // preallocate vector out of lock. // following are internally locked calls, which add to our local pendingRequests. mMonitorThread.copyRequests(pendingRequests); mNoTimeoutMap.copyRequests(pendingRequests); // Sort in order of scheduled time. std::sort(pendingRequests.begin(), pendingRequests.end(), [](const std::shared_ptr& r1, const std::shared_ptr& r2) { return r1->scheduled < r2->scheduled; }); return pendingRequests; } std::string TimerThread::pendingToString() const { return requestsToString(getPendingRequests()); } std::string TimerThread::retiredToString(size_t n) const { std::vector> retiredRequests; mRetiredQueue.copyRequests(retiredRequests, n); // Dump to string return requestsToString(retiredRequests); } std::string TimerThread::timeoutToString(size_t n) const { std::vector> timeoutRequests; mTimeoutQueue.copyRequests(timeoutRequests, n); // Dump to string return requestsToString(timeoutRequests); } std::string TimerThread::Request::toString() const { const auto scheduledString = formatTime(scheduled); const auto deadlineString = formatTime(deadline); return std::string(tag) .append(" scheduled ").append(scheduledString) .append(" deadline ").append(timeSuffix(scheduledString, deadlineString)) .append(" tid ").append(std::to_string(tid)); } void TimerThread::RequestQueue::add(std::shared_ptr request) { std::lock_guard lg(mRQMutex); mRequestQueue.emplace_back(std::chrono::system_clock::now(), std::move(request)); if (mRequestQueue.size() > mRequestQueueMax) { mRequestQueue.pop_front(); } } void TimerThread::RequestQueue::copyRequests( std::vector>& requests, size_t n) const { std::lock_guard lg(mRQMutex); const size_t size = mRequestQueue.size(); size_t i = n >= size ? 0 : size - n; for (; i < size; ++i) { const auto &[time, request] = mRequestQueue[i]; requests.emplace_back(request); } } TimerThread::Handle TimerThread::NoTimeoutMap::add(std::shared_ptr request) { std::lock_guard lg(mNTMutex); // A unique handle is obtained by mNoTimeoutRequests.fetch_add(1), // This need not be under a lock, but we do so anyhow. const Handle handle = getUniqueHandle_l(); mMap[handle] = request; return handle; } std::shared_ptr TimerThread::NoTimeoutMap::remove(Handle handle) { std::lock_guard lg(mNTMutex); auto it = mMap.find(handle); if (it == mMap.end()) return {}; auto request = it->second; mMap.erase(it); return request; } void TimerThread::NoTimeoutMap::copyRequests( std::vector>& requests) const { std::lock_guard lg(mNTMutex); for (const auto &[handle, request] : mMap) { requests.emplace_back(request); } } TimerThread::MonitorThread::MonitorThread(RequestQueue& timeoutQueue) : mTimeoutQueue(timeoutQueue) , mThread([this] { threadFunc(); }) { pthread_setname_np(mThread.native_handle(), "TimerThread"); pthread_setschedprio(mThread.native_handle(), PRIORITY_URGENT_AUDIO); } TimerThread::MonitorThread::~MonitorThread() { { std::lock_guard _l(mMutex); mShouldExit = true; mCond.notify_all(); } mThread.join(); } void TimerThread::MonitorThread::threadFunc() { std::unique_lock _l(mMutex); ::android::base::ScopedLockAssertion lock_assertion(mMutex); while (!mShouldExit) { Handle nextDeadline = INVALID_HANDLE; Handle now = INVALID_HANDLE; if (!mMonitorRequests.empty()) { nextDeadline = mMonitorRequests.begin()->first; now = std::chrono::steady_clock::now(); if (nextDeadline < now) { auto node = mMonitorRequests.extract(mMonitorRequests.begin()); // Deadline has expired, handle the request. auto secondChanceDuration = node.mapped().first->secondChanceDuration; if (secondChanceDuration.count() != 0) { // We now apply the second chance duration to find the clock // monotonic second deadline. The unique key is then the // pair. // // The second chance prevents a false timeout should there be // any clock monotonic advancement during suspend. auto newHandle = now + secondChanceDuration; ALOGD("%s: TimeCheck second chance applied for %s", __func__, node.mapped().first->tag.c_str()); // should be rare event. mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(), std::make_pair(newHandle, nextDeadline), std::move(node.mapped())); // increment second chance counter. mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed); } else { { _l.unlock(); // We add Request to retired queue early so that it can be dumped out. mTimeoutQueue.add(std::move(node.mapped().first)); node.mapped().second(nextDeadline); // Caution: we don't hold lock when we call TimerCallback, // but this is the timeout case! We will crash soon, // maybe before returning. // anything left over is released here outside lock. } // reacquire the lock - if something was added, we loop immediately to check. _l.lock(); } // always process expiring monitor requests first. continue; } } // now process any second chance requests. if (!mSecondChanceRequests.empty()) { Handle secondDeadline = mSecondChanceRequests.begin()->first.first; if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now(); if (secondDeadline < now) { auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin()); { _l.unlock(); // We add Request to retired queue early so that it can be dumped out. mTimeoutQueue.add(std::move(node.mapped().first)); const Handle originalHandle = node.key().second; node.mapped().second(originalHandle); // Caution: we don't hold lock when we call TimerCallback. // This is benign issue - we permit concurrent operations // while in the callback to the MonitorQueue. // // Anything left over is released here outside lock. } // reacquire the lock - if something was added, we loop immediately to check. _l.lock(); continue; } // update the deadline. if (nextDeadline == INVALID_HANDLE) { nextDeadline = secondDeadline; } else { nextDeadline = std::min(nextDeadline, secondDeadline); } } if (nextDeadline != INVALID_HANDLE) { mCond.wait_until(_l, nextDeadline); } else { mCond.wait(_l); } } } TimerThread::Handle TimerThread::MonitorThread::add( std::shared_ptr request, TimerCallback&& func, Duration timeout) { std::lock_guard _l(mMutex); const Handle handle = getUniqueHandle_l(timeout); mMonitorRequests.emplace_hint(mMonitorRequests.end(), handle, std::make_pair(std::move(request), std::move(func))); mCond.notify_all(); return handle; } std::shared_ptr TimerThread::MonitorThread::remove(Handle handle) { std::pair, TimerCallback> data; std::unique_lock ul(mMutex); ::android::base::ScopedLockAssertion lock_assertion(mMutex); if (const auto it = mMonitorRequests.find(handle); it != mMonitorRequests.end()) { data = std::move(it->second); mMonitorRequests.erase(it); ul.unlock(); // manually release lock here so func (data.second) // is released outside of lock. return data.first; // request } // this check is O(N), but since the second chance requests are ordered // in terms of earliest expiration time, we would expect better than average results. for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) { if (it->first.second == handle) { data = std::move(it->second); mSecondChanceRequests.erase(it); ul.unlock(); // manually release lock here so func (data.second) // is released outside of lock. return data.first; // request } } return {}; } void TimerThread::MonitorThread::copyRequests( std::vector>& requests) const { std::lock_guard lg(mMutex); for (const auto &[deadline, monitorpair] : mMonitorRequests) { requests.emplace_back(monitorpair.first); } // we combine the second map with the first map - this is // everything that is pending on the monitor thread. // The second map will be older than the first map so this // is in order. for (const auto &[deadline, monitorpair] : mSecondChanceRequests) { requests.emplace_back(monitorpair.first); } } } // namespace android::mediautils