1 /*
2 * Copyright (c) 2024, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false // STOPSHIP if true.
19
#include "PressureMonitor.h"

#include <android-base/stringprintf.h>
#include <log/log.h>
#include <processgroup/sched_policy.h>

#include <errno.h>
#include <string.h>
#include <sys/epoll.h>

#include <vector>
29
30 namespace android {
31 namespace automotive {
32 namespace watchdog {
33
34 using ::android::sp;
35 using ::android::base::Error;
36 using ::android::base::Result;
37 using ::android::base::StringPrintf;
38
// Name given to the monitor thread; also used in the thread setup warning logs.
constexpr char kThreadName[] = "PressureMonitor";
40
PressureLevelToString(PressureLevel pressureLevel)41 std::string PressureMonitorInterface::PressureLevelToString(PressureLevel pressureLevel) {
42 switch (pressureLevel) {
43 case PRESSURE_LEVEL_NONE:
44 return "PRESSURE_LEVEL_NONE";
45 case PRESSURE_LEVEL_LOW:
46 return "PRESSURE_LEVEL_LOW";
47 case PRESSURE_LEVEL_MEDIUM:
48 return "PRESSURE_LEVEL_MEDIUM";
49 case PRESSURE_LEVEL_HIGH:
50 return "PRESSURE_LEVEL_HIGH";
51 default:
52 return "UNKNOWN_PRESSURE_LEVEL";
53 }
54 }
55
init()56 Result<void> PressureMonitor::init() {
57 std::string memoryPath = StringPrintf("%s/%s", kProcPressureDirPath.c_str(), kMemoryFile);
58 if (access(memoryPath.c_str(), R_OK) != 0) {
59 return Error() << "'" << memoryPath << "' path is not accessible";
60 }
61
62 Mutex::Autolock lock(mMutex);
63 // TODO(b/335508921): Read the below stall type and thresholds from system properties (one per
64 // pressure level).
65 mPressureLevels.push_back(PressureLevelInfo{
66 .kPressureLevel = PRESSURE_LEVEL_LOW,
67 .kStallType = kLowPsiStallLevel,
68 .kThresholdUs = kLowThresholdUs,
69 });
70 mPressureLevels.push_back(PressureLevelInfo{
71 .kPressureLevel = PRESSURE_LEVEL_MEDIUM,
72 .kStallType = kMediumPsiStallLevel,
73 .kThresholdUs = kMediumThresholdUs,
74 });
75 mPressureLevels.push_back(PressureLevelInfo{
76 .kPressureLevel = PRESSURE_LEVEL_HIGH,
77 .kStallType = kHighPsiStallLevel,
78 .kThresholdUs = kHighThresholdUs,
79 });
80
81 if (const auto& result = initializePsiMonitorsLocked(); !result.ok()) {
82 destroyActivePsiMonitorsLocked();
83 return Error() << "Failed to initialize memory PSI monitors: " << result.error();
84 }
85
86 mIsEnabled = true;
87 return {};
88 }
89
terminate()90 void PressureMonitor::terminate() {
91 {
92 Mutex::Autolock lock(mMutex);
93 mIsMonitorActive = false;
94 mHandlerLooper->removeMessages(sp<PressureMonitor>::fromExisting(this));
95 mHandlerLooper->wake();
96 }
97 if (mMonitorThread.joinable()) {
98 mMonitorThread.join();
99 }
100 {
101 Mutex::Autolock lock(mMutex);
102 destroyActivePsiMonitorsLocked();
103 }
104 }
105
initializePsiMonitorsLocked()106 Result<void> PressureMonitor::initializePsiMonitorsLocked() {
107 if (mPsiEpollFd = epoll_create(mPressureLevels.size()); mPsiEpollFd < 0) {
108 return Error() << "epoll_create failed: " << strerror(errno);
109 }
110
111 int totalActivePsiMonitors = 0;
112 for (auto& info : mPressureLevels) {
113 if (info.kThresholdUs.count() == 0) {
114 ALOGI("Disabled PSI monitor for %s",
115 PressureLevelToString(info.kPressureLevel).c_str());
116 continue;
117 }
118 // TODO(b/335508921): Read the below window size from system properties. This need to be
119 // read from system properties (one per pressure level) and store in the PressureLevelInfo.
120 if (info.kThresholdUs >= kPsiWindowSizeUs) {
121 return Error() << "Threshold duration (" << info.kThresholdUs.count()
122 << ") must be less than the window size duration ("
123 << kPsiWindowSizeUs.count() << ") for "
124 << PressureLevelToString(info.kPressureLevel);
125 }
126 // The algorithm that determines the current pressure level and notifies the clients
127 // require all PSI monitors to be initialized successfully. So, early fail when one of
128 // PSI monitor fails to initialize.
129 int fd = mInitPsiMonitorFunc(info.kStallType, info.kThresholdUs.count(),
130 kPsiWindowSizeUs.count(), PSI_MEMORY);
131 if (fd < 0) {
132 return Error() << "Failed to initialize memory PSI monitor for "
133 << PressureLevelToString(info.kPressureLevel) << ": " << strerror(errno);
134 }
135 if (mRegisterPsiMonitorFunc(mPsiEpollFd, fd, reinterpret_cast<void*>(info.kPressureLevel)) <
136 0) {
137 mDestroyPsiMonitorFunc(fd);
138 return Error() << "Failed to register memory PSI monitor for "
139 << PressureLevelToString(info.kPressureLevel) << ": " << strerror(errno);
140 }
141 info.psiMonitorFd = fd;
142 ++totalActivePsiMonitors;
143 }
144 if (totalActivePsiMonitors == 0) {
145 return Error() << "No PSI monitors are initialized because all PSI levels are disabled";
146 }
147 ALOGI("Successfully initialized %d memory PSI monitors", totalActivePsiMonitors);
148 return {};
149 }
150
destroyActivePsiMonitorsLocked()151 void PressureMonitor::destroyActivePsiMonitorsLocked() {
152 int totalDestroyedPsiMonitors = 0;
153 for (auto& info : mPressureLevels) {
154 if (info.psiMonitorFd < 0) {
155 continue;
156 }
157 if (mUnregisterPsiMonitorFunc(mPsiEpollFd, info.psiMonitorFd) < 0) {
158 ALOGE("Failed to unregister memory PSI monitor for %s: %s",
159 PressureLevelToString(info.kPressureLevel).c_str(), strerror(errno));
160 }
161 mDestroyPsiMonitorFunc(info.psiMonitorFd);
162 info.psiMonitorFd = -1;
163 ++totalDestroyedPsiMonitors;
164 }
165 if (mPsiEpollFd > 0) {
166 close(mPsiEpollFd);
167 mPsiEpollFd = -1;
168 }
169 ALOGI("Destroyed %d memory PSI monitors", totalDestroyedPsiMonitors);
170 }
171
start()172 Result<void> PressureMonitor::start() {
173 {
174 Mutex::Autolock lock(mMutex);
175 if (!mIsEnabled) {
176 return Error() << "Monitor is either disabled or not initialized";
177 }
178 if (mMonitorThread.joinable()) {
179 return Error()
180 << "Pressure monitoring is already in progress. So skipping this request";
181 }
182 mIsMonitorActive = true;
183 }
184 mMonitorThread = std::thread([&]() {
185 if (set_sched_policy(0, SP_BACKGROUND) != 0) {
186 ALOGW("Failed to set background scheduling priority to %s thread", kThreadName);
187 }
188 if (int result = pthread_setname_np(pthread_self(), kThreadName); result != 0) {
189 ALOGW("Failed to set %s thread name: %d", kThreadName, result);
190 }
191 bool isMonitorActive;
192 {
193 Mutex::Autolock lock(mMutex);
194 mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0));
195 mLastPollUptimeNs = mHandlerLooper->now();
196 mHandlerLooper->sendMessage(sp<PressureMonitor>::fromExisting(this),
197 LooperMessage::MONITOR_PRESSURE);
198 isMonitorActive = mIsMonitorActive;
199 }
200 ALOGI("Starting pressure monitor");
201 while (isMonitorActive) {
202 mHandlerLooper->pollAll(/*timeoutMillis=*/-1);
203 Mutex::Autolock lock(mMutex);
204 isMonitorActive = mIsMonitorActive;
205 }
206 });
207 return {};
208 }
209
registerPressureChangeCallback(sp<PressureChangeCallbackInterface> callback)210 Result<void> PressureMonitor::registerPressureChangeCallback(
211 sp<PressureChangeCallbackInterface> callback) {
212 Mutex::Autolock lock(mMutex);
213 if (mPressureChangeCallbacks.find(callback) != mPressureChangeCallbacks.end()) {
214 return Error() << "Callback is already registered";
215 }
216 mPressureChangeCallbacks.insert(callback);
217 return {};
218 }
219
unregisterPressureChangeCallback(sp<PressureChangeCallbackInterface> callback)220 void PressureMonitor::unregisterPressureChangeCallback(
221 sp<PressureChangeCallbackInterface> callback) {
222 Mutex::Autolock lock(mMutex);
223 const auto& it = mPressureChangeCallbacks.find(callback);
224 if (it == mPressureChangeCallbacks.end()) {
225 ALOGE("Pressure change callback is not registered. Skipping unregister request");
226 return;
227 }
228 mPressureChangeCallbacks.erase(it);
229 }
230
handleMessage(const Message & message)231 void PressureMonitor::handleMessage(const Message& message) {
232 Result<void> result;
233 switch (message.what) {
234 case LooperMessage::MONITOR_PRESSURE:
235 if (const auto& monitorResult = monitorPressure(); !monitorResult.ok()) {
236 result = Error() << "Failed to monitor pressure: " << monitorResult.error();
237 }
238 break;
239 case LooperMessage::NOTIFY_PRESSURE_CHANGE:
240 notifyPressureChange();
241 break;
242 default:
243 ALOGE("Skipping unknown pressure monitor message: %d", message.what);
244 }
245 if (!result.ok()) {
246 ALOGE("Terminating pressure monitor: %s", result.error().message().c_str());
247 Mutex::Autolock lock(mMutex);
248 mIsMonitorActive = false;
249 }
250 }
251
monitorPressure()252 Result<void> PressureMonitor::monitorPressure() {
253 size_t maxEvents;
254 int psiEpollFd;
255 {
256 Mutex::Autolock lock(mMutex);
257 psiEpollFd = mPsiEpollFd;
258 maxEvents = mPressureLevels.size();
259 }
260 if (psiEpollFd < 0) {
261 return Error() << "Memory pressure monitor is not initialized";
262 }
263 struct epoll_event* events = new epoll_event[maxEvents];
264 auto result = waitForLatestPressureLevel(psiEpollFd, events, maxEvents);
265 if (!result.ok()) {
266 delete[] events;
267 return Error() << "Failed to get the latest pressure level: " << result.error();
268 }
269 delete[] events;
270
271 Mutex::Autolock lock(mMutex);
272 if (mLatestPressureLevel != *result) {
273 mLatestPressureLevel = *result;
274 mHandlerLooper->sendMessage(sp<PressureMonitor>::fromExisting(this),
275 LooperMessage::NOTIFY_PRESSURE_CHANGE);
276 }
277
278 mLastPollUptimeNs +=
279 std::chrono::duration_cast<std::chrono::nanoseconds>(mPollingIntervalMillis).count();
280 // The NOTIFY_PRESSURE_CHANGE message must be handled before MONITOR_PRESSURE message.
281 // Otherwise, the callbacks won't be notified of the recent pressure level change. To avoid
282 // inserting MONITOR_PRESSURE message before NOTIFY_PRESSURE_CHANGE message, check the uptime.
283 nsecs_t now = mHandlerLooper->now();
284 mHandlerLooper->sendMessageAtTime(mLastPollUptimeNs > now ? mLastPollUptimeNs : now,
285 sp<PressureMonitor>::fromExisting(this),
286 LooperMessage::MONITOR_PRESSURE);
287 return {};
288 }
289
waitForLatestPressureLevel(int psiEpollFd,epoll_event * events,size_t maxEvents)290 Result<PressureMonitor::PressureLevel> PressureMonitor::waitForLatestPressureLevel(
291 int psiEpollFd, epoll_event* events, size_t maxEvents) {
292 PressureLevel highestActivePressure;
293 {
294 Mutex::Autolock lock(mMutex);
295 highestActivePressure = mLatestPressureLevel;
296 }
297 int totalActiveEvents;
298 do {
299 if (highestActivePressure == PRESSURE_LEVEL_NONE) {
300 // When the recent pressure level was none, wait with no timeout until the pressure
301 // increases.
302 totalActiveEvents = mEpollWaitFunc(psiEpollFd, events, maxEvents, /*timeout=*/-1);
303 } else {
304 // When the recent pressure level was high, assume that the pressure will stay high
305 // for at least 1 second. Within 1 second window, the memory pressure state can go up
306 // causing an event to trigger or it can go down when the window expires.
307
308 // TODO(b/333411972): Review whether 1 second wait is sufficient and whether an event
309 // will trigger if the memory pressure continues to stay higher for more than this
310 // period.
311 totalActiveEvents =
312 mEpollWaitFunc(psiEpollFd, events, maxEvents, mPollingIntervalMillis.count());
313 if (totalActiveEvents == 0) {
314 return PRESSURE_LEVEL_NONE;
315 }
316 }
317 // Keep waiting if interrupted.
318 } while (totalActiveEvents == -1 && errno == EINTR);
319
320 if (totalActiveEvents == -1) {
321 return Error() << "epoll_wait failed while waiting for PSI events: " << strerror(errno);
322 }
323 // Reset and identify the recent highest active pressure from the PSI events.
324 highestActivePressure = PRESSURE_LEVEL_NONE;
325
326 for (int i = 0; i < totalActiveEvents; i++) {
327 if (events[i].events & (EPOLLERR | EPOLLHUP)) {
328 // Should never happen unless psi got disabled in the Kernel.
329 return Error() << "Memory pressure events are not available anymore";
330 }
331 if (events[i].data.u32 > highestActivePressure) {
332 highestActivePressure = static_cast<PressureLevel>(events[i].data.u32);
333 }
334 }
335 return highestActivePressure;
336 }
337
notifyPressureChange()338 void PressureMonitor::notifyPressureChange() {
339 PressureLevel pressureLevel;
340 std::unordered_set<sp<PressureChangeCallbackInterface>, SpHash<PressureChangeCallbackInterface>>
341 callbacks;
342 {
343 Mutex::Autolock lock(mMutex);
344 pressureLevel = mLatestPressureLevel;
345 callbacks = mPressureChangeCallbacks;
346 }
347 if (DEBUG) {
348 ALOGD("Sending pressure change notification to %zu callbacks", callbacks.size());
349 }
350 for (const sp<PressureChangeCallbackInterface>& callback : callbacks) {
351 callback->onPressureChanged(pressureLevel);
352 }
353 }
354
355 } // namespace watchdog
356 } // namespace automotive
357 } // namespace android
358