1 /*
2  * Copyright (c) 2024, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
#include "PressureMonitor.h"

#include <android-base/stringprintf.h>
#include <log/log.h>
#include <processgroup/sched_policy.h>

#include <errno.h>
#include <string.h>
#include <sys/epoll.h>

#include <vector>
29 
30 namespace android {
31 namespace automotive {
32 namespace watchdog {
33 
34 using ::android::sp;
35 using ::android::base::Error;
36 using ::android::base::Result;
37 using ::android::base::StringPrintf;
38 
39 constexpr const char kThreadName[] = "PressureMonitor";
40 
PressureLevelToString(PressureLevel pressureLevel)41 std::string PressureMonitorInterface::PressureLevelToString(PressureLevel pressureLevel) {
42     switch (pressureLevel) {
43         case PRESSURE_LEVEL_NONE:
44             return "PRESSURE_LEVEL_NONE";
45         case PRESSURE_LEVEL_LOW:
46             return "PRESSURE_LEVEL_LOW";
47         case PRESSURE_LEVEL_MEDIUM:
48             return "PRESSURE_LEVEL_MEDIUM";
49         case PRESSURE_LEVEL_HIGH:
50             return "PRESSURE_LEVEL_HIGH";
51         default:
52             return "UNKNOWN_PRESSURE_LEVEL";
53     }
54 }
55 
init()56 Result<void> PressureMonitor::init() {
57     std::string memoryPath = StringPrintf("%s/%s", kProcPressureDirPath.c_str(), kMemoryFile);
58     if (access(memoryPath.c_str(), R_OK) != 0) {
59         return Error() << "'" << memoryPath << "' path is not accessible";
60     }
61 
62     Mutex::Autolock lock(mMutex);
63     // TODO(b/335508921): Read the below stall type and thresholds from system properties (one per
64     //  pressure level).
65     mPressureLevels.push_back(PressureLevelInfo{
66             .kPressureLevel = PRESSURE_LEVEL_LOW,
67             .kStallType = kLowPsiStallLevel,
68             .kThresholdUs = kLowThresholdUs,
69     });
70     mPressureLevels.push_back(PressureLevelInfo{
71             .kPressureLevel = PRESSURE_LEVEL_MEDIUM,
72             .kStallType = kMediumPsiStallLevel,
73             .kThresholdUs = kMediumThresholdUs,
74     });
75     mPressureLevels.push_back(PressureLevelInfo{
76             .kPressureLevel = PRESSURE_LEVEL_HIGH,
77             .kStallType = kHighPsiStallLevel,
78             .kThresholdUs = kHighThresholdUs,
79     });
80 
81     if (const auto& result = initializePsiMonitorsLocked(); !result.ok()) {
82         destroyActivePsiMonitorsLocked();
83         return Error() << "Failed to initialize memory PSI monitors: " << result.error();
84     }
85 
86     mIsEnabled = true;
87     return {};
88 }
89 
terminate()90 void PressureMonitor::terminate() {
91     {
92         Mutex::Autolock lock(mMutex);
93         mIsMonitorActive = false;
94         mHandlerLooper->removeMessages(sp<PressureMonitor>::fromExisting(this));
95         mHandlerLooper->wake();
96     }
97     if (mMonitorThread.joinable()) {
98         mMonitorThread.join();
99     }
100     {
101         Mutex::Autolock lock(mMutex);
102         destroyActivePsiMonitorsLocked();
103     }
104 }
105 
initializePsiMonitorsLocked()106 Result<void> PressureMonitor::initializePsiMonitorsLocked() {
107     if (mPsiEpollFd = epoll_create(mPressureLevels.size()); mPsiEpollFd < 0) {
108         return Error() << "epoll_create failed: " << strerror(errno);
109     }
110 
111     int totalActivePsiMonitors = 0;
112     for (auto& info : mPressureLevels) {
113         if (info.kThresholdUs.count() == 0) {
114             ALOGI("Disabled PSI monitor for %s",
115                   PressureLevelToString(info.kPressureLevel).c_str());
116             continue;
117         }
118         // TODO(b/335508921): Read the below window size from system properties. This need to be
119         //  read from system properties (one per pressure level) and store in the PressureLevelInfo.
120         if (info.kThresholdUs >= kPsiWindowSizeUs) {
121             return Error() << "Threshold duration (" << info.kThresholdUs.count()
122                            << ") must be less than the window size duration ("
123                            << kPsiWindowSizeUs.count() << ") for "
124                            << PressureLevelToString(info.kPressureLevel);
125         }
126         // The algorithm that determines the current pressure level and notifies the clients
127         // require all PSI monitors to be initialized successfully. So, early fail when one of
128         // PSI monitor fails to initialize.
129         int fd = mInitPsiMonitorFunc(info.kStallType, info.kThresholdUs.count(),
130                                      kPsiWindowSizeUs.count(), PSI_MEMORY);
131         if (fd < 0) {
132             return Error() << "Failed to initialize memory PSI monitor for "
133                            << PressureLevelToString(info.kPressureLevel) << ": " << strerror(errno);
134         }
135         if (mRegisterPsiMonitorFunc(mPsiEpollFd, fd, reinterpret_cast<void*>(info.kPressureLevel)) <
136             0) {
137             mDestroyPsiMonitorFunc(fd);
138             return Error() << "Failed to register memory PSI monitor for "
139                            << PressureLevelToString(info.kPressureLevel) << ": " << strerror(errno);
140         }
141         info.psiMonitorFd = fd;
142         ++totalActivePsiMonitors;
143     }
144     if (totalActivePsiMonitors == 0) {
145         return Error() << "No PSI monitors are initialized because all PSI levels are disabled";
146     }
147     ALOGI("Successfully initialized %d memory PSI monitors", totalActivePsiMonitors);
148     return {};
149 }
150 
destroyActivePsiMonitorsLocked()151 void PressureMonitor::destroyActivePsiMonitorsLocked() {
152     int totalDestroyedPsiMonitors = 0;
153     for (auto& info : mPressureLevels) {
154         if (info.psiMonitorFd < 0) {
155             continue;
156         }
157         if (mUnregisterPsiMonitorFunc(mPsiEpollFd, info.psiMonitorFd) < 0) {
158             ALOGE("Failed to unregister memory PSI monitor for %s: %s",
159                   PressureLevelToString(info.kPressureLevel).c_str(), strerror(errno));
160         }
161         mDestroyPsiMonitorFunc(info.psiMonitorFd);
162         info.psiMonitorFd = -1;
163         ++totalDestroyedPsiMonitors;
164     }
165     if (mPsiEpollFd > 0) {
166         close(mPsiEpollFd);
167         mPsiEpollFd = -1;
168     }
169     ALOGI("Destroyed %d memory PSI monitors", totalDestroyedPsiMonitors);
170 }
171 
start()172 Result<void> PressureMonitor::start() {
173     {
174         Mutex::Autolock lock(mMutex);
175         if (!mIsEnabled) {
176             return Error() << "Monitor is either disabled or not initialized";
177         }
178         if (mMonitorThread.joinable()) {
179             return Error()
180                     << "Pressure monitoring is already in progress. So skipping this request";
181         }
182         mIsMonitorActive = true;
183     }
184     mMonitorThread = std::thread([&]() {
185         if (set_sched_policy(0, SP_BACKGROUND) != 0) {
186             ALOGW("Failed to set background scheduling priority to %s thread", kThreadName);
187         }
188         if (int result = pthread_setname_np(pthread_self(), kThreadName); result != 0) {
189             ALOGW("Failed to set %s thread name: %d", kThreadName, result);
190         }
191         bool isMonitorActive;
192         {
193             Mutex::Autolock lock(mMutex);
194             mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0));
195             mLastPollUptimeNs = mHandlerLooper->now();
196             mHandlerLooper->sendMessage(sp<PressureMonitor>::fromExisting(this),
197                                         LooperMessage::MONITOR_PRESSURE);
198             isMonitorActive = mIsMonitorActive;
199         }
200         ALOGI("Starting pressure monitor");
201         while (isMonitorActive) {
202             mHandlerLooper->pollAll(/*timeoutMillis=*/-1);
203             Mutex::Autolock lock(mMutex);
204             isMonitorActive = mIsMonitorActive;
205         }
206     });
207     return {};
208 }
209 
registerPressureChangeCallback(sp<PressureChangeCallbackInterface> callback)210 Result<void> PressureMonitor::registerPressureChangeCallback(
211         sp<PressureChangeCallbackInterface> callback) {
212     Mutex::Autolock lock(mMutex);
213     if (mPressureChangeCallbacks.find(callback) != mPressureChangeCallbacks.end()) {
214         return Error() << "Callback is already registered";
215     }
216     mPressureChangeCallbacks.insert(callback);
217     return {};
218 }
219 
unregisterPressureChangeCallback(sp<PressureChangeCallbackInterface> callback)220 void PressureMonitor::unregisterPressureChangeCallback(
221         sp<PressureChangeCallbackInterface> callback) {
222     Mutex::Autolock lock(mMutex);
223     const auto& it = mPressureChangeCallbacks.find(callback);
224     if (it == mPressureChangeCallbacks.end()) {
225         ALOGE("Pressure change callback is not registered. Skipping unregister request");
226         return;
227     }
228     mPressureChangeCallbacks.erase(it);
229 }
230 
handleMessage(const Message & message)231 void PressureMonitor::handleMessage(const Message& message) {
232     Result<void> result;
233     switch (message.what) {
234         case LooperMessage::MONITOR_PRESSURE:
235             if (const auto& monitorResult = monitorPressure(); !monitorResult.ok()) {
236                 result = Error() << "Failed to monitor pressure: " << monitorResult.error();
237             }
238             break;
239         case LooperMessage::NOTIFY_PRESSURE_CHANGE:
240             notifyPressureChange();
241             break;
242         default:
243             ALOGE("Skipping unknown pressure monitor message: %d", message.what);
244     }
245     if (!result.ok()) {
246         ALOGE("Terminating pressure monitor: %s", result.error().message().c_str());
247         Mutex::Autolock lock(mMutex);
248         mIsMonitorActive = false;
249     }
250 }
251 
monitorPressure()252 Result<void> PressureMonitor::monitorPressure() {
253     size_t maxEvents;
254     int psiEpollFd;
255     {
256         Mutex::Autolock lock(mMutex);
257         psiEpollFd = mPsiEpollFd;
258         maxEvents = mPressureLevels.size();
259     }
260     if (psiEpollFd < 0) {
261         return Error() << "Memory pressure monitor is not initialized";
262     }
263     struct epoll_event* events = new epoll_event[maxEvents];
264     auto result = waitForLatestPressureLevel(psiEpollFd, events, maxEvents);
265     if (!result.ok()) {
266         delete[] events;
267         return Error() << "Failed to get the latest pressure level: " << result.error();
268     }
269     delete[] events;
270 
271     Mutex::Autolock lock(mMutex);
272     if (mLatestPressureLevel != *result) {
273         mLatestPressureLevel = *result;
274         mHandlerLooper->sendMessage(sp<PressureMonitor>::fromExisting(this),
275                                     LooperMessage::NOTIFY_PRESSURE_CHANGE);
276     }
277 
278     mLastPollUptimeNs +=
279             std::chrono::duration_cast<std::chrono::nanoseconds>(mPollingIntervalMillis).count();
280     // The NOTIFY_PRESSURE_CHANGE message must be handled before MONITOR_PRESSURE message.
281     // Otherwise, the callbacks won't be notified of the recent pressure level change. To avoid
282     // inserting MONITOR_PRESSURE message before NOTIFY_PRESSURE_CHANGE message, check the uptime.
283     nsecs_t now = mHandlerLooper->now();
284     mHandlerLooper->sendMessageAtTime(mLastPollUptimeNs > now ? mLastPollUptimeNs : now,
285                                       sp<PressureMonitor>::fromExisting(this),
286                                       LooperMessage::MONITOR_PRESSURE);
287     return {};
288 }
289 
waitForLatestPressureLevel(int psiEpollFd,epoll_event * events,size_t maxEvents)290 Result<PressureMonitor::PressureLevel> PressureMonitor::waitForLatestPressureLevel(
291         int psiEpollFd, epoll_event* events, size_t maxEvents) {
292     PressureLevel highestActivePressure;
293     {
294         Mutex::Autolock lock(mMutex);
295         highestActivePressure = mLatestPressureLevel;
296     }
297     int totalActiveEvents;
298     do {
299         if (highestActivePressure == PRESSURE_LEVEL_NONE) {
300             // When the recent pressure level was none, wait with no timeout until the pressure
301             // increases.
302             totalActiveEvents = mEpollWaitFunc(psiEpollFd, events, maxEvents, /*timeout=*/-1);
303         } else {
304             // When the recent pressure level was high, assume that the pressure will stay high
305             // for at least 1 second. Within 1 second window, the memory pressure state can go up
306             // causing an event to trigger or it can go down when the window expires.
307 
308             // TODO(b/333411972): Review whether 1 second wait is sufficient and whether an event
309             //  will trigger if the memory pressure continues to stay higher for more than this
310             //  period.
311             totalActiveEvents =
312                     mEpollWaitFunc(psiEpollFd, events, maxEvents, mPollingIntervalMillis.count());
313             if (totalActiveEvents == 0) {
314                 return PRESSURE_LEVEL_NONE;
315             }
316         }
317         // Keep waiting if interrupted.
318     } while (totalActiveEvents == -1 && errno == EINTR);
319 
320     if (totalActiveEvents == -1) {
321         return Error() << "epoll_wait failed while waiting for PSI events: " << strerror(errno);
322     }
323     // Reset and identify the recent highest active pressure from the PSI events.
324     highestActivePressure = PRESSURE_LEVEL_NONE;
325 
326     for (int i = 0; i < totalActiveEvents; i++) {
327         if (events[i].events & (EPOLLERR | EPOLLHUP)) {
328             // Should never happen unless psi got disabled in the Kernel.
329             return Error() << "Memory pressure events are not available anymore";
330         }
331         if (events[i].data.u32 > highestActivePressure) {
332             highestActivePressure = static_cast<PressureLevel>(events[i].data.u32);
333         }
334     }
335     return highestActivePressure;
336 }
337 
notifyPressureChange()338 void PressureMonitor::notifyPressureChange() {
339     PressureLevel pressureLevel;
340     std::unordered_set<sp<PressureChangeCallbackInterface>, SpHash<PressureChangeCallbackInterface>>
341             callbacks;
342     {
343         Mutex::Autolock lock(mMutex);
344         pressureLevel = mLatestPressureLevel;
345         callbacks = mPressureChangeCallbacks;
346     }
347     if (DEBUG) {
348         ALOGD("Sending pressure change notification to %zu callbacks", callbacks.size());
349     }
350     for (const sp<PressureChangeCallbackInterface>& callback : callbacks) {
351         callback->onPressureChanged(pressureLevel);
352     }
353 }
354 
355 }  // namespace watchdog
356 }  // namespace automotive
357 }  // namespace android
358