1 /**
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 
19 #include "ProcPidStat.h"
20 
21 #include <android-base/file.h>
22 #include <android-base/parseint.h>
23 #include <android-base/strings.h>
24 #include <dirent.h>
25 #include <log/log.h>
26 
27 #include <string>
28 #include <unordered_map>
29 #include <vector>
30 
31 namespace android {
32 namespace automotive {
33 namespace watchdog {
34 
35 using android::base::EndsWith;
36 using android::base::Error;
37 using android::base::ParseInt;
38 using android::base::ParseUint;
39 using android::base::ReadFileToString;
40 using android::base::Result;
41 using android::base::Split;
42 
43 namespace {
44 
// Error codes attached to the Result errors produced by the file readers in this file, so that
// callers can tell unparsable contents apart from open/read failures.
enum ReadError {
    // The file was read but its contents are not in the expected format.
    ERR_INVALID_FILE = 0,
    // The file could not be opened or read.
    ERR_FILE_OPEN_READ,
    // Count of the error codes above; not an error code itself.
    NUM_ERRORS,
};
50 
51 // /proc/PID/stat or /proc/PID/task/TID/stat format:
52 // <pid> <comm> <state> <ppid> <pgrp ID> <session ID> <tty_nr> <tpgid> <flags> <minor faults>
53 // <children minor faults> <major faults> <children major faults> <user mode time>
54 // <system mode time> <children user mode time> <children kernel mode time> <priority> <nice value>
55 // <num threads> <start time since boot> <virtual memory size> <resident set size> <rss soft limit>
56 // <start code addr> <end code addr> <start stack addr> <ESP value> <EIP> <bitmap of pending sigs>
57 // <bitmap of blocked sigs> <bitmap of ignored sigs> <waiting channel> <num pages swapped>
58 // <cumulative pages swapped> <exit signal> <processor #> <real-time prio> <agg block I/O delays>
59 // <guest time> <children guest time> <start data addr> <end data addr> <start break addr>
60 // <cmd line args start addr> <amd line args end addr> <env start addr> <env end addr> <exit code>
61 // Example line: 1 (init) S 0 0 0 0 0 0 0 0 220 0 0 0 0 0 0 0 2 0 0 ...etc...
parsePidStatLine(const std::string & line,PidStat * pidStat)62 bool parsePidStatLine(const std::string& line, PidStat* pidStat) {
63     std::vector<std::string> fields = Split(line, " ");
64 
65     // Note: Regex parsing for the below logic increased the time taken to run the
66     // ProcPidStatTest#TestProcPidStatContentsFromDevice from 151.7ms to 1.3 seconds.
67 
68     // Comm string is enclosed with ( ) brackets and may contain space(s). Thus calculate the
69     // commEndOffset based on the field that contains the closing bracket.
70     size_t commEndOffset = 0;
71     for (size_t i = 1; i < fields.size(); ++i) {
72         pidStat->comm += fields[i];
73         if (EndsWith(fields[i], ")")) {
74             commEndOffset = i - 1;
75             break;
76         }
77         pidStat->comm += " ";
78     }
79 
80     if (pidStat->comm.front() != '(' || pidStat->comm.back() != ')') {
81         ALOGW("Comm string `%s` not enclosed in brackets", pidStat->comm.c_str());
82         return false;
83     }
84     pidStat->comm.erase(pidStat->comm.begin());
85     pidStat->comm.erase(pidStat->comm.end() - 1);
86 
87     // The required data is in the first 22 + |commEndOffset| fields so make sure there are at least
88     // these many fields in the file.
89     if (fields.size() < 22 + commEndOffset || !ParseUint(fields[0], &pidStat->pid) ||
90         !ParseUint(fields[3 + commEndOffset], &pidStat->ppid) ||
91         !ParseUint(fields[11 + commEndOffset], &pidStat->majorFaults) ||
92         !ParseUint(fields[19 + commEndOffset], &pidStat->numThreads) ||
93         !ParseUint(fields[21 + commEndOffset], &pidStat->startTime)) {
94         ALOGW("Invalid proc pid stat contents: \"%s\"", line.c_str());
95         return false;
96     }
97     pidStat->state = fields[2 + commEndOffset];
98     return true;
99 }
100 
readPidStatFile(const std::string & path,PidStat * pidStat)101 Result<void> readPidStatFile(const std::string& path, PidStat* pidStat) {
102     std::string buffer;
103     if (!ReadFileToString(path, &buffer)) {
104         return Error(ERR_FILE_OPEN_READ) << "ReadFileToString failed for " << path;
105     }
106     std::vector<std::string> lines = Split(std::move(buffer), "\n");
107     if (lines.size() != 1 && (lines.size() != 2 || !lines[1].empty())) {
108         return Error(ERR_INVALID_FILE) << path << " contains " << lines.size() << " lines != 1";
109     }
110     if (!parsePidStatLine(std::move(lines[0]), pidStat)) {
111         return Error(ERR_INVALID_FILE) << "Failed to parse the contents of " << path;
112     }
113     return {};
114 }
115 
116 }  // namespace
117 
collect()118 Result<std::vector<ProcessStats>> ProcPidStat::collect() {
119     if (!mEnabled) {
120         return Error() << "Can not access PID stat files under " << kProcDirPath;
121     }
122 
123     Mutex::Autolock lock(mMutex);
124     const auto& processStats = getProcessStatsLocked();
125     if (!processStats) {
126         return Error() << processStats.error();
127     }
128 
129     std::vector<ProcessStats> delta;
130     for (const auto& it : *processStats) {
131         const ProcessStats& curStats = it.second;
132         const auto& cachedIt = mLastProcessStats.find(it.first);
133         if (cachedIt == mLastProcessStats.end() ||
134             cachedIt->second.process.startTime != curStats.process.startTime) {
135             // New/reused PID so don't calculate the delta.
136             delta.emplace_back(curStats);
137             continue;
138         }
139 
140         ProcessStats deltaStats = curStats;
141         const ProcessStats& cachedStats = cachedIt->second;
142         deltaStats.process.majorFaults -= cachedStats.process.majorFaults;
143         for (auto& deltaThread : deltaStats.threads) {
144             const auto& cachedThread = cachedStats.threads.find(deltaThread.first);
145             if (cachedThread == cachedStats.threads.end() ||
146                 cachedThread->second.startTime != deltaThread.second.startTime) {
147                 // New TID or TID reused by the same PID so don't calculate the delta.
148                 continue;
149             }
150             deltaThread.second.majorFaults -= cachedThread->second.majorFaults;
151         }
152         delta.emplace_back(deltaStats);
153     }
154     mLastProcessStats = *processStats;
155     return delta;
156 }
157 
getProcessStatsLocked() const158 Result<std::unordered_map<uint32_t, ProcessStats>> ProcPidStat::getProcessStatsLocked() const {
159     std::unordered_map<uint32_t, ProcessStats> processStats;
160     auto procDirp = std::unique_ptr<DIR, int (*)(DIR*)>(opendir(mPath.c_str()), closedir);
161     if (!procDirp) {
162         return Error() << "Failed to open " << mPath << " directory";
163     }
164     dirent* pidDir = nullptr;
165     while ((pidDir = readdir(procDirp.get())) != nullptr) {
166         // 1. Read top-level pid stats.
167         uint32_t pid = 0;
168         if (pidDir->d_type != DT_DIR || !ParseUint(pidDir->d_name, &pid)) {
169             continue;
170         }
171         ProcessStats curStats;
172         std::string path = StringPrintf((mPath + kStatFileFormat).c_str(), pid);
173         const auto& ret = readPidStatFile(path, &curStats.process);
174         if (!ret) {
175             // PID may disappear between scanning the directory and parsing the stat file.
176             // Thus treat ERR_FILE_OPEN_READ errors as soft errors.
177             if (ret.error().code() != ERR_FILE_OPEN_READ) {
178                 return Error() << "Failed to read top-level per-process stat file: "
179                                << ret.error().message().c_str();
180             }
181             ALOGW("Failed to read top-level per-process stat file %s: %s", path.c_str(),
182                   ret.error().message().c_str());
183             continue;
184         }
185 
186         // 2. When not found in the cache, fetch tgid/UID as soon as possible because processes
187         // may terminate during scanning.
188         const auto& it = mLastProcessStats.find(curStats.process.pid);
189         if (it == mLastProcessStats.end() ||
190             it->second.process.startTime != curStats.process.startTime || it->second.tgid == -1 ||
191             it->second.uid == -1) {
192             const auto& ret = getPidStatusLocked(&curStats);
193             if (!ret) {
194                 if (ret.error().code() != ERR_FILE_OPEN_READ) {
195                     return Error() << "Failed to read pid status for pid " << curStats.process.pid
196                                    << ": " << ret.error().message().c_str();
197                 }
198                 ALOGW("Failed to read pid status for pid %" PRIu32 ": %s", curStats.process.pid,
199                       ret.error().message().c_str());
200                 // Default tgid and uid values are -1 (aka unknown).
201             }
202         } else {
203             // Fetch from cache.
204             curStats.tgid = it->second.tgid;
205             curStats.uid = it->second.uid;
206         }
207 
208         if (curStats.tgid != -1 && curStats.tgid != curStats.process.pid) {
209             ALOGW("Skipping non-process (i.e., Tgid != PID) entry for PID %" PRIu32,
210                   curStats.process.pid);
211             continue;
212         }
213 
214         // 3. Fetch per-thread stats.
215         std::string taskDir = StringPrintf((mPath + kTaskDirFormat).c_str(), pid);
216         auto taskDirp = std::unique_ptr<DIR, int (*)(DIR*)>(opendir(taskDir.c_str()), closedir);
217         if (!taskDirp) {
218             // Treat this as a soft error so at least the process stats will be collected.
219             ALOGW("Failed to open %s directory", taskDir.c_str());
220         }
221         dirent* tidDir = nullptr;
222         bool didReadMainThread = false;
223         while (taskDirp != nullptr && (tidDir = readdir(taskDirp.get())) != nullptr) {
224             uint32_t tid = 0;
225             if (tidDir->d_type != DT_DIR || !ParseUint(tidDir->d_name, &tid)) {
226                 continue;
227             }
228             if (processStats.find(tid) != processStats.end()) {
229                 return Error() << "Process stats already exists for TID " << tid
230                                << ". Stats will be double counted";
231             }
232 
233             PidStat curThreadStat = {};
234             path = StringPrintf((taskDir + kStatFileFormat).c_str(), tid);
235             const auto& ret = readPidStatFile(path, &curThreadStat);
236             if (!ret) {
237                 if (ret.error().code() != ERR_FILE_OPEN_READ) {
238                     return Error() << "Failed to read per-thread stat file: "
239                                    << ret.error().message().c_str();
240                 }
241                 // Maybe the thread terminated before reading the file so skip this thread and
242                 // continue with scanning the next thread's stat.
243                 ALOGW("Failed to read per-thread stat file %s: %s", path.c_str(),
244                       ret.error().message().c_str());
245                 continue;
246             }
247             if (curThreadStat.pid == curStats.process.pid) {
248                 didReadMainThread = true;
249             }
250             curStats.threads[curThreadStat.pid] = curThreadStat;
251         }
252         if (!didReadMainThread) {
253             // In the event of failure to read main-thread info (mostly because the process
254             // terminated during scanning/parsing), fill out the stat that are common between main
255             // thread and the process.
256             curStats.threads[curStats.process.pid] = PidStat{
257                     .pid = curStats.process.pid,
258                     .comm = curStats.process.comm,
259                     .state = curStats.process.state,
260                     .ppid = curStats.process.ppid,
261                     .numThreads = curStats.process.numThreads,
262                     .startTime = curStats.process.startTime,
263             };
264         }
265         processStats[curStats.process.pid] = curStats;
266     }
267     return processStats;
268 }
269 
getPidStatusLocked(ProcessStats * processStats) const270 Result<void> ProcPidStat::getPidStatusLocked(ProcessStats* processStats) const {
271     std::string buffer;
272     std::string path = StringPrintf((mPath + kStatusFileFormat).c_str(), processStats->process.pid);
273     if (!ReadFileToString(path, &buffer)) {
274         return Error(ERR_FILE_OPEN_READ) << "ReadFileToString failed for " << path;
275     }
276     std::vector<std::string> lines = Split(std::move(buffer), "\n");
277     bool didReadUid = false;
278     bool didReadTgid = false;
279     for (size_t i = 0; i < lines.size(); ++i) {
280         if (lines[i].empty()) {
281             continue;
282         }
283         if (!lines[i].compare(0, 4, "Uid:")) {
284             if (didReadUid) {
285                 return Error(ERR_INVALID_FILE)
286                         << "Duplicate UID line: \"" << lines[i] << "\" in file " << path;
287             }
288             std::vector<std::string> fields = Split(lines[i], "\t");
289             if (fields.size() < 2 || !ParseInt(fields[1], &processStats->uid)) {
290                 return Error(ERR_INVALID_FILE)
291                         << "Invalid UID line: \"" << lines[i] << "\" in file " << path;
292             }
293             didReadUid = true;
294         } else if (!lines[i].compare(0, 5, "Tgid:")) {
295             if (didReadTgid) {
296                 return Error(ERR_INVALID_FILE)
297                         << "Duplicate Tgid line: \"" << lines[i] << "\" in file" << path;
298             }
299             std::vector<std::string> fields = Split(lines[i], "\t");
300             if (fields.size() != 2 || !ParseInt(fields[1], &processStats->tgid)) {
301                 return Error(ERR_INVALID_FILE)
302                         << "Invalid tgid line: \"" << lines[i] << "\" in file" << path;
303             }
304             didReadTgid = true;
305         }
306     }
307     if (!didReadUid || !didReadTgid) {
308         return Error(ERR_INVALID_FILE) << "Incomplete file " << mPath + kStatusFileFormat;
309     }
310     return {};
311 }
312 
313 }  // namespace watchdog
314 }  // namespace automotive
315 }  // namespace android
316