1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server.devicepolicy;
18 
19 import static java.util.concurrent.TimeUnit.MILLISECONDS;
20 import static java.util.concurrent.TimeUnit.NANOSECONDS;
21 
22 import android.app.admin.DeviceAdminReceiver;
23 import android.app.admin.SecurityLog;
24 import android.app.admin.SecurityLog.SecurityEvent;
25 import android.os.Process;
26 import android.os.SystemClock;
27 import android.util.Log;
28 import android.util.Slog;
29 
30 import com.android.internal.annotations.GuardedBy;
31 import com.android.internal.annotations.VisibleForTesting;
32 
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.List;
36 import java.util.concurrent.Semaphore;
37 import java.util.concurrent.TimeUnit;
38 import java.util.concurrent.locks.Lock;
39 import java.util.concurrent.locks.ReentrantLock;
40 
41 /**
42  * A class managing access to the security logs. It maintains an internal buffer of pending
43  * logs to be retrieved by the device owner. The logs are retrieved from the logd daemon via
44  * JNI binding, and kept until device owner has retrieved to prevent loss of logs. Access to
45  * the logs from the device owner is rate-limited, and device owner is notified when the logs
46  * are ready to be retrieved. This happens every two hours, or when our internal buffer is
47  * larger than a certain threshold.
48  */
49 class SecurityLogMonitor implements Runnable {
50     private final DevicePolicyManagerService mService;
51 
52     private final Lock mLock = new ReentrantLock();
53 
SecurityLogMonitor(DevicePolicyManagerService service)54     SecurityLogMonitor(DevicePolicyManagerService service) {
55         this(service, 0 /* id */);
56     }
57 
58     @VisibleForTesting
SecurityLogMonitor(DevicePolicyManagerService service, long id)59     SecurityLogMonitor(DevicePolicyManagerService service, long id) {
60         this.mService = service;
61         this.mId = id;
62         this.mLastForceNanos = System.nanoTime();
63     }
64 
65     private static final boolean DEBUG = false;  // STOPSHIP if true.
66     private static final String TAG = "SecurityLogMonitor";
67     /**
68      * Each log entry can hold up to 4K bytes (but as of {@link android.os.Build.VERSION_CODES#N}
69      * it should be less than 100 bytes), setting 1024 entries as the threshold to notify Device
70      * Owner.
71      */
72     @VisibleForTesting static final int BUFFER_ENTRIES_NOTIFICATION_LEVEL = 1024;
73     /**
74      * The maximum number of entries we should store before dropping earlier logs, to limit the
75      * memory usage.
76      */
77     private static final int BUFFER_ENTRIES_MAXIMUM_LEVEL = BUFFER_ENTRIES_NOTIFICATION_LEVEL * 10;
78     /**
79      * Critical log buffer level, 90% of capacity.
80      */
81     private static final int BUFFER_ENTRIES_CRITICAL_LEVEL = BUFFER_ENTRIES_MAXIMUM_LEVEL * 9 / 10;
82     /**
83      * How often should Device Owner be notified under normal circumstances.
84      */
85     private static final long RATE_LIMIT_INTERVAL_MS = TimeUnit.HOURS.toMillis(2);
86     /**
87      * How often to retry the notification about available logs if it is ignored or missed by DO.
88      */
89     private static final long BROADCAST_RETRY_INTERVAL_MS = TimeUnit.MINUTES.toMillis(30);
90     /**
91      * Internally how often should the monitor poll the security logs from logd.
92      */
93     private static final long POLLING_INTERVAL_MS = TimeUnit.MINUTES.toMillis(1);
94     /**
95      * Overlap between two subsequent log requests, required to avoid losing out of order events.
96      */
97     private static final long OVERLAP_NS = TimeUnit.SECONDS.toNanos(3);
98 
99     /** Minimum time between forced fetch attempts. */
100     private static final long FORCE_FETCH_THROTTLE_NS = TimeUnit.SECONDS.toNanos(10);
101 
102     @GuardedBy("mLock")
103     private Thread mMonitorThread = null;
104     @GuardedBy("mLock")
105     private ArrayList<SecurityEvent> mPendingLogs = new ArrayList<>();
106     @GuardedBy("mLock")
107     private long mId;
108     @GuardedBy("mLock")
109     private boolean mAllowedToRetrieve = false;
110 
111     // Whether we have already logged the fact that log buffer reached 90%, to avoid dupes.
112     @GuardedBy("mLock")
113     private boolean mCriticalLevelLogged = false;
114 
115     /**
116      * Last events fetched from log to check for overlap between batches. We can leave it empty if
117      * we are sure there will be no overlap anymore, e.g. when we get empty batch.
118      */
119     private final ArrayList<SecurityEvent> mLastEvents = new ArrayList<>();
120     /** Timestamp of the very last event, -1 means request from the beginning of time. */
121     private long mLastEventNanos = -1;
122 
123     /**
124      * When DO will be allowed to retrieve the log, in milliseconds since boot (as per
125      * {@link SystemClock#elapsedRealtime()}). After that it will mark the time to retry broadcast.
126      */
127     @GuardedBy("mLock")
128     private long mNextAllowedRetrievalTimeMillis = -1;
129     @GuardedBy("mLock")
130     private boolean mPaused = false;
131 
132     /** Semaphore used to force log fetching on request from adb. */
133     private final Semaphore mForceSemaphore = new Semaphore(0 /* permits */);
134 
135     /** The last timestamp when force fetch was used, to prevent abuse. */
136     @GuardedBy("mForceSemaphore")
137     private long mLastForceNanos = 0;
138 
start()139     void start() {
140         Slog.i(TAG, "Starting security logging.");
141         SecurityLog.writeEvent(SecurityLog.TAG_LOGGING_STARTED);
142         mLock.lock();
143         try {
144             if (mMonitorThread == null) {
145                 mPendingLogs = new ArrayList<>();
146                 mCriticalLevelLogged = false;
147                 mId = 0;
148                 mAllowedToRetrieve = false;
149                 mNextAllowedRetrievalTimeMillis = -1;
150                 mPaused = false;
151 
152                 mMonitorThread = new Thread(this);
153                 mMonitorThread.start();
154             }
155         } finally {
156             mLock.unlock();
157         }
158     }
159 
stop()160     void stop() {
161         Slog.i(TAG, "Stopping security logging.");
162         SecurityLog.writeEvent(SecurityLog.TAG_LOGGING_STOPPED);
163         mLock.lock();
164         try {
165             if (mMonitorThread != null) {
166                 mMonitorThread.interrupt();
167                 try {
168                     mMonitorThread.join(TimeUnit.SECONDS.toMillis(5));
169                 } catch (InterruptedException e) {
170                     Log.e(TAG, "Interrupted while waiting for thread to stop", e);
171                 }
172                 // Reset state and clear buffer
173                 mPendingLogs = new ArrayList<>();
174                 mId = 0;
175                 mAllowedToRetrieve = false;
176                 mNextAllowedRetrievalTimeMillis = -1;
177                 mPaused = false;
178                 mMonitorThread = null;
179             }
180         } finally {
181             mLock.unlock();
182         }
183     }
184 
185     /**
186      * If logs are being collected, keep collecting them but stop notifying the device owner that
187      * new logs are available (since they cannot be retrieved).
188      */
pause()189     void pause() {
190         Slog.i(TAG, "Paused.");
191 
192         mLock.lock();
193         mPaused = true;
194         mAllowedToRetrieve = false;
195         mLock.unlock();
196     }
197 
198     /**
199      * If logs are being collected, start notifying the device owner when logs are ready to be
200      * retrieved again (if it was paused).
201      * <p>If logging is enabled and there are logs ready to be retrieved, this method will attempt
202      * to notify the device owner. Therefore calling identity should be cleared before calling it
203      * (in case the method is called from a user other than the DO's user).
204      */
resume()205     void resume() {
206         mLock.lock();
207         try {
208             if (!mPaused) {
209                 Log.d(TAG, "Attempted to resume, but logging is not paused.");
210                 return;
211             }
212             mPaused = false;
213             mAllowedToRetrieve = false;
214         } finally {
215             mLock.unlock();
216         }
217 
218         Slog.i(TAG, "Resumed.");
219         try {
220             notifyDeviceOwnerIfNeeded(false /* force */);
221         } catch (InterruptedException e) {
222             Log.w(TAG, "Thread interrupted.", e);
223         }
224     }
225 
226     /**
227      * Discard all collected logs.
228      */
discardLogs()229     void discardLogs() {
230         mLock.lock();
231         mAllowedToRetrieve = false;
232         mPendingLogs = new ArrayList<>();
233         mCriticalLevelLogged = false;
234         mLock.unlock();
235         Slog.i(TAG, "Discarded all logs.");
236     }
237 
238     /**
239      * Returns the new batch of logs since the last call to this method. Returns null if
240      * rate limit is exceeded.
241      */
retrieveLogs()242     List<SecurityEvent> retrieveLogs() {
243         mLock.lock();
244         try {
245             if (mAllowedToRetrieve) {
246                 mAllowedToRetrieve = false;
247                 mNextAllowedRetrievalTimeMillis = SystemClock.elapsedRealtime()
248                         + RATE_LIMIT_INTERVAL_MS;
249                 List<SecurityEvent> result = mPendingLogs;
250                 mPendingLogs = new ArrayList<>();
251                 mCriticalLevelLogged = false;
252                 return result;
253             } else {
254                 return null;
255             }
256         } finally {
257             mLock.unlock();
258         }
259     }
260 
261     /**
262      * Requests the next (or the first) batch of events from the log with appropriate timestamp.
263      */
getNextBatch(ArrayList<SecurityEvent> newLogs)264     private void getNextBatch(ArrayList<SecurityEvent> newLogs) throws IOException {
265         if (mLastEventNanos < 0) {
266             // Non-blocking read that returns all logs immediately.
267             if (DEBUG) Slog.d(TAG, "SecurityLog.readEvents");
268             SecurityLog.readEvents(newLogs);
269         } else {
270             // If we have last events from the previous batch, request log events with time overlap
271             // with previously retrieved messages to avoid losing events due to reordering in logd.
272             final long startNanos = mLastEvents.isEmpty()
273                     ? mLastEventNanos : Math.max(0, mLastEventNanos - OVERLAP_NS);
274             if (DEBUG) Slog.d(TAG, "SecurityLog.readEventsSince: " + startNanos);
275             // Non-blocking read that returns all logs with timestamps >= startNanos immediately.
276             SecurityLog.readEventsSince(startNanos, newLogs);
277         }
278 
279         // Sometimes events may be reordered in logd due to simultaneous readers and writers. In
280         // this case, we have to sort it to make overlap checking work. This is very unlikely.
281         for (int i = 0; i < newLogs.size() - 1; i++) {
282             if (newLogs.get(i).getTimeNanos() > newLogs.get(i+1).getTimeNanos()) {
283                 if (DEBUG) Slog.d(TAG, "Got out of order events, sorting.");
284                 // Sort using comparator that compares timestamps.
285                 newLogs.sort((e1, e2) -> Long.signum(e1.getTimeNanos() - e2.getTimeNanos()));
286                 break;
287             }
288         }
289 
290         if (DEBUG) Slog.d(TAG, "Got " + newLogs.size() + " new events.");
291     }
292 
293     /**
294      * Save the last events for overlap checking with the next batch.
295      */
saveLastEvents(ArrayList<SecurityEvent> newLogs)296     private void saveLastEvents(ArrayList<SecurityEvent> newLogs) {
297         mLastEvents.clear();
298         if (newLogs.isEmpty()) {
299             // This can happen if no events were logged yet or the buffer got cleared. In this case
300             // we aren't going to have any overlap next time, leave mLastEvents events empty.
301             return;
302         }
303 
304         // Save the last timestamp.
305         mLastEventNanos = newLogs.get(newLogs.size() - 1).getTimeNanos();
306         // Position of the earliest event that has to be saved. Start from the penultimate event,
307         // going backward.
308         int pos = newLogs.size() - 2;
309         while (pos >= 0 && mLastEventNanos - newLogs.get(pos).getTimeNanos() < OVERLAP_NS) {
310             pos--;
311         }
312         // We either run past the start of the list or encountered an event that is too old to keep.
313         pos++;
314         mLastEvents.addAll(newLogs.subList(pos, newLogs.size()));
315         if (DEBUG) Slog.d(TAG, mLastEvents.size() + " events saved for overlap check");
316     }
317 
318     /**
319      * Merges a new batch into already fetched logs and deals with overlapping and out of order
320      * events.
321      */
322     @GuardedBy("mLock")
mergeBatchLocked(final ArrayList<SecurityEvent> newLogs)323     private void mergeBatchLocked(final ArrayList<SecurityEvent> newLogs) {
324         // Reserve capacity so that copying doesn't occur.
325         mPendingLogs.ensureCapacity(mPendingLogs.size() + newLogs.size());
326         // Run through the first events of the batch to check if there is an overlap with previous
327         // batch and if so, skip overlapping events. Events are sorted by timestamp, so we can
328         // compare it in linear time by advancing two pointers, one for each batch.
329         int curPos = 0;
330         int lastPos = 0;
331         // For the first batch mLastEvents will be empty, so no iterations will happen.
332         while (lastPos < mLastEvents.size() && curPos < newLogs.size()) {
333             final SecurityEvent curEvent = newLogs.get(curPos);
334             final long currentNanos = curEvent.getTimeNanos();
335             if (currentNanos > mLastEventNanos) {
336                 // We got past the last event of the last batch, no overlap possible anymore.
337                 break;
338             }
339             final SecurityEvent lastEvent = mLastEvents.get(lastPos);
340             final long lastNanos = lastEvent.getTimeNanos();
341             if (lastNanos > currentNanos) {
342                 // New event older than the last we've seen so far, must be due to reordering.
343                 if (DEBUG) Slog.d(TAG, "New event in the overlap: " + currentNanos);
344                 assignLogId(curEvent);
345                 mPendingLogs.add(curEvent);
346                 curPos++;
347             } else if (lastNanos < currentNanos) {
348                 if (DEBUG) Slog.d(TAG, "Event disappeared from the overlap: " + lastNanos);
349                 lastPos++;
350             } else {
351                 // Two events have the same timestamp, check if they are the same.
352                 if (lastEvent.equals(curEvent)) {
353                     // Actual overlap, just skip the event.
354                     if (DEBUG) Slog.d(TAG, "Skipped dup event with timestamp: " + lastNanos);
355                 } else {
356                     // Wow, what a coincidence, or probably the clock is too coarse.
357                     assignLogId(curEvent);
358                     mPendingLogs.add(curEvent);
359                     if (DEBUG) Slog.d(TAG, "Event timestamp collision: " + lastNanos);
360                 }
361                 lastPos++;
362                 curPos++;
363             }
364         }
365         // Assign an id to the new logs, after the overlap with mLastEvents.
366         List<SecurityEvent> idLogs = newLogs.subList(curPos, newLogs.size());
367         for (SecurityEvent event : idLogs) {
368             assignLogId(event);
369         }
370         // Save the rest of the new batch.
371         mPendingLogs.addAll(idLogs);
372 
373         checkCriticalLevel();
374 
375         if (mPendingLogs.size() > BUFFER_ENTRIES_MAXIMUM_LEVEL) {
376             // Truncate buffer down to half of BUFFER_ENTRIES_MAXIMUM_LEVEL.
377             mPendingLogs = new ArrayList<>(mPendingLogs.subList(
378                     mPendingLogs.size() - (BUFFER_ENTRIES_MAXIMUM_LEVEL / 2),
379                     mPendingLogs.size()));
380             mCriticalLevelLogged = false;
381             Slog.i(TAG, "Pending logs buffer full. Discarding old logs.");
382         }
383         if (DEBUG) Slog.d(TAG, mPendingLogs.size() + " pending events in the buffer after merging,"
384                 + " with ids " + mPendingLogs.get(0).getId()
385                 + " to " + mPendingLogs.get(mPendingLogs.size() - 1).getId());
386     }
387 
388     @GuardedBy("mLock")
checkCriticalLevel()389     private void checkCriticalLevel() {
390         if (!SecurityLog.isLoggingEnabled()) {
391             return;
392         }
393 
394         if (mPendingLogs.size() >= BUFFER_ENTRIES_CRITICAL_LEVEL) {
395             if (!mCriticalLevelLogged) {
396                 mCriticalLevelLogged = true;
397                 SecurityLog.writeEvent(SecurityLog.TAG_LOG_BUFFER_SIZE_CRITICAL);
398             }
399         }
400     }
401 
402     @GuardedBy("mLock")
assignLogId(SecurityEvent event)403     private void assignLogId(SecurityEvent event) {
404         event.setId(mId);
405         if (mId == Long.MAX_VALUE) {
406             Slog.i(TAG, "Reached maximum id value; wrapping around.");
407             mId = 0;
408         } else {
409             mId++;
410         }
411     }
412 
413     @Override
run()414     public void run() {
415         Process.setThreadPriority(Process.THREAD_PRIORITY_BACKGROUND);
416 
417         final ArrayList<SecurityEvent> newLogs = new ArrayList<>();
418         while (!Thread.currentThread().isInterrupted()) {
419             try {
420                 final boolean force = mForceSemaphore.tryAcquire(POLLING_INTERVAL_MS, MILLISECONDS);
421 
422                 getNextBatch(newLogs);
423 
424                 mLock.lockInterruptibly();
425                 try {
426                     mergeBatchLocked(newLogs);
427                 } finally {
428                     mLock.unlock();
429                 }
430 
431                 saveLastEvents(newLogs);
432                 newLogs.clear();
433                 notifyDeviceOwnerIfNeeded(force);
434             } catch (IOException e) {
435                 Log.e(TAG, "Failed to read security log", e);
436             } catch (InterruptedException e) {
437                 Log.i(TAG, "Thread interrupted, exiting.", e);
438                 // We are asked to stop.
439                 break;
440             }
441         }
442 
443         // Discard previous batch info.
444         mLastEvents.clear();
445         if (mLastEventNanos != -1) {
446             // Make sure we don't read old events if logging is re-enabled. Since mLastEvents is
447             // empty, the next request will be done without overlap, so it is enough to add 1 ns.
448             mLastEventNanos += 1;
449         }
450 
451         Slog.i(TAG, "MonitorThread exit.");
452     }
453 
notifyDeviceOwnerIfNeeded(boolean force)454     private void notifyDeviceOwnerIfNeeded(boolean force) throws InterruptedException {
455         boolean allowRetrievalAndNotifyDO = false;
456         mLock.lockInterruptibly();
457         try {
458             if (mPaused) {
459                 return;
460             }
461             final int logSize = mPendingLogs.size();
462             if (logSize >= BUFFER_ENTRIES_NOTIFICATION_LEVEL || (force && logSize > 0)) {
463                 // Allow DO to retrieve logs if too many pending logs or if forced.
464                 if (!mAllowedToRetrieve) {
465                     allowRetrievalAndNotifyDO = true;
466                 }
467                 if (DEBUG) Slog.d(TAG, "Number of log entries over threshold: " + logSize);
468             }
469             if (logSize > 0 && SystemClock.elapsedRealtime() >= mNextAllowedRetrievalTimeMillis) {
470                 // Rate limit reset
471                 allowRetrievalAndNotifyDO = true;
472                 if (DEBUG) Slog.d(TAG, "Timeout reached");
473             }
474             if (allowRetrievalAndNotifyDO) {
475                 mAllowedToRetrieve = true;
476                 // Set the timeout to retry the notification if the DO misses it.
477                 mNextAllowedRetrievalTimeMillis = SystemClock.elapsedRealtime()
478                         + BROADCAST_RETRY_INTERVAL_MS;
479             }
480         } finally {
481             mLock.unlock();
482         }
483         if (allowRetrievalAndNotifyDO) {
484             Slog.i(TAG, "notify DO");
485             mService.sendDeviceOwnerCommand(DeviceAdminReceiver.ACTION_SECURITY_LOGS_AVAILABLE,
486                     null);
487         }
488     }
489 
490     /**
491      * Forces the logs to be fetched and made available. Returns 0 on success or timeout to wait
492      * before attempting in milliseconds.
493      */
forceLogs()494     public long forceLogs() {
495         final long nowNanos = System.nanoTime();
496         // We only synchronize with another calls to this function, not with the fetching thread.
497         synchronized (mForceSemaphore) {
498             final long toWaitNanos = mLastForceNanos + FORCE_FETCH_THROTTLE_NS - nowNanos;
499             if (toWaitNanos > 0) {
500                 return NANOSECONDS.toMillis(toWaitNanos) + 1; // Round up.
501             }
502             mLastForceNanos = nowNanos;
503             // There is a race condition with the fetching thread below, but if the last permit is
504             // acquired just after we do the check, logs are forced anyway and that's what we need.
505             if (mForceSemaphore.availablePermits() == 0) {
506                 mForceSemaphore.release();
507             }
508             return 0;
509         }
510     }
511 }
512