1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * A service that exchanges time synchronization information between
19  * a master that defines a timeline and clients that follow the timeline.
20  */
21 
22 #define LOG_TAG "common_time"
23 #include <utils/Log.h>
24 
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <fcntl.h>
28 #include <inttypes.h>
29 #include <linux/if_ether.h>
30 #include <net/if.h>
31 #include <net/if_arp.h>
32 #include <netinet/ip.h>
33 #include <poll.h>
34 #include <stdio.h>
35 #include <sys/eventfd.h>
36 #include <sys/ioctl.h>
37 #include <sys/stat.h>
38 #include <sys/types.h>
39 #include <sys/socket.h>
40 
41 #include <common_time/local_clock.h>
42 #include <binder/IPCThreadState.h>
43 #include <binder/ProcessState.h>
44 #include <utils/Timers.h>
45 
46 #include "common_clock_service.h"
47 #include "common_time_config_service.h"
48 #include "common_time_server.h"
49 #include "common_time_server_packets.h"
50 #include "clock_recovery.h"
51 #include "common_clock.h"
52 
53 #define MAX_INT ((int)0x7FFFFFFF)
54 
55 namespace android {
56 
57 const char*    CommonTimeServer::kDefaultMasterElectionAddr = "255.255.255.255";
58 const uint16_t CommonTimeServer::kDefaultMasterElectionPort = 8886;
59 const uint64_t CommonTimeServer::kDefaultSyncGroupID = 1;
60 const uint8_t  CommonTimeServer::kDefaultMasterPriority = 1;
61 const uint32_t CommonTimeServer::kDefaultMasterAnnounceIntervalMs = 10000;
62 const uint32_t CommonTimeServer::kDefaultSyncRequestIntervalMs = 1000;
63 const uint32_t CommonTimeServer::kDefaultPanicThresholdUsec = 50000;
64 const bool     CommonTimeServer::kDefaultAutoDisable = true;
65 const int      CommonTimeServer::kSetupRetryTimeoutMs = 30000;
66 const int64_t  CommonTimeServer::kNoGoodDataPanicThresholdUsec = 600000000ll;
67 const uint32_t CommonTimeServer::kRTTDiscardPanicThreshMultiplier = 5;
68 
69 // timeout value representing an infinite timeout
70 const int CommonTimeServer::kInfiniteTimeout = -1;
71 
72 /*** Initial state constants ***/
73 
74 // number of WhoIsMaster attempts sent before giving up
75 const int CommonTimeServer::kInitial_NumWhoIsMasterRetries = 6;
76 
77 // timeout used when waiting for a response to a WhoIsMaster request
78 const int CommonTimeServer::kInitial_WhoIsMasterTimeoutMs = 500;
79 
80 /*** Client state constants ***/
81 
82 // number of sync requests that can fail before a client assumes its master
83 // is dead
84 const int CommonTimeServer::kClient_NumSyncRequestRetries = 10;
85 
86 /*** Master state constants ***/
87 
88 /*** Ronin state constants ***/
89 
90 // number of WhoIsMaster attempts sent before declaring ourselves master
91 const int CommonTimeServer::kRonin_NumWhoIsMasterRetries = 20;
92 
93 // timeout used when waiting for a response to a WhoIsMaster request
94 const int CommonTimeServer::kRonin_WhoIsMasterTimeoutMs = 500;
95 
96 /*** WaitForElection state constants ***/
97 
98 // how long do we wait for an announcement from a master before
99 // trying another election?
100 const int CommonTimeServer::kWaitForElection_TimeoutMs = 12500;
101 
CommonTimeServer()102 CommonTimeServer::CommonTimeServer()
103     : Thread(false)
104     , mState(ICommonClock::STATE_INITIAL)
105     , mClockRecovery(&mLocalClock, &mCommonClock)
106     , mSocket(-1)
107     , mLastPacketRxLocalTime(0)
108     , mTimelineID(ICommonClock::kInvalidTimelineID)
109     , mClockSynced(false)
110     , mCommonClockHasClients(false)
111     , mStateChangeLog("Recent State Change Events", 30)
112     , mElectionLog("Recent Master Election Traffic", 30)
113     , mBadPktLog("Recent Bad Packet RX Info", 8)
114     , mInitial_WhoIsMasterRequestTimeouts(0)
115     , mClient_MasterDeviceID(0)
116     , mClient_MasterDevicePriority(0)
117     , mRonin_WhoIsMasterRequestTimeouts(0) {
118     // zero out sync stats
119     resetSyncStats();
120 
121     // Setup the master election endpoint to use the default.
122     struct sockaddr_in* meep =
123         reinterpret_cast<struct sockaddr_in*>(&mMasterElectionEP);
124     memset(&mMasterElectionEP, 0, sizeof(mMasterElectionEP));
125     inet_aton(kDefaultMasterElectionAddr, &meep->sin_addr);
126     meep->sin_family = AF_INET;
127     meep->sin_port   = htons(kDefaultMasterElectionPort);
128 
129     // Zero out the master endpoint.
130     memset(&mMasterEP, 0, sizeof(mMasterEP));
131     mMasterEPValid    = false;
132     mBindIfaceValid   = false;
133     setForceLowPriority(false);
134 
135     // Set all remaining configuration parameters to their defaults.
136     mDeviceID                 = 0;
137     mSyncGroupID              = kDefaultSyncGroupID;
138     mMasterPriority           = kDefaultMasterPriority;
139     mMasterAnnounceIntervalMs = kDefaultMasterAnnounceIntervalMs;
140     mSyncRequestIntervalMs    = kDefaultSyncRequestIntervalMs;
141     mPanicThresholdUsec       = kDefaultPanicThresholdUsec;
142     mAutoDisable              = kDefaultAutoDisable;
143 
144     // Create the eventfd we will use to signal our thread to wake up when
145     // needed.
146     mWakeupThreadFD = eventfd(0, EFD_NONBLOCK);
147 
148     // seed the random number generator (used to generated timeline IDs)
149     srand48(static_cast<unsigned int>(systemTime()));
150 }
151 
~CommonTimeServer()152 CommonTimeServer::~CommonTimeServer() {
153     shutdownThread();
154 
155     // No need to grab the lock here.  We are in the destructor; if the the user
156     // has a thread in any of the APIs while the destructor is being called,
157     // there is a threading problem a the application level we cannot reasonably
158     // do anything about.
159     cleanupSocket_l();
160 
161     if (mWakeupThreadFD >= 0) {
162         close(mWakeupThreadFD);
163         mWakeupThreadFD = -1;
164     }
165 }
166 
startServices()167 bool CommonTimeServer::startServices() {
168     // start the ICommonClock service
169     mICommonClock = CommonClockService::instantiate(*this);
170     if (mICommonClock == NULL)
171         return false;
172 
173     // start the ICommonTimeConfig service
174     mICommonTimeConfig = CommonTimeConfigService::instantiate(*this);
175     if (mICommonTimeConfig == NULL)
176         return false;
177 
178     return true;
179 }
180 
threadLoop()181 bool CommonTimeServer::threadLoop() {
182     // Register our service interfaces.
183     if (!startServices())
184         return false;
185 
186     // Hold the lock while we are in the main thread loop.  It will release the
187     // lock when it blocks, and hold the lock at all other times.
188     mLock.lock();
189     runStateMachine_l();
190     mLock.unlock();
191 
192     IPCThreadState::self()->stopProcess();
193     return false;
194 }
195 
runStateMachine_l()196 bool CommonTimeServer::runStateMachine_l() {
197     if (!mLocalClock.initCheck())
198         return false;
199 
200     if (!mCommonClock.init(mLocalClock.getLocalFreq()))
201         return false;
202 
203     // Enter the initial state.
204     becomeInitial("startup");
205 
206     // run the state machine
207     while (!exitPending()) {
208         struct pollfd pfds[2];
209         int rc, timeout;
210         int eventCnt = 0;
211         int64_t wakeupTime;
212         uint32_t t1, t2;
213         bool needHandleTimeout = false;
214 
215         // We are always interested in our wakeup FD.
216         pfds[eventCnt].fd      = mWakeupThreadFD;
217         pfds[eventCnt].events  = POLLIN;
218         pfds[eventCnt].revents = 0;
219         eventCnt++;
220 
221         // If we have a valid socket, then we are interested in what it has to
222         // say as well.
223         if (mSocket >= 0) {
224             pfds[eventCnt].fd      = mSocket;
225             pfds[eventCnt].events  = POLLIN;
226             pfds[eventCnt].revents = 0;
227             eventCnt++;
228         }
229 
230         t1 = static_cast<uint32_t>(mCurTimeout.msecTillTimeout());
231         t2 = static_cast<uint32_t>(mClockRecovery.applyRateLimitedSlew());
232         timeout = static_cast<int>(t1 < t2 ? t1 : t2);
233 
234         // Note, we were holding mLock when this function was called.  We
235         // release it only while we are blocking and hold it at all other times.
236         mLock.unlock();
237         rc          = poll(pfds, eventCnt, timeout);
238         wakeupTime  = mLocalClock.getLocalTime();
239         mLock.lock();
240 
241         // Is it time to shutdown?  If so, don't hesitate... just do it.
242         if (exitPending())
243             break;
244 
245         // Did the poll fail?  This should never happen and is fatal if it does.
246         if (rc < 0) {
247             ALOGE("%s:%d poll failed", __PRETTY_FUNCTION__, __LINE__);
248             return false;
249         }
250 
251         if (rc == 0) {
252             needHandleTimeout = !mCurTimeout.msecTillTimeout();
253             if (needHandleTimeout)
254                 mCurTimeout.setTimeout(kInfiniteTimeout);
255         }
256 
257         // Were we woken up on purpose?  If so, clear the eventfd with a read.
258         if (pfds[0].revents)
259             clearPendingWakeupEvents_l();
260 
261         // Is out bind address dirty?  If so, clean up our socket (if any).
262         // Alternatively, do we have an active socket but should be auto
263         // disabled?  If so, release the socket and enter the proper sync state.
264         bool droppedSocket = false;
265         if (mBindIfaceDirty || ((mSocket >= 0) && shouldAutoDisable())) {
266             cleanupSocket_l();
267             mBindIfaceDirty = false;
268             droppedSocket = true;
269         }
270 
271         // Do we not have a socket but should have one?  If so, try to set one
272         // up.
273         if ((mSocket < 0) && mBindIfaceValid && !shouldAutoDisable()) {
274             if (setupSocket_l()) {
275                 // Success!  We are now joining a new network (either coming
276                 // from no network, or coming from a potentially different
277                 // network).  Force our priority to be lower so that we defer to
278                 // any other masters which may already be on the network we are
279                 // joining.  Later, when we enter either the client or the
280                 // master state, we will clear this flag and go back to our
281                 // normal election priority.
282                 setForceLowPriority(true);
283                 switch (mState) {
284                     // If we were in initial (whether we had a immediately
285                     // before this network or not) we want to simply reset the
286                     // system and start again.  Forcing a transition from
287                     // INITIAL to INITIAL should do the job.
288                     case CommonClockService::STATE_INITIAL:
289                         becomeInitial("bound interface");
290                         break;
291 
292                     // If we were in the master state, then either we were the
293                     // master in a no-network situation, or we were the master
294                     // of a different network and have moved to a new interface.
295                     // In either case, immediately transition to Ronin at low
296                     // priority.  If there is no one in the network we just
297                     // joined, we will become master soon enough.  If there is,
298                     // we want to be certain to defer master status to the
299                     // existing timeline currently running on the network.
300                     //
301                     case CommonClockService::STATE_MASTER:
302                         becomeRonin("leaving networkless mode");
303                         break;
304 
305                     // If we were in any other state (CLIENT, RONIN, or
306                     // WAIT_FOR_ELECTION) then we must be moving from one
307                     // network to another.  We have lost our old master;
308                     // transition to RONIN in an attempt to find a new master.
309                     // If there are none out there, we will just assume
310                     // responsibility for the timeline we used to be a client
311                     // of.
312                     default:
313                         becomeRonin("bound interface");
314                         break;
315                 }
316             } else {
317                 // That's odd... we failed to set up our socket.  This could be
318                 // due to some transient network change which will work itself
319                 // out shortly; schedule a retry attempt in the near future.
320                 mCurTimeout.setTimeout(kSetupRetryTimeoutMs);
321             }
322 
323             // One way or the other, we don't have any data to process at this
324             // point (since we just tried to bulid a new socket).  Loop back
325             // around and wait for the next thing to do.
326             continue;
327         } else if (droppedSocket) {
328             // We just lost our socket, and for whatever reason (either no
329             // config, or auto disable engaged) we are not supposed to rebuild
330             // one at this time.  We are not going to rebuild our socket until
331             // something about our config/auto-disabled status changes, so we
332             // are basically in network-less mode.  If we are already in either
333             // INITIAL or MASTER, just stay there until something changes.  If
334             // we are in any other state (CLIENT, RONIN or WAIT_FOR_ELECTION),
335             // then transition to either INITIAL or MASTER depending on whether
336             // or not our timeline is valid.
337             mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
338                     "Entering networkless mode interface is %s, "
339                     "shouldAutoDisable = %s",
340                     mBindIfaceValid ? "valid" : "invalid",
341                     shouldAutoDisable() ? "true" : "false");
342             if ((mState != ICommonClock::STATE_INITIAL) &&
343                 (mState != ICommonClock::STATE_MASTER)) {
344                 if (mTimelineID == ICommonClock::kInvalidTimelineID)
345                     becomeInitial("network-less mode");
346                 else
347                     becomeMaster("network-less mode");
348             }
349 
350             continue;
351         }
352 
353         // Time to handle the timeouts?
354         if (needHandleTimeout) {
355             if (!handleTimeout())
356                 ALOGE("handleTimeout failed");
357             continue;
358         }
359 
360         // Does our socket have data for us (assuming we still have one, we
361         // may have RXed a packet at the same time as a config change telling us
362         // to shut our socket down)?  If so, process its data.
363         if ((mSocket >= 0) && (eventCnt > 1) && (pfds[1].revents)) {
364             mLastPacketRxLocalTime = wakeupTime;
365             if (!handlePacket())
366                 ALOGE("handlePacket failed");
367         }
368     }
369 
370     cleanupSocket_l();
371     return true;
372 }
373 
clearPendingWakeupEvents_l()374 void CommonTimeServer::clearPendingWakeupEvents_l() {
375     int64_t tmp;
376     read(mWakeupThreadFD, &tmp, sizeof(tmp));
377 }
378 
wakeupThread_l()379 void CommonTimeServer::wakeupThread_l() {
380     int64_t tmp = 1;
381     write(mWakeupThreadFD, &tmp, sizeof(tmp));
382 }
383 
cleanupSocket_l()384 void CommonTimeServer::cleanupSocket_l() {
385     if (mSocket >= 0) {
386         close(mSocket);
387         mSocket = -1;
388     }
389 }
390 
shutdownThread()391 void CommonTimeServer::shutdownThread() {
392     // Flag the work thread for shutdown.
393     this->requestExit();
394 
395     // Signal the thread in case its sleeping.
396     mLock.lock();
397     wakeupThread_l();
398     mLock.unlock();
399 
400     // Wait for the thread to exit.
401     this->join();
402 }
403 
setupSocket_l()404 bool CommonTimeServer::setupSocket_l() {
405     int rc;
406     bool ret_val = false;
407     struct sockaddr_in* ipv4_addr = NULL;
408     char masterElectionEPStr[64];
409     const int one = 1;
410 
411     // This should never be needed, but if we happened to have an old socket
412     // lying around, be sure to not leak it before proceeding.
413     cleanupSocket_l();
414 
415     // If we don't have a valid endpoint to bind to, then how did we get here in
416     // the first place?  Regardless, we know that we are going to fail to bind,
417     // so don't even try.
418     if (!mBindIfaceValid)
419         return false;
420 
421     sockaddrToString(mMasterElectionEP, true, masterElectionEPStr,
422                      sizeof(masterElectionEPStr));
423     mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
424                         "Building socket :: bind = %s master election = %s",
425                         mBindIface.string(), masterElectionEPStr);
426 
427     // TODO: add proper support for IPv6.  Right now, we block IPv6 addresses at
428     // the configuration interface level.
429     if (AF_INET != mMasterElectionEP.ss_family) {
430         mStateChangeLog.log(ANDROID_LOG_WARN, LOG_TAG,
431                             "TODO: add proper IPv6 support");
432         goto bailout;
433     }
434 
435     // open a UDP socket for the timeline serivce
436     mSocket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
437     if (mSocket < 0) {
438         mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
439                             "Failed to create socket (errno = %d)", errno);
440         goto bailout;
441     }
442 
443     // Bind to the selected interface using Linux's spiffy SO_BINDTODEVICE.
444     struct ifreq ifr;
445     memset(&ifr, 0, sizeof(ifr));
446     snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", mBindIface.string());
447     ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = 0;
448     rc = setsockopt(mSocket, SOL_SOCKET, SO_BINDTODEVICE,
449                     (void *)&ifr, sizeof(ifr));
450     if (rc) {
451         mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
452                             "Failed to bind socket at to interface %s "
453                             "(errno = %d)", ifr.ifr_name, errno);
454         goto bailout;
455     }
456 
457     // Bind our socket to INADDR_ANY and the master election port.  The
458     // interface binding we made using SO_BINDTODEVICE should limit us to
459     // traffic only on the interface we are interested in.  We need to bind to
460     // INADDR_ANY and the specific master election port in order to be able to
461     // receive both unicast traffic and master election multicast traffic with
462     // just a single socket.
463     struct sockaddr_in bindAddr;
464     ipv4_addr = reinterpret_cast<struct sockaddr_in*>(&mMasterElectionEP);
465     memcpy(&bindAddr, ipv4_addr, sizeof(bindAddr));
466     bindAddr.sin_addr.s_addr = INADDR_ANY;
467     rc = bind(mSocket,
468               reinterpret_cast<const sockaddr *>(&bindAddr),
469               sizeof(bindAddr));
470     if (rc) {
471         mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
472                             "Failed to bind socket to port %hu (errno = %d)",
473                             ntohs(bindAddr.sin_port), errno);
474         goto bailout;
475     }
476 
477     if (0xE0000000 == (ntohl(ipv4_addr->sin_addr.s_addr) & 0xF0000000)) {
478         // If our master election endpoint is a multicast address, be sure to join
479         // the multicast group.
480         struct ip_mreq mreq;
481         mreq.imr_multiaddr = ipv4_addr->sin_addr;
482         mreq.imr_interface.s_addr = htonl(INADDR_ANY);
483         rc = setsockopt(mSocket, IPPROTO_IP, IP_ADD_MEMBERSHIP,
484                         &mreq, sizeof(mreq));
485         if (rc == -1) {
486             ALOGE("Failed to join multicast group at %s.  (errno = %d)",
487                  masterElectionEPStr, errno);
488             goto bailout;
489         }
490 
491         // disable loopback of multicast packets
492         const int zero = 0;
493         rc = setsockopt(mSocket, IPPROTO_IP, IP_MULTICAST_LOOP,
494                         &zero, sizeof(zero));
495         if (rc == -1) {
496             mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
497                                 "Failed to disable multicast loopback "
498                                 "(errno = %d)", errno);
499             goto bailout;
500         }
501     } else
502     if (ntohl(ipv4_addr->sin_addr.s_addr) == 0xFFFFFFFF) {
503         // If the master election address is the broadcast address, then enable
504         // the broadcast socket option
505         rc = setsockopt(mSocket, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one));
506         if (rc == -1) {
507             mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
508                                 "Failed to enable broadcast (errno = %d)",
509                                 errno);
510             goto bailout;
511         }
512     } else {
513         // If the master election address is neither broadcast, nor multicast,
514         // then we are misconfigured.  The config API layer should prevent this
515         // from ever happening.
516         goto bailout;
517     }
518 
519     // Set the TTL of sent packets to 1.  (Time protocol sync should never leave
520     // the local subnet)
521     rc = setsockopt(mSocket, IPPROTO_IP, IP_TTL, &one, sizeof(one));
522     if (rc == -1) {
523         mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
524                             "Failed to set TTL to %d (errno = %d)", one, errno);
525         goto bailout;
526     }
527 
528     // get the device's unique ID
529     if (!assignDeviceID())
530         goto bailout;
531 
532     ret_val = true;
533 
534 bailout:
535     if (!ret_val)
536         cleanupSocket_l();
537     return ret_val;
538 }
539 
540 // generate a unique device ID that can be used for arbitration
assignDeviceID()541 bool CommonTimeServer::assignDeviceID() {
542     if (!mBindIfaceValid)
543         return false;
544 
545     struct ifreq ifr;
546     memset(&ifr, 0, sizeof(ifr));
547     ifr.ifr_addr.sa_family = AF_INET;
548     strlcpy(ifr.ifr_name, mBindIface.string(), IFNAMSIZ);
549 
550     int rc = ioctl(mSocket, SIOCGIFHWADDR, &ifr);
551     if (rc) {
552         ALOGE("%s:%d ioctl failed", __PRETTY_FUNCTION__, __LINE__);
553         return false;
554     }
555 
556     if (ifr.ifr_addr.sa_family != ARPHRD_ETHER) {
557         ALOGE("%s:%d got non-Ethernet address", __PRETTY_FUNCTION__, __LINE__);
558         return false;
559     }
560 
561     mDeviceID = 0;
562     for (int i = 0; i < ETH_ALEN; i++) {
563         mDeviceID = (mDeviceID << 8) | ifr.ifr_hwaddr.sa_data[i];
564     }
565 
566     return true;
567 }
568 
569 // generate a new timeline ID
assignTimelineID()570 void CommonTimeServer::assignTimelineID() {
571     do {
572         mTimelineID = (static_cast<uint64_t>(lrand48()) << 32)
573                     |  static_cast<uint64_t>(lrand48());
574     } while (mTimelineID == ICommonClock::kInvalidTimelineID);
575 }
576 
577 // Select a preference between the device IDs of two potential masters.
578 // Returns true if the first ID wins, or false if the second ID wins.
arbitrateMaster(uint64_t deviceID1,uint8_t devicePrio1,uint64_t deviceID2,uint8_t devicePrio2)579 bool CommonTimeServer::arbitrateMaster(
580         uint64_t deviceID1, uint8_t devicePrio1,
581         uint64_t deviceID2, uint8_t devicePrio2) {
582     return ((devicePrio1 >  devicePrio2) ||
583            ((devicePrio1 == devicePrio2) && (deviceID1 > deviceID2)));
584 }
585 
hexDumpToString(const uint8_t * src,size_t src_len,char * dst,size_t dst_len)586 static void hexDumpToString(const uint8_t* src, size_t src_len,
587                             char* dst, size_t dst_len) {
588     size_t offset = 0;
589     size_t i;
590 
591     for (i = 0; (i < src_len) && (offset < dst_len); ++i) {
592         int res;
593         if (0 == (i % 16)) {
594             res = snprintf(dst + offset, dst_len - offset, "\n%04zx :", i);
595             if (res < 0)
596                 break;
597             offset += res;
598             if (offset >= dst_len)
599                 break;
600         }
601 
602         res = snprintf(dst + offset, dst_len - offset, " %02x", src[i]);
603         if (res < 0)
604             break;
605         offset += res;
606     }
607 
608     dst[dst_len - 1] = 0;
609 }
610 
handlePacket()611 bool CommonTimeServer::handlePacket() {
612     uint8_t buf[256];
613     struct sockaddr_storage srcAddr;
614     socklen_t srcAddrLen = sizeof(srcAddr);
615 
616     ssize_t recvBytes = recvfrom(
617             mSocket, buf, sizeof(buf), 0,
618             reinterpret_cast<const sockaddr *>(&srcAddr), &srcAddrLen);
619 
620     if (recvBytes < 0) {
621         mBadPktLog.log(ANDROID_LOG_ERROR, LOG_TAG,
622                        "recvfrom failed (res %d, errno %d)",
623                        recvBytes, errno);
624         return false;
625     }
626 
627     UniversalTimeServicePacket pkt;
628     if (pkt.deserializePacket(buf, recvBytes, mSyncGroupID) < 0) {
629         char hex[256];
630         char srcEPStr[64];
631 
632         hexDumpToString(buf, static_cast<size_t>(recvBytes), hex, sizeof(hex));
633         sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
634 
635         mBadPktLog.log("Failed to parse %d byte packet from %s.%s",
636                        recvBytes, srcEPStr, hex);
637         return false;
638     }
639 
640     bool result;
641     switch (pkt.packetType) {
642         case TIME_PACKET_WHO_IS_MASTER_REQUEST:
643             result = handleWhoIsMasterRequest(&pkt.p.who_is_master_request,
644                                               srcAddr);
645             break;
646 
647         case TIME_PACKET_WHO_IS_MASTER_RESPONSE:
648             result = handleWhoIsMasterResponse(&pkt.p.who_is_master_response,
649                                                srcAddr);
650             break;
651 
652         case TIME_PACKET_SYNC_REQUEST:
653             result = handleSyncRequest(&pkt.p.sync_request, srcAddr);
654             break;
655 
656         case TIME_PACKET_SYNC_RESPONSE:
657             result = handleSyncResponse(&pkt.p.sync_response, srcAddr);
658             break;
659 
660         case TIME_PACKET_MASTER_ANNOUNCEMENT:
661             result = handleMasterAnnouncement(&pkt.p.master_announcement,
662                                               srcAddr);
663             break;
664 
665         default: {
666             char srcEPStr[64];
667             sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
668 
669             mBadPktLog.log(ANDROID_LOG_WARN, LOG_TAG,
670                            "unknown packet type (%d) from %s",
671                            pkt.packetType, srcEPStr);
672 
673             result = false;
674         } break;
675     }
676 
677     return result;
678 }
679 
handleTimeout()680 bool CommonTimeServer::handleTimeout() {
681     // If we have no socket, then this must be a timeout to retry socket setup.
682     if (mSocket < 0)
683         return true;
684 
685     switch (mState) {
686         case ICommonClock::STATE_INITIAL:
687             return handleTimeoutInitial();
688         case ICommonClock::STATE_CLIENT:
689             return handleTimeoutClient();
690         case ICommonClock::STATE_MASTER:
691             return handleTimeoutMaster();
692         case ICommonClock::STATE_RONIN:
693             return handleTimeoutRonin();
694         case ICommonClock::STATE_WAIT_FOR_ELECTION:
695             return handleTimeoutWaitForElection();
696     }
697 
698     return false;
699 }
700 
handleTimeoutInitial()701 bool CommonTimeServer::handleTimeoutInitial() {
702     if (++mInitial_WhoIsMasterRequestTimeouts ==
703             kInitial_NumWhoIsMasterRetries) {
704         // none of our attempts to discover a master succeeded, so make
705         // this device the master
706         return becomeMaster("initial timeout");
707     } else {
708         // retry the WhoIsMaster request
709         return sendWhoIsMasterRequest();
710     }
711 }
712 
handleTimeoutClient()713 bool CommonTimeServer::handleTimeoutClient() {
714     if (shouldPanicNotGettingGoodData())
715         return becomeInitial("timeout panic, no good data");
716 
717     if (mClient_SyncRequestPending) {
718         mClient_SyncRequestPending = false;
719 
720         if (++mClient_SyncRequestTimeouts < kClient_NumSyncRequestRetries) {
721             // a sync request has timed out, so retry
722             return sendSyncRequest();
723         } else {
724             // The master has failed to respond to a sync request for too many
725             // times in a row.  Assume the master is dead and start electing
726             // a new master.
727             return becomeRonin("master not responding");
728         }
729     } else {
730         // initiate the next sync request
731         return sendSyncRequest();
732     }
733 }
734 
handleTimeoutMaster()735 bool CommonTimeServer::handleTimeoutMaster() {
736     // send another announcement from the master
737     return sendMasterAnnouncement();
738 }
739 
handleTimeoutRonin()740 bool CommonTimeServer::handleTimeoutRonin() {
741     if (++mRonin_WhoIsMasterRequestTimeouts == kRonin_NumWhoIsMasterRetries) {
742         // no other master is out there, so we won the election
743         return becomeMaster("no better masters detected");
744     } else {
745         return sendWhoIsMasterRequest();
746     }
747 }
748 
handleTimeoutWaitForElection()749 bool CommonTimeServer::handleTimeoutWaitForElection() {
750     return becomeRonin("timeout waiting for election conclusion");
751 }
752 
handleWhoIsMasterRequest(const WhoIsMasterRequestPacket * request,const sockaddr_storage & srcAddr)753 bool CommonTimeServer::handleWhoIsMasterRequest(
754         const WhoIsMasterRequestPacket* request,
755         const sockaddr_storage& srcAddr) {
756     // Skip our own messages which come back via broadcast loopback.
757     if (request->senderDeviceID == mDeviceID)
758         return true;
759 
760     char srcEPStr[64];
761     sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
762     mElectionLog.log("RXed WhoIs master request while in state %s.  "
763                      "src %s reqTID %016llx ourTID %016llx",
764                      stateToString(mState), srcEPStr,
765                      request->timelineID, mTimelineID);
766 
767     if (mState == ICommonClock::STATE_MASTER) {
768         // is this request related to this master's timeline?
769         if (request->timelineID != ICommonClock::kInvalidTimelineID &&
770             request->timelineID != mTimelineID)
771             return true;
772 
773         WhoIsMasterResponsePacket pkt;
774         pkt.initHeader(mTimelineID, mSyncGroupID);
775         pkt.deviceID = mDeviceID;
776         pkt.devicePriority = effectivePriority();
777 
778         mElectionLog.log("TXing WhoIs master resp to %s while in state %s.  "
779                          "ourTID %016llx ourGID %016llx ourDID %016llx "
780                          "ourPrio %u",
781                          srcEPStr, stateToString(mState),
782                          mTimelineID, mSyncGroupID,
783                          pkt.deviceID, pkt.devicePriority);
784 
785         uint8_t buf[256];
786         ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
787         if (bufSz < 0)
788             return false;
789 
790         ssize_t sendBytes = sendto(
791                 mSocket, buf, bufSz, 0,
792                 reinterpret_cast<const sockaddr *>(&srcAddr),
793                 sizeof(srcAddr));
794         if (sendBytes == -1) {
795             ALOGE("%s:%d sendto failed", __PRETTY_FUNCTION__, __LINE__);
796             return false;
797         }
798     } else if (mState == ICommonClock::STATE_RONIN) {
799         // if we hear a WhoIsMaster request from another device following
800         // the same timeline and that device wins arbitration, then we will stop
801         // trying to elect ourselves master and will instead wait for an
802         // announcement from the election winner
803         if (request->timelineID != mTimelineID)
804             return true;
805 
806         if (arbitrateMaster(request->senderDeviceID,
807                             request->senderDevicePriority,
808                             mDeviceID,
809                             effectivePriority()))
810             return becomeWaitForElection("would lose election");
811 
812         return true;
813     } else if (mState == ICommonClock::STATE_INITIAL) {
814         // If a group of devices booted simultaneously (e.g. after a power
815         // outage) and all of them are in the initial state and there is no
816         // master, then each device may time out and declare itself master at
817         // the same time.  To avoid this, listen for
818         // WhoIsMaster(InvalidTimeline) requests from peers.  If we would lose
819         // arbitration against that peer, reset our timeout count so that the
820         // peer has a chance to become master before we time out.
821         if (request->timelineID == ICommonClock::kInvalidTimelineID &&
822                 arbitrateMaster(request->senderDeviceID,
823                                 request->senderDevicePriority,
824                                 mDeviceID,
825                                 effectivePriority())) {
826             mInitial_WhoIsMasterRequestTimeouts = 0;
827         }
828     }
829 
830     return true;
831 }
832 
handleWhoIsMasterResponse(const WhoIsMasterResponsePacket * response,const sockaddr_storage & srcAddr)833 bool CommonTimeServer::handleWhoIsMasterResponse(
834         const WhoIsMasterResponsePacket* response,
835         const sockaddr_storage& srcAddr) {
836     // Skip our own messages which come back via broadcast loopback.
837     if (response->deviceID == mDeviceID)
838         return true;
839 
840     char srcEPStr[64];
841     sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
842     mElectionLog.log("RXed WhoIs master response while in state %s.  "
843                      "src %s respTID %016llx respDID %016llx respPrio %u "
844                      "ourTID %016llx",
845                      stateToString(mState), srcEPStr,
846                      response->timelineID,
847                      response->deviceID,
848                      static_cast<uint32_t>(response->devicePriority),
849                      mTimelineID);
850 
851     if (mState == ICommonClock::STATE_INITIAL || mState == ICommonClock::STATE_RONIN) {
852         return becomeClient(srcAddr,
853                             response->deviceID,
854                             response->devicePriority,
855                             response->timelineID,
856                             "heard whois response");
857     } else if (mState == ICommonClock::STATE_CLIENT) {
858         // if we get multiple responses because there are multiple devices
859         // who believe that they are master, then follow the master that
860         // wins arbitration
861         if (arbitrateMaster(response->deviceID,
862                             response->devicePriority,
863                             mClient_MasterDeviceID,
864                             mClient_MasterDevicePriority)) {
865             return becomeClient(srcAddr,
866                                 response->deviceID,
867                                 response->devicePriority,
868                                 response->timelineID,
869                                 "heard whois response");
870         }
871     }
872 
873     return true;
874 }
875 
handleSyncRequest(const SyncRequestPacket * request,const sockaddr_storage & srcAddr)876 bool CommonTimeServer::handleSyncRequest(const SyncRequestPacket* request,
877                                          const sockaddr_storage& srcAddr) {
878     SyncResponsePacket pkt;
879     pkt.initHeader(mTimelineID, mSyncGroupID);
880 
881     if ((mState == ICommonClock::STATE_MASTER) &&
882         (mTimelineID == request->timelineID)) {
883         int64_t rxLocalTime = mLastPacketRxLocalTime;
884         int64_t rxCommonTime;
885 
886         // If we are master on an actual network and have actual clients, then
887         // we are no longer low priority.
888         setForceLowPriority(false);
889 
890         if (OK != mCommonClock.localToCommon(rxLocalTime, &rxCommonTime)) {
891             return false;
892         }
893 
894         int64_t txLocalTime = mLocalClock.getLocalTime();;
895         int64_t txCommonTime;
896         if (OK != mCommonClock.localToCommon(txLocalTime, &txCommonTime)) {
897             return false;
898         }
899 
900         pkt.nak = 0;
901         pkt.clientTxLocalTime  = request->clientTxLocalTime;
902         pkt.masterRxCommonTime = rxCommonTime;
903         pkt.masterTxCommonTime = txCommonTime;
904     } else {
905         pkt.nak = 1;
906         pkt.clientTxLocalTime  = 0;
907         pkt.masterRxCommonTime = 0;
908         pkt.masterTxCommonTime = 0;
909     }
910 
911     uint8_t buf[256];
912     ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
913     if (bufSz < 0)
914         return false;
915 
916     ssize_t sendBytes = sendto(
917             mSocket, &buf, bufSz, 0,
918             reinterpret_cast<const sockaddr *>(&srcAddr),
919             sizeof(srcAddr));
920     if (sendBytes == -1) {
921         ALOGE("%s:%d sendto failed", __PRETTY_FUNCTION__, __LINE__);
922         return false;
923     }
924 
925     return true;
926 }
927 
handleSyncResponse(const SyncResponsePacket * response,const sockaddr_storage & srcAddr)928 bool CommonTimeServer::handleSyncResponse(
929         const SyncResponsePacket* response,
930         const sockaddr_storage& srcAddr) {
931     if (mState != ICommonClock::STATE_CLIENT)
932         return true;
933 
934     assert(mMasterEPValid);
935     if (!sockaddrMatch(srcAddr, mMasterEP, true)) {
936         char srcEP[64], expectedEP[64];
937         sockaddrToString(srcAddr, true, srcEP, sizeof(srcEP));
938         sockaddrToString(mMasterEP, true, expectedEP, sizeof(expectedEP));
939         ALOGI("Dropping sync response from unexpected address."
940              " Expected %s Got %s", expectedEP, srcEP);
941         return true;
942     }
943 
944     if (response->nak) {
945         // if our master is no longer accepting requests, then we need to find
946         // a new master
947         return becomeRonin("master NAK'ed");
948     }
949 
950     mClient_SyncRequestPending = 0;
951     mClient_SyncRequestTimeouts = 0;
952     mClient_PacketRTTLog.logRX(response->clientTxLocalTime,
953                                mLastPacketRxLocalTime);
954 
955     bool result;
956     if (!(mClient_SyncRespsRXedFromCurMaster++)) {
957         // the first request/response exchange between a client and a master
958         // may take unusually long due to ARP, so discard it.
959         result = true;
960     } else {
961         int64_t clientTxLocalTime  = response->clientTxLocalTime;
962         int64_t clientRxLocalTime  = mLastPacketRxLocalTime;
963         int64_t masterTxCommonTime = response->masterTxCommonTime;
964         int64_t masterRxCommonTime = response->masterRxCommonTime;
965 
966         int64_t rtt       = (clientRxLocalTime - clientTxLocalTime);
967         int64_t avgLocal  = (clientTxLocalTime + clientRxLocalTime) >> 1;
968         int64_t avgCommon = (masterTxCommonTime + masterRxCommonTime) >> 1;
969 
970         // if the RTT of the packet is significantly larger than the panic
971         // threshold, we should simply discard it.  Its better to do nothing
972         // than to take cues from a packet like that.
973         int64_t rttCommon = mCommonClock.localDurationToCommonDuration(rtt);
974         if (rttCommon > (static_cast<int64_t>(mPanicThresholdUsec) *
975                          kRTTDiscardPanicThreshMultiplier)) {
976             ALOGV("Dropping sync response with RTT of %" PRId64 " uSec", rttCommon);
977             mClient_ExpiredSyncRespsRXedFromCurMaster++;
978             if (shouldPanicNotGettingGoodData())
979                 return becomeInitial("RX panic, no good data");
980             return true;
981         } else {
982             result = mClockRecovery.pushDisciplineEvent(avgLocal, avgCommon, rttCommon);
983             mClient_LastGoodSyncRX = clientRxLocalTime;
984 
985             if (result) {
986                 // indicate to listeners that we've synced to the common timeline
987                 notifyClockSync();
988             } else {
989                 ALOGE("Panic!  Observed clock sync error is too high to tolerate,"
990                         " resetting state machine and starting over.");
991                 notifyClockSyncLoss();
992                 return becomeInitial("panic");
993             }
994         }
995     }
996 
997     mCurTimeout.setTimeout(mSyncRequestIntervalMs);
998     return result;
999 }
1000 
handleMasterAnnouncement(const MasterAnnouncementPacket * packet,const sockaddr_storage & srcAddr)1001 bool CommonTimeServer::handleMasterAnnouncement(
1002         const MasterAnnouncementPacket* packet,
1003         const sockaddr_storage& srcAddr) {
1004     uint64_t newDeviceID   = packet->deviceID;
1005     uint8_t  newDevicePrio = packet->devicePriority;
1006     uint64_t newTimelineID = packet->timelineID;
1007 
1008     // Skip our own messages which come back via broadcast loopback.
1009     if (newDeviceID == mDeviceID)
1010         return true;
1011 
1012     char srcEPStr[64];
1013     sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
1014     mElectionLog.log("RXed master announcement while in state %s.  "
1015                      "src %s srcDevID %lld srcPrio %u srcTID %016llx",
1016                      stateToString(mState), srcEPStr,
1017                      newDeviceID, static_cast<uint32_t>(newDevicePrio),
1018                      newTimelineID);
1019 
1020     if (mState == ICommonClock::STATE_INITIAL ||
1021         mState == ICommonClock::STATE_RONIN ||
1022         mState == ICommonClock::STATE_WAIT_FOR_ELECTION) {
1023         // if we aren't currently following a master, then start following
1024         // this new master
1025         return becomeClient(srcAddr,
1026                             newDeviceID,
1027                             newDevicePrio,
1028                             newTimelineID,
1029                             "heard master announcement");
1030     } else if (mState == ICommonClock::STATE_CLIENT) {
1031         // if the new master wins arbitration against our current master,
1032         // then become a client of the new master
1033         if (arbitrateMaster(newDeviceID,
1034                             newDevicePrio,
1035                             mClient_MasterDeviceID,
1036                             mClient_MasterDevicePriority))
1037             return becomeClient(srcAddr,
1038                                 newDeviceID,
1039                                 newDevicePrio,
1040                                 newTimelineID,
1041                                 "heard master announcement");
1042     } else if (mState == ICommonClock::STATE_MASTER) {
1043         // two masters are competing - if the new one wins arbitration, then
1044         // cease acting as master
1045         if (arbitrateMaster(newDeviceID, newDevicePrio,
1046                             mDeviceID, effectivePriority()))
1047             return becomeClient(srcAddr, newDeviceID,
1048                                 newDevicePrio, newTimelineID,
1049                                 "heard master announcement");
1050     }
1051 
1052     return true;
1053 }
1054 
sendWhoIsMasterRequest()1055 bool CommonTimeServer::sendWhoIsMasterRequest() {
1056     assert(mState == ICommonClock::STATE_INITIAL || mState == ICommonClock::STATE_RONIN);
1057 
1058     // If we have no socket, then we must be in the unconfigured initial state.
1059     // Don't report any errors, just don't try to send the initial who-is-master
1060     // query.  Eventually, our network will either become configured, or we will
1061     // be forced into network-less master mode by higher level code.
1062     if (mSocket < 0) {
1063         assert(mState == ICommonClock::STATE_INITIAL);
1064         return true;
1065     }
1066 
1067     bool ret = false;
1068     WhoIsMasterRequestPacket pkt;
1069     pkt.initHeader(mSyncGroupID);
1070     pkt.senderDeviceID = mDeviceID;
1071     pkt.senderDevicePriority = effectivePriority();
1072 
1073     uint8_t buf[256];
1074     ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
1075     if (bufSz >= 0) {
1076         char dstEPStr[64];
1077         sockaddrToString(mMasterElectionEP, true, dstEPStr, sizeof(dstEPStr));
1078         mElectionLog.log("TXing WhoIs master request to %s while in state %s.  "
1079                          "ourTID %016llx ourGID %016llx ourDID %016llx "
1080                          "ourPrio %u",
1081                          dstEPStr, stateToString(mState),
1082                          mTimelineID, mSyncGroupID,
1083                          pkt.senderDeviceID, pkt.senderDevicePriority);
1084 
1085         ssize_t sendBytes = sendto(
1086                 mSocket, buf, bufSz, 0,
1087                 reinterpret_cast<const sockaddr *>(&mMasterElectionEP),
1088                 sizeof(mMasterElectionEP));
1089         if (sendBytes < 0)
1090             ALOGE("WhoIsMaster sendto failed (errno %d)", errno);
1091         ret = true;
1092     }
1093 
1094     if (mState == ICommonClock::STATE_INITIAL) {
1095         mCurTimeout.setTimeout(kInitial_WhoIsMasterTimeoutMs);
1096     } else {
1097         mCurTimeout.setTimeout(kRonin_WhoIsMasterTimeoutMs);
1098     }
1099 
1100     return ret;
1101 }
1102 
sendSyncRequest()1103 bool CommonTimeServer::sendSyncRequest() {
1104     // If we are sending sync requests, then we must be in the client state and
1105     // we must have a socket (when we have no network, we are only supposed to
1106     // be in INITIAL or MASTER)
1107     assert(mState == ICommonClock::STATE_CLIENT);
1108     assert(mSocket >= 0);
1109 
1110     bool ret = false;
1111     SyncRequestPacket pkt;
1112     pkt.initHeader(mTimelineID, mSyncGroupID);
1113     pkt.clientTxLocalTime = mLocalClock.getLocalTime();
1114 
1115     if (!mClient_FirstSyncTX)
1116         mClient_FirstSyncTX = pkt.clientTxLocalTime;
1117 
1118     mClient_PacketRTTLog.logTX(pkt.clientTxLocalTime);
1119 
1120     uint8_t buf[256];
1121     ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
1122     if (bufSz >= 0) {
1123         ssize_t sendBytes = sendto(
1124                 mSocket, buf, bufSz, 0,
1125                 reinterpret_cast<const sockaddr *>(&mMasterEP),
1126                 sizeof(mMasterEP));
1127         if (sendBytes < 0)
1128             ALOGE("SyncRequest sendto failed (errno %d)", errno);
1129         ret = true;
1130     }
1131 
1132     mClient_SyncsSentToCurMaster++;
1133     mCurTimeout.setTimeout(mSyncRequestIntervalMs);
1134     mClient_SyncRequestPending = true;
1135 
1136     return ret;
1137 }
1138 
sendMasterAnnouncement()1139 bool CommonTimeServer::sendMasterAnnouncement() {
1140     bool ret = false;
1141     assert(mState == ICommonClock::STATE_MASTER);
1142 
1143     // If we are being asked to send a master announcement, but we have no
1144     // socket, we must be in network-less master mode.  Don't bother to send the
1145     // announcement, and don't bother to schedule a timeout.  When the network
1146     // comes up, the work thread will get poked and start the process of
1147     // figuring out who the current master should be.
1148     if (mSocket < 0) {
1149         mCurTimeout.setTimeout(kInfiniteTimeout);
1150         return true;
1151     }
1152 
1153     MasterAnnouncementPacket pkt;
1154     pkt.initHeader(mTimelineID, mSyncGroupID);
1155     pkt.deviceID = mDeviceID;
1156     pkt.devicePriority = effectivePriority();
1157 
1158     uint8_t buf[256];
1159     ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
1160     if (bufSz >= 0) {
1161         char dstEPStr[64];
1162         sockaddrToString(mMasterElectionEP, true, dstEPStr, sizeof(dstEPStr));
1163         mElectionLog.log("TXing Master announcement to %s while in state %s.  "
1164                          "ourTID %016llx ourGID %016llx ourDID %016llx "
1165                          "ourPrio %u",
1166                          dstEPStr, stateToString(mState),
1167                          mTimelineID, mSyncGroupID,
1168                          pkt.deviceID, pkt.devicePriority);
1169 
1170         ssize_t sendBytes = sendto(
1171                 mSocket, buf, bufSz, 0,
1172                 reinterpret_cast<const sockaddr *>(&mMasterElectionEP),
1173                 sizeof(mMasterElectionEP));
1174         if (sendBytes < 0)
1175             ALOGE("MasterAnnouncement sendto failed (errno %d)", errno);
1176         ret = true;
1177     }
1178 
1179     mCurTimeout.setTimeout(mMasterAnnounceIntervalMs);
1180     return ret;
1181 }
1182 
becomeClient(const sockaddr_storage & masterEP,uint64_t masterDeviceID,uint8_t masterDevicePriority,uint64_t timelineID,const char * cause)1183 bool CommonTimeServer::becomeClient(const sockaddr_storage& masterEP,
1184                                     uint64_t masterDeviceID,
1185                                     uint8_t  masterDevicePriority,
1186                                     uint64_t timelineID,
1187                                     const char* cause) {
1188     char newEPStr[64], oldEPStr[64];
1189     sockaddrToString(masterEP, true, newEPStr, sizeof(newEPStr));
1190     sockaddrToString(mMasterEP, mMasterEPValid, oldEPStr, sizeof(oldEPStr));
1191 
1192     mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1193             "%s --> CLIENT (%s) :%s"
1194             " OldMaster: %02x-%014llx::%016llx::%s"
1195             " NewMaster: %02x-%014llx::%016llx::%s",
1196             stateToString(mState), cause,
1197             (mTimelineID != timelineID) ? " (new timeline)" : "",
1198             mClient_MasterDevicePriority, mClient_MasterDeviceID,
1199             mTimelineID, oldEPStr,
1200             masterDevicePriority, masterDeviceID,
1201             timelineID, newEPStr);
1202 
1203     if (mTimelineID != timelineID) {
1204         // start following a new timeline
1205         mTimelineID = timelineID;
1206         mClockRecovery.reset(true, true);
1207         notifyClockSyncLoss();
1208     } else {
1209         // start following a new master on the existing timeline
1210         mClockRecovery.reset(false, true);
1211     }
1212 
1213     mMasterEP = masterEP;
1214     mMasterEPValid = true;
1215 
1216     // If we are on a real network as a client of a real master, then we should
1217     // no longer force low priority.  If our master disappears, we should have
1218     // the high priority bit set during the election to replace the master
1219     // because this group was a real group and not a singleton created in
1220     // networkless mode.
1221     setForceLowPriority(false);
1222 
1223     mClient_MasterDeviceID = masterDeviceID;
1224     mClient_MasterDevicePriority = masterDevicePriority;
1225     resetSyncStats();
1226 
1227     setState(ICommonClock::STATE_CLIENT);
1228 
1229     // add some jitter to when the various clients send their requests
1230     // in order to reduce the likelihood that a group of clients overload
1231     // the master after receiving a master announcement
1232     usleep((lrand48() % 100) * 1000);
1233 
1234     return sendSyncRequest();
1235 }
1236 
becomeMaster(const char * cause)1237 bool CommonTimeServer::becomeMaster(const char* cause) {
1238     uint64_t oldTimelineID = mTimelineID;
1239     if (mTimelineID == ICommonClock::kInvalidTimelineID) {
1240         // this device has not been following any existing timeline,
1241         // so it will create a new timeline and declare itself master
1242         assert(!mCommonClock.isValid());
1243 
1244         // set the common time basis
1245         mCommonClock.setBasis(mLocalClock.getLocalTime(), 0);
1246 
1247         // assign an arbitrary timeline iD
1248         assignTimelineID();
1249 
1250         // notify listeners that we've created a common timeline
1251         notifyClockSync();
1252     }
1253 
1254     mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1255             "%s --> MASTER (%s) : %s timeline %016llx",
1256             stateToString(mState), cause,
1257             (oldTimelineID == mTimelineID) ? "taking ownership of"
1258                                            : "creating new",
1259             mTimelineID);
1260 
1261     memset(&mMasterEP, 0, sizeof(mMasterEP));
1262     mMasterEPValid = false;
1263     mClient_MasterDevicePriority = effectivePriority();
1264     mClient_MasterDeviceID = mDeviceID;
1265     mClockRecovery.reset(false, true);
1266     resetSyncStats();
1267 
1268     setState(ICommonClock::STATE_MASTER);
1269     return sendMasterAnnouncement();
1270 }
1271 
becomeRonin(const char * cause)1272 bool CommonTimeServer::becomeRonin(const char* cause) {
1273     // If we were the client of a given timeline, but had never received even a
1274     // single time sync packet, then we transition back to Initial instead of
1275     // Ronin.  If we transition to Ronin and end up becoming the new Master, we
1276     // will be unable to service requests for other clients because we never
1277     // actually knew what time it was.  By going to initial, we ensure that
1278     // other clients who know what time it is, but would lose master arbitration
1279     // in the Ronin case, will step up and become the proper new master of the
1280     // old timeline.
1281 
1282     char oldEPStr[64];
1283     sockaddrToString(mMasterEP, mMasterEPValid, oldEPStr, sizeof(oldEPStr));
1284     memset(&mMasterEP, 0, sizeof(mMasterEP));
1285     mMasterEPValid = false;
1286 
1287     if (mCommonClock.isValid()) {
1288         mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1289              "%s --> RONIN (%s) : lost track of previously valid timeline "
1290              "%02x-%014llx::%016llx::%s (%d TXed %d RXed %d RXExpired)",
1291              stateToString(mState), cause,
1292              mClient_MasterDevicePriority, mClient_MasterDeviceID,
1293              mTimelineID, oldEPStr,
1294              mClient_SyncsSentToCurMaster,
1295              mClient_SyncRespsRXedFromCurMaster,
1296              mClient_ExpiredSyncRespsRXedFromCurMaster);
1297 
1298         mRonin_WhoIsMasterRequestTimeouts = 0;
1299         setState(ICommonClock::STATE_RONIN);
1300         return sendWhoIsMasterRequest();
1301     } else {
1302         mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1303              "%s --> INITIAL (%s) : never synced timeline "
1304              "%02x-%014llx::%016llx::%s (%d TXed %d RXed %d RXExpired)",
1305              stateToString(mState), cause,
1306              mClient_MasterDevicePriority, mClient_MasterDeviceID,
1307              mTimelineID, oldEPStr,
1308              mClient_SyncsSentToCurMaster,
1309              mClient_SyncRespsRXedFromCurMaster,
1310              mClient_ExpiredSyncRespsRXedFromCurMaster);
1311 
1312         return becomeInitial("ronin, no timeline");
1313     }
1314 }
1315 
becomeWaitForElection(const char * cause)1316 bool CommonTimeServer::becomeWaitForElection(const char* cause) {
1317     mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1318          "%s --> WAIT_FOR_ELECTION (%s) : dropping out of election,"
1319          " waiting %d mSec for completion.",
1320          stateToString(mState), cause, kWaitForElection_TimeoutMs);
1321 
1322     setState(ICommonClock::STATE_WAIT_FOR_ELECTION);
1323     mCurTimeout.setTimeout(kWaitForElection_TimeoutMs);
1324     return true;
1325 }
1326 
becomeInitial(const char * cause)1327 bool CommonTimeServer::becomeInitial(const char* cause) {
1328     mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1329                         "Entering INITIAL (%s), total reset.",
1330                         cause);
1331 
1332     setState(ICommonClock::STATE_INITIAL);
1333 
1334     // reset clock recovery
1335     mClockRecovery.reset(true, true);
1336 
1337     // reset internal state bookkeeping.
1338     mCurTimeout.setTimeout(kInfiniteTimeout);
1339     memset(&mMasterEP, 0, sizeof(mMasterEP));
1340     mMasterEPValid = false;
1341     mLastPacketRxLocalTime = 0;
1342     mTimelineID = ICommonClock::kInvalidTimelineID;
1343     mClockSynced = false;
1344     mInitial_WhoIsMasterRequestTimeouts = 0;
1345     mClient_MasterDeviceID = 0;
1346     mClient_MasterDevicePriority = 0;
1347     mRonin_WhoIsMasterRequestTimeouts = 0;
1348     resetSyncStats();
1349 
1350     // send the first request to discover the master
1351     return sendWhoIsMasterRequest();
1352 }
1353 
notifyClockSync()1354 void CommonTimeServer::notifyClockSync() {
1355     if (!mClockSynced) {
1356         mClockSynced = true;
1357         mICommonClock->notifyOnTimelineChanged(mTimelineID);
1358     }
1359 }
1360 
notifyClockSyncLoss()1361 void CommonTimeServer::notifyClockSyncLoss() {
1362     if (mClockSynced) {
1363         mClockSynced = false;
1364         mICommonClock->notifyOnTimelineChanged(
1365                 ICommonClock::kInvalidTimelineID);
1366     }
1367 }
1368 
setState(ICommonClock::State s)1369 void CommonTimeServer::setState(ICommonClock::State s) {
1370     mState = s;
1371 }
1372 
stateToString(ICommonClock::State s)1373 const char* CommonTimeServer::stateToString(ICommonClock::State s) {
1374     switch(s) {
1375         case ICommonClock::STATE_INITIAL:
1376             return "INITIAL";
1377         case ICommonClock::STATE_CLIENT:
1378             return "CLIENT";
1379         case ICommonClock::STATE_MASTER:
1380             return "MASTER";
1381         case ICommonClock::STATE_RONIN:
1382             return "RONIN";
1383         case ICommonClock::STATE_WAIT_FOR_ELECTION:
1384             return "WAIT_FOR_ELECTION";
1385         default:
1386             return "unknown";
1387     }
1388 }
1389 
sockaddrToString(const sockaddr_storage & addr,bool addrValid,char * buf,size_t bufLen)1390 void CommonTimeServer::sockaddrToString(const sockaddr_storage& addr,
1391                                         bool addrValid,
1392                                         char* buf, size_t bufLen) {
1393     if (!bufLen || !buf)
1394         return;
1395 
1396     if (addrValid) {
1397         switch (addr.ss_family) {
1398             case AF_INET: {
1399                 const struct sockaddr_in* sa =
1400                     reinterpret_cast<const struct sockaddr_in*>(&addr);
1401                 unsigned long a = ntohl(sa->sin_addr.s_addr);
1402                 uint16_t      p = ntohs(sa->sin_port);
1403                 snprintf(buf, bufLen, "%lu.%lu.%lu.%lu:%hu",
1404                         ((a >> 24) & 0xFF), ((a >> 16) & 0xFF),
1405                         ((a >>  8) & 0xFF),  (a        & 0xFF), p);
1406             } break;
1407 
1408             case AF_INET6: {
1409                 const struct sockaddr_in6* sa =
1410                     reinterpret_cast<const struct sockaddr_in6*>(&addr);
1411                 const uint8_t* a = sa->sin6_addr.s6_addr;
1412                 uint16_t       p = ntohs(sa->sin6_port);
1413                 snprintf(buf, bufLen,
1414                         "%02X%02X:%02X%02X:%02X%02X:%02X%02X:"
1415                         "%02X%02X:%02X%02X:%02X%02X:%02X%02X port %hd",
1416                         a[0], a[1], a[ 2], a[ 3], a[ 4], a[ 5], a[ 6], a[ 7],
1417                         a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15],
1418                         p);
1419             } break;
1420 
1421             default:
1422                 snprintf(buf, bufLen,
1423                          "<unknown sockaddr family %d>", addr.ss_family);
1424                 break;
1425         }
1426     } else {
1427         snprintf(buf, bufLen, "<none>");
1428     }
1429 
1430     buf[bufLen - 1] = 0;
1431 }
1432 
sockaddrMatch(const sockaddr_storage & a1,const sockaddr_storage & a2,bool matchAddressOnly)1433 bool CommonTimeServer::sockaddrMatch(const sockaddr_storage& a1,
1434                                      const sockaddr_storage& a2,
1435                                      bool matchAddressOnly) {
1436     if (a1.ss_family != a2.ss_family)
1437         return false;
1438 
1439     switch (a1.ss_family) {
1440         case AF_INET: {
1441             const struct sockaddr_in* sa1 =
1442                 reinterpret_cast<const struct sockaddr_in*>(&a1);
1443             const struct sockaddr_in* sa2 =
1444                 reinterpret_cast<const struct sockaddr_in*>(&a2);
1445 
1446             if (sa1->sin_addr.s_addr != sa2->sin_addr.s_addr)
1447                 return false;
1448 
1449             return (matchAddressOnly || (sa1->sin_port == sa2->sin_port));
1450         } break;
1451 
1452         case AF_INET6: {
1453             const struct sockaddr_in6* sa1 =
1454                 reinterpret_cast<const struct sockaddr_in6*>(&a1);
1455             const struct sockaddr_in6* sa2 =
1456                 reinterpret_cast<const struct sockaddr_in6*>(&a2);
1457 
1458             if (memcmp(&sa1->sin6_addr, &sa2->sin6_addr, sizeof(sa2->sin6_addr)))
1459                 return false;
1460 
1461             return (matchAddressOnly || (sa1->sin6_port == sa2->sin6_port));
1462         } break;
1463 
1464         // Huh?  We don't deal in non-IPv[46] addresses.  Not sure how we got
1465         // here, but we don't know how to comapre these addresses and simply
1466         // default to a no-match decision.
1467         default: return false;
1468     }
1469 }
1470 
shouldPanicNotGettingGoodData()1471 bool CommonTimeServer::shouldPanicNotGettingGoodData() {
1472     if (mClient_FirstSyncTX) {
1473         int64_t now = mLocalClock.getLocalTime();
1474         int64_t delta = now - (mClient_LastGoodSyncRX
1475                              ? mClient_LastGoodSyncRX
1476                              : mClient_FirstSyncTX);
1477         int64_t deltaUsec = mCommonClock.localDurationToCommonDuration(delta);
1478 
1479         if (deltaUsec >= kNoGoodDataPanicThresholdUsec)
1480             return true;
1481     }
1482 
1483     return false;
1484 }
1485 
logTX(int64_t txTime)1486 void CommonTimeServer::PacketRTTLog::logTX(int64_t txTime) {
1487     txTimes[wrPtr] = txTime;
1488     rxTimes[wrPtr] = 0;
1489     wrPtr = (wrPtr + 1) % RTT_LOG_SIZE;
1490     if (!wrPtr)
1491         logFull = true;
1492 }
1493 
logRX(int64_t txTime,int64_t rxTime)1494 void CommonTimeServer::PacketRTTLog::logRX(int64_t txTime, int64_t rxTime) {
1495     if (!logFull && !wrPtr)
1496         return;
1497 
1498     uint32_t i = logFull ? wrPtr : 0;
1499     do {
1500         if (txTimes[i] == txTime) {
1501             rxTimes[i] = rxTime;
1502             break;
1503         }
1504         i = (i + 1) % RTT_LOG_SIZE;
1505     } while (i != wrPtr);
1506 }
1507 
1508 }  // namespace android
1509