1 //
2 // Copyright (C) 2015 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "shill/connection_diagnostics.h"
18 
19 #include <base/bind.h>
20 #include <base/strings/stringprintf.h>
21 
22 #include "shill/arp_client.h"
23 #include "shill/arp_packet.h"
24 #include "shill/connection.h"
25 #include "shill/connectivity_trial.h"
26 #include "shill/device_info.h"
27 #include "shill/dns_client.h"
28 #include "shill/dns_client_factory.h"
29 #include "shill/error.h"
30 #include "shill/event_dispatcher.h"
31 #include "shill/http_url.h"
32 #include "shill/icmp_session.h"
33 #include "shill/icmp_session_factory.h"
34 #include "shill/logging.h"
35 #include "shill/metrics.h"
36 #include "shill/net/byte_string.h"
37 #include "shill/net/rtnl_handler.h"
38 #include "shill/net/rtnl_listener.h"
39 #include "shill/net/rtnl_message.h"
40 #include "shill/routing_table.h"
41 #include "shill/routing_table_entry.h"
42 
43 using base::Bind;
44 using base::StringPrintf;
45 using std::string;
46 using std::vector;
47 
namespace {

// Display names for ConnectionDiagnostics::Type values. The table is indexed
// directly by the enum value, so its order and length must stay in sync with
// ConnectionDiagnostics::Type. Both the strings and the table itself are
// immutable.
const char* const kEventNames[] = {
    "Portal detection",
    "Ping DNS servers",
    "DNS resolution",
    "Ping (target web server)",
    "Ping (gateway)",
    "Find route",
    "ARP table lookup",
    "Neighbor table lookup",
    "IP collision check"
};

// Display names for ConnectionDiagnostics::Phase values. Indexed by the enum
// value; keep in sync with ConnectionDiagnostics::Phase.
const char* const kPhaseNames[] = {
    "Start",
    "End",
    "End (Content)",
    "End (DNS)",
    "End (HTTP/CXN)"
};

// Display names for ConnectionDiagnostics::Result values. Indexed by the enum
// value; keep in sync with ConnectionDiagnostics::Result.
const char* const kResultNames[] = {
    "Success",
    "Failure",
    "Timeout"
};

// After we fail to ping the gateway, we 1) start ARP lookup, 2) fail ARP
// lookup, 3) start IP collision check, 4) end IP collision check.
const int kNumEventsFromPingGatewayEndToIpCollisionCheckEnd = 4;

}  // namespace
82 
83 namespace shill {
84 
85 namespace Logging {
86 static auto kModuleLogScope = ScopeLogger::kWiFi;
ObjectID(ConnectionDiagnostics * n)87 static string ObjectID(ConnectionDiagnostics* n) {
88   return "(connection_diagnostics)";
89 }
90 }
91 
92 const char ConnectionDiagnostics::kIssueIPCollision[] =
93     "IP collision detected. Another host on the local network has been "
94     "assigned the same IP address.";
95 const char ConnectionDiagnostics::kIssueRouting[] = "Routing problem detected.";
96 const char ConnectionDiagnostics::kIssueHTTPBrokenPortal[] =
97     "Target URL is pingable. Connectivity problems might be caused by HTTP "
98     "issues on the server or a broken portal.";
99 const char ConnectionDiagnostics::kIssueDNSServerMisconfig[] =
100     "DNS servers responding to DNS queries, but sending invalid responses. "
101     "DNS servers might be misconfigured.";
102 const char ConnectionDiagnostics::kIssueDNSServerNoResponse[] =
103     "At least one DNS server is pingable, but is not responding to DNS "
104     "requests. DNS server issue detected.";
105 const char ConnectionDiagnostics::kIssueNoDNSServersConfigured[] =
106     "No DNS servers have been configured for this connection -- either the "
107     "DHCP server or user configuration is invalid.";
108 const char ConnectionDiagnostics::kIssueDNSServersInvalid[] =
109     "All configured DNS server addresses are invalid.";
110 const char ConnectionDiagnostics::kIssueNone[] =
111     "No connection issue detected.";
112 const char ConnectionDiagnostics::kIssueCaptivePortal[] =
113     "Trapped in captive portal.";
114 const char ConnectionDiagnostics::kIssueGatewayUpstream[] =
115     "We can find a route to the target web server at a remote IP address, "
116     "and the local gateway is pingable. Gatway issue or upstream "
117     "connectivity problem detected.";
118 const char ConnectionDiagnostics::kIssueGatewayNotResponding[] =
119     "This gateway appears to be on the local network, but is not responding to "
120     "pings.";
121 const char ConnectionDiagnostics::kIssueServerNotResponding[] =
122     "This web server appears to be on the local network, but is not responding "
123     "to pings.";
124 const char ConnectionDiagnostics::kIssueGatewayArpFailed[] =
125     "No ARP entry for the gateway. Either the gateway does not exist on the "
126     "local network, or there are link layer issues.";
127 const char ConnectionDiagnostics::kIssueServerArpFailed[] =
128     "No ARP entry for the web server. Either the web server does not exist on "
129     "the local network, or there are link layer issues.";
130 const char ConnectionDiagnostics::kIssueInternalError[] =
131     "The connection diagnostics encountered an internal failure.";
132 const char ConnectionDiagnostics::kIssueGatewayNoNeighborEntry[] =
133     "No neighbor table entry for the gateway. Either the gateway does not "
134     "exist on the local network, or there are link layer issues.";
135 const char ConnectionDiagnostics::kIssueServerNoNeighborEntry[] =
136     "No neighbor table entry for the web server. Either the web server does "
137     "not exist on the local network, or there are link layer issues.";
138 const char ConnectionDiagnostics::kIssueGatewayNeighborEntryNotConnected[] =
139     "Neighbor table entry for the gateway is not in a connected state. Either "
140     "the web server does not exist on the local network, or there are link "
141     "layer issues.";
142 const char ConnectionDiagnostics::kIssueServerNeighborEntryNotConnected[] =
143     "Neighbor table entry for the web server is not in a connected state. "
144     "Either the web server does not exist on the local network, or there are "
145     "link layer issues.";
// Once |num_dns_attempts_| reaches this value, OnPingDNSServerComplete()
// reports kIssueDNSServerNoResponse instead of retrying DNS resolution.
const int ConnectionDiagnostics::kMaxDNSRetries = 2;
// Delay, in seconds, of the route query timeout task posted by
// FindRouteToHost().
const int ConnectionDiagnostics::kRouteQueryTimeoutSeconds = 1;
// Delay, in seconds, of the ARP reply timeout task posted by
// CheckIpCollision().
const int ConnectionDiagnostics::kArpReplyTimeoutSeconds = 1;
// Delay, in seconds, of the timeout task posted by FindNeighborTableEntry().
const int ConnectionDiagnostics::kNeighborTableRequestTimeoutSeconds = 1;
// Timeout, in seconds, handed (converted to milliseconds) to the DNS client
// created in ResolveTargetServerIPAddress().
const int ConnectionDiagnostics::kDNSTimeoutSeconds = 3;
151 
// Builds a diagnostics object for |connection|. No diagnostics are started
// here; the object begins in the not-running state with zero DNS attempts.
// |result_callback| is stored and later run by ReportResultAndStop().
//
// NOTE(review): |portal_detector_|, |arp_client_| and |icmp_session_| are
// constructed from the members |connection_|, |dispatcher_| and
// |weak_ptr_factory_| rather than from the constructor parameters. Members
// are initialized in declaration order, not in the order listed here, so
// this relies on those members being declared earlier in the class --
// confirm against the header.
ConnectionDiagnostics::ConnectionDiagnostics(
    ConnectionRefPtr connection, EventDispatcher* dispatcher, Metrics* metrics,
    const DeviceInfo* device_info, const ResultCallback& result_callback)
    : weak_ptr_factory_(this),
      dispatcher_(dispatcher),
      metrics_(metrics),
      routing_table_(RoutingTable::GetInstance()),
      rtnl_handler_(RTNLHandler::GetInstance()),
      connection_(connection),
      device_info_(device_info),
      dns_client_factory_(DNSClientFactory::GetInstance()),
      // The portal detector reports back into
      // StartAfterPortalDetectionInternal() through a weak pointer.
      portal_detector_(new PortalDetector(
          connection_, dispatcher_,
          Bind(&ConnectionDiagnostics::StartAfterPortalDetectionInternal,
               weak_ptr_factory_.GetWeakPtr()))),
      arp_client_(new ArpClient(connection_->interface_index())),
      icmp_session_(new IcmpSession(dispatcher_)),
      icmp_session_factory_(IcmpSessionFactory::GetInstance()),
      num_dns_attempts_(0),
      running_(false),
      result_callback_(result_callback) {}
173 
~ConnectionDiagnostics()174 ConnectionDiagnostics::~ConnectionDiagnostics() {
175   Stop();
176 }
177 
Start(const string & url_string)178 bool ConnectionDiagnostics::Start(const string& url_string) {
179   SLOG(this, 3) << __func__ << "(" << url_string << ")";
180 
181   if (running()) {
182     LOG(ERROR) << "Connection diagnostics already started";
183     return false;
184   }
185 
186   target_url_.reset(new HTTPURL());
187   if (!target_url_->ParseFromString(url_string)) {
188     LOG(ERROR) << "Failed to parse URL string: " << url_string;
189     Stop();
190     return false;
191   }
192 
193   if (!portal_detector_->Start(url_string)) {
194     Stop();
195     return false;
196   }
197 
198   running_ = true;
199   AddEvent(kTypePortalDetection, kPhaseStart, kResultSuccess);
200   return true;
201 }
202 
StartAfterPortalDetection(const string & url_string,const PortalDetector::Result & result)203 bool ConnectionDiagnostics::StartAfterPortalDetection(
204     const string& url_string, const PortalDetector::Result& result) {
205   SLOG(this, 3) << __func__ << "(" << url_string << ")";
206 
207   if (running()) {
208     LOG(ERROR) << "Connection diagnostics already started";
209     return false;
210   }
211 
212   target_url_.reset(new HTTPURL());
213   if (!target_url_->ParseFromString(url_string)) {
214     LOG(ERROR) << "Failed to parse URL string: " << url_string;
215     Stop();
216     return false;
217   }
218 
219   running_ = true;
220   dispatcher_->PostTask(
221       Bind(&ConnectionDiagnostics::StartAfterPortalDetectionInternal,
222            weak_ptr_factory_.GetWeakPtr(), result));
223   return true;
224 }
225 
// Returns the object to the not-running state: clears recorded events and the
// DNS attempt counter, stops or releases every helper used during a run, and
// cancels all pending cancelable callbacks.
void ConnectionDiagnostics::Stop() {
  SLOG(this, 3) << __func__;

  running_ = false;
  num_dns_attempts_ = 0;
  diagnostic_events_.clear();
  dns_client_.reset();
  arp_client_->Stop();
  icmp_session_->Stop();
  // Note: the portal detector is destroyed here, not merely stopped, so
  // |portal_detector_| is null after this call.
  portal_detector_.reset();
  receive_response_handler_.reset();
  neighbor_msg_listener_.reset();
  // Drops every in-flight DNS server ping session.
  id_to_pending_dns_server_icmp_session_.clear();
  target_url_.reset();
  // Cancel the route query callback and all timeout callbacks so that none
  // of them fire after the run has ended.
  route_query_callback_.Cancel();
  route_query_timeout_callback_.Cancel();
  arp_reply_timeout_callback_.Cancel();
  neighbor_request_timeout_callback_.Cancel();
}
245 
246 // static
EventToString(const Event & event)247 string ConnectionDiagnostics::EventToString(const Event& event) {
248   string message("");
249   message.append(StringPrintf("Event: %-26sPhase: %-17sResult: %-10s",
250                               kEventNames[event.type], kPhaseNames[event.phase],
251                               kResultNames[event.result]));
252   if (!event.message.empty()) {
253     message.append(StringPrintf("Msg: %s", event.message.c_str()));
254   }
255   return message;
256 }
257 
AddEvent(Type type,Phase phase,Result result)258 void ConnectionDiagnostics::AddEvent(Type type, Phase phase, Result result) {
259   AddEventWithMessage(type, phase, result, "");
260 }
261 
AddEventWithMessage(Type type,Phase phase,Result result,const string & message)262 void ConnectionDiagnostics::AddEventWithMessage(Type type, Phase phase,
263                                                 Result result,
264                                                 const string& message) {
265   diagnostic_events_.push_back(Event(type, phase, result, message));
266 }
267 
ReportResultAndStop(const string & issue)268 void ConnectionDiagnostics::ReportResultAndStop(const string& issue) {
269   SLOG(this, 3) << __func__;
270 
271   metrics_->NotifyConnectionDiagnosticsIssue(issue);
272   if (!result_callback_.is_null()) {
273     LOG(INFO) << "Connection diagnostics events:";
274     for (size_t i = 0; i < diagnostic_events_.size(); ++i) {
275       LOG(INFO) << "  #" << i << ": "
276                 << EventToString(diagnostic_events_[i]);
277     }
278     LOG(INFO) << "Connection diagnostics completed. Connection issue: "
279               << issue;
280     result_callback_.Run(issue, diagnostic_events_);
281   }
282   Stop();
283 }
284 
StartAfterPortalDetectionInternal(const PortalDetector::Result & result)285 void ConnectionDiagnostics::StartAfterPortalDetectionInternal(
286     const PortalDetector::Result& result) {
287   SLOG(this, 3) << __func__;
288 
289   Result result_type;
290   if (result.trial_result.status == ConnectivityTrial::kStatusSuccess) {
291     result_type = kResultSuccess;
292   } else if (result.trial_result.status == ConnectivityTrial::kStatusTimeout) {
293     result_type = kResultTimeout;
294   } else {
295     result_type = kResultFailure;
296   }
297 
298   switch (result.trial_result.phase) {
299     case ConnectivityTrial::kPhaseContent: {
300       AddEvent(kTypePortalDetection, kPhasePortalDetectionEndContent,
301                result_type);
302       // We have found the issue if we end in the content phase.
303       ReportResultAndStop(result_type == kResultSuccess ? kIssueNone
304                                                         : kIssueCaptivePortal);
305       break;
306     }
307     case ConnectivityTrial::kPhaseDNS: {
308       AddEvent(kTypePortalDetection, kPhasePortalDetectionEndDNS, result_type);
309       if (result.trial_result.status == ConnectivityTrial::kStatusSuccess) {
310         LOG(ERROR) << __func__ << ": portal detection should not end with "
311                                   "success status in DNS phase";
312         ReportResultAndStop(kIssueInternalError);
313       } else if (result.trial_result.status ==
314                  ConnectivityTrial::kStatusTimeout) {
315         // DNS timeout occurred in portal detection. Ping DNS servers to make
316         // sure they are reachable.
317         dispatcher_->PostTask(Bind(&ConnectionDiagnostics::PingDNSServers,
318                                    weak_ptr_factory_.GetWeakPtr()));
319       } else {
320         ReportResultAndStop(kIssueDNSServerMisconfig);
321       }
322       break;
323     }
324     case ConnectivityTrial::kPhaseConnection:
325     case ConnectivityTrial::kPhaseHTTP:
326     case ConnectivityTrial::kPhaseUnknown:
327     default: {
328       AddEvent(kTypePortalDetection, kPhasePortalDetectionEndOther,
329                result_type);
330       if (result.trial_result.status == ConnectivityTrial::kStatusSuccess) {
331         LOG(ERROR) << __func__
332                    << ": portal detection should not end with success status in"
333                       " Connection/HTTP/Unknown phase";
334         ReportResultAndStop(kIssueInternalError);
335       } else {
336         dispatcher_->PostTask(
337             Bind(&ConnectionDiagnostics::ResolveTargetServerIPAddress,
338                  weak_ptr_factory_.GetWeakPtr(), connection_->dns_servers()));
339       }
340       break;
341     }
342   }
343 }
344 
ResolveTargetServerIPAddress(const vector<string> & dns_servers)345 void ConnectionDiagnostics::ResolveTargetServerIPAddress(
346     const vector<string>& dns_servers) {
347   SLOG(this, 3) << __func__;
348 
349   Error e;
350   dns_client_.reset(dns_client_factory_->CreateDNSClient(
351       connection_->IsIPv6() ? IPAddress::kFamilyIPv6 : IPAddress::kFamilyIPv4,
352       connection_->interface_name(), dns_servers, kDNSTimeoutSeconds * 1000,
353       dispatcher_, Bind(&ConnectionDiagnostics::OnDNSResolutionComplete,
354                         weak_ptr_factory_.GetWeakPtr())));
355   if (!dns_client_->Start(target_url_->host(), &e)) {
356     LOG(ERROR) << __func__ << ": could not start DNS -- " << e.message();
357     AddEventWithMessage(kTypeResolveTargetServerIP, kPhaseStart, kResultFailure,
358                         e.message().c_str());
359     ReportResultAndStop(kIssueInternalError);
360     return;
361   }
362 
363   AddEventWithMessage(kTypeResolveTargetServerIP, kPhaseStart, kResultSuccess,
364                       StringPrintf("Attempt #%d", num_dns_attempts_));
365   SLOG(this, 3) << __func__ << ": looking up " << target_url_->host()
366                 << " (attempt " << num_dns_attempts_ << ")";
367   ++num_dns_attempts_;
368 }
369 
PingDNSServers()370 void ConnectionDiagnostics::PingDNSServers() {
371   SLOG(this, 3) << __func__;
372 
373   if (connection_->dns_servers().empty()) {
374     LOG(ERROR) << __func__ << ": no DNS servers for this connection";
375     AddEventWithMessage(kTypePingDNSServers, kPhaseStart, kResultFailure,
376                         "No DNS servers for this connection");
377     ReportResultAndStop(kIssueNoDNSServersConfigured);
378     return;
379   }
380 
381   id_to_pending_dns_server_icmp_session_.clear();
382   pingable_dns_servers_.clear();
383   size_t num_invalid_dns_server_addr = 0;
384   size_t num_failed_icmp_session_start = 0;
385   for (size_t i = 0; i < connection_->dns_servers().size(); ++i) {
386     // If we encounter any errors starting ping for any DNS server, carry on
387     // attempting to ping the other DNS servers rather than failing. We only
388     // need to successfully ping a single DNS server to decide whether or not
389     // DNS servers can be reached.
390     IPAddress dns_server_ip_addr(connection_->dns_servers()[i]);
391     if (dns_server_ip_addr.family() == IPAddress::kFamilyUnknown) {
392       LOG(ERROR) << __func__
393                  << ": could not parse DNS server IP address from string";
394       ++num_invalid_dns_server_addr;
395       continue;
396     }
397 
398     bool emplace_success =
399         (id_to_pending_dns_server_icmp_session_.emplace(
400              i, std::unique_ptr<IcmpSession>(
401                     icmp_session_factory_->CreateIcmpSession(dispatcher_))))
402             .second;
403     if (emplace_success &&
404         id_to_pending_dns_server_icmp_session_.at(i)
405             ->Start(dns_server_ip_addr,
406                     Bind(&ConnectionDiagnostics::OnPingDNSServerComplete,
407                          weak_ptr_factory_.GetWeakPtr(), i))) {
408       SLOG(this, 3) << __func__ << ": pinging DNS server at "
409                     << dns_server_ip_addr.ToString();
410     } else {
411       LOG(ERROR) << "Failed to initiate ping for DNS server at "
412                  << dns_server_ip_addr.ToString();
413       ++num_failed_icmp_session_start;
414       if (emplace_success) {
415         id_to_pending_dns_server_icmp_session_.erase(i);
416       }
417     }
418   }
419 
420   if (id_to_pending_dns_server_icmp_session_.empty()) {
421     AddEventWithMessage(
422         kTypePingDNSServers, kPhaseStart, kResultFailure,
423         "Could not start ping for any of the given DNS servers");
424     if (num_invalid_dns_server_addr == connection_->dns_servers().size()) {
425       ReportResultAndStop(kIssueDNSServersInvalid);
426     } else if (num_failed_icmp_session_start ==
427                connection_->dns_servers().size()) {
428       ReportResultAndStop(kIssueInternalError);
429     }
430   } else {
431     AddEvent(kTypePingDNSServers, kPhaseStart, kResultSuccess);
432   }
433 }
434 
FindRouteToHost(const IPAddress & address)435 void ConnectionDiagnostics::FindRouteToHost(const IPAddress& address) {
436   SLOG(this, 3) << __func__;
437 
438   RoutingTableEntry entry;
439   route_query_callback_.Reset(Bind(&ConnectionDiagnostics::OnRouteQueryResponse,
440                                    weak_ptr_factory_.GetWeakPtr()));
441   if (!routing_table_->RequestRouteToHost(
442           address, connection_->interface_index(), -1,
443           route_query_callback_.callback(), connection_->table_id())) {
444     route_query_callback_.Cancel();
445     LOG(ERROR) << __func__ << ": could not request route to "
446                << address.ToString();
447     AddEventWithMessage(kTypeFindRoute, kPhaseStart, kResultFailure,
448                         StringPrintf("Could not request route to %s",
449                                      address.ToString().c_str()));
450     ReportResultAndStop(kIssueInternalError);
451     return;
452   }
453 
454   // RoutingTable implementation does not have a built-in timeout mechanism
455   // for un-replied route requests, so use our own.
456   route_query_timeout_callback_.Reset(
457       Bind(&ConnectionDiagnostics::OnRouteQueryTimeout,
458            weak_ptr_factory_.GetWeakPtr()));
459   dispatcher_->PostDelayedTask(route_query_timeout_callback_.callback(),
460                                kRouteQueryTimeoutSeconds * 1000);
461   AddEventWithMessage(
462       kTypeFindRoute, kPhaseStart, kResultSuccess,
463       StringPrintf("Requesting route to %s", address.ToString().c_str()));
464 }
465 
FindArpTableEntry(const IPAddress & address)466 void ConnectionDiagnostics::FindArpTableEntry(const IPAddress& address) {
467   SLOG(this, 3) << __func__;
468 
469   if (address.family() != IPAddress::kFamilyIPv4) {
470     // We only perform ARP table lookups for IPv4 addresses.
471     LOG(ERROR) << __func__ << ": " << address.ToString()
472                << " is not an IPv4 address";
473     AddEventWithMessage(
474         kTypeArpTableLookup, kPhaseStart, kResultFailure,
475         StringPrintf("%s is not an IPv4 address", address.ToString().c_str()));
476     ReportResultAndStop(kIssueInternalError);
477     return;
478   }
479 
480   AddEventWithMessage(kTypeArpTableLookup, kPhaseStart, kResultSuccess,
481                       StringPrintf("Finding ARP table entry for %s",
482                                    address.ToString().c_str()));
483   ByteString target_mac_address;
484   if (device_info_->GetMACAddressOfPeer(connection_->interface_index(), address,
485                                         &target_mac_address)) {
486     AddEventWithMessage(kTypeArpTableLookup, kPhaseEnd, kResultSuccess,
487                         StringPrintf("Found ARP table entry for %s",
488                                      address.ToString().c_str()));
489     ReportResultAndStop(address.Equals(connection_->gateway())
490                             ? kIssueGatewayNotResponding
491                             : kIssueServerNotResponding);
492     return;
493   }
494 
495   AddEventWithMessage(kTypeArpTableLookup, kPhaseEnd, kResultFailure,
496                       StringPrintf("Could not find ARP table entry for %s",
497                                    address.ToString().c_str()));
498   dispatcher_->PostTask(Bind(&ConnectionDiagnostics::CheckIpCollision,
499                              weak_ptr_factory_.GetWeakPtr()));
500 }
501 
FindNeighborTableEntry(const IPAddress & address)502 void ConnectionDiagnostics::FindNeighborTableEntry(const IPAddress& address) {
503   SLOG(this, 3) << __func__;
504 
505   if (address.family() != IPAddress::kFamilyIPv6) {
506     // We only perform neighbor table lookups for IPv6 addresses.
507     LOG(ERROR) << __func__ << ": " << address.ToString()
508                << " is not an IPv6 address";
509     AddEventWithMessage(
510         kTypeNeighborTableLookup, kPhaseStart, kResultFailure,
511         StringPrintf("%s is not an IPv6 address", address.ToString().c_str()));
512     ReportResultAndStop(kIssueInternalError);
513     return;
514   }
515 
516   neighbor_msg_listener_.reset(
517       new RTNLListener(RTNLHandler::kRequestNeighbor,
518                        Bind(&ConnectionDiagnostics::OnNeighborMsgReceived,
519                             weak_ptr_factory_.GetWeakPtr(), address)));
520   rtnl_handler_->RequestDump(RTNLHandler::kRequestNeighbor);
521 
522   neighbor_request_timeout_callback_.Reset(
523       Bind(&ConnectionDiagnostics::OnNeighborTableRequestTimeout,
524            weak_ptr_factory_.GetWeakPtr(), address));
525   dispatcher_->PostDelayedTask(route_query_timeout_callback_.callback(),
526                                kNeighborTableRequestTimeoutSeconds * 1000);
527   AddEventWithMessage(kTypeNeighborTableLookup, kPhaseStart, kResultSuccess,
528                       StringPrintf("Finding neighbor table entry for %s",
529                                    address.ToString().c_str()));
530 }
531 
CheckIpCollision()532 void ConnectionDiagnostics::CheckIpCollision() {
533   SLOG(this, 3) << __func__;
534 
535   if (!device_info_->GetMACAddress(connection_->interface_index(),
536                                    &local_mac_address_)) {
537     LOG(ERROR) << __func__ << ": could not get local MAC address";
538     AddEventWithMessage(kTypeIPCollisionCheck, kPhaseStart, kResultFailure,
539                         "Could not get local MAC address");
540     ReportResultAndStop(kIssueInternalError);
541     return;
542   }
543 
544   if (!arp_client_->StartReplyListener()) {
545     LOG(ERROR) << __func__ << ": failed to start ARP client";
546     AddEventWithMessage(kTypeIPCollisionCheck, kPhaseStart, kResultFailure,
547                         "Failed to start ARP client");
548     ReportResultAndStop(kIssueInternalError);
549     return;
550   }
551 
552   receive_response_handler_.reset(dispatcher_->CreateReadyHandler(
553       arp_client_->socket(), IOHandler::kModeInput,
554       Bind(&ConnectionDiagnostics::OnArpReplyReceived,
555            weak_ptr_factory_.GetWeakPtr())));
556 
557   ArpPacket request(connection_->local(), connection_->local(),
558                     local_mac_address_, ByteString());
559   if (!arp_client_->TransmitRequest(request)) {
560     LOG(ERROR) << __func__ << ": failed to send ARP request";
561     AddEventWithMessage(kTypeIPCollisionCheck, kPhaseStart, kResultFailure,
562                         "Failed to send ARP request");
563     arp_client_->Stop();
564     receive_response_handler_.reset();
565     ReportResultAndStop(kIssueInternalError);
566     return;
567   }
568 
569   arp_reply_timeout_callback_.Reset(
570       Bind(&ConnectionDiagnostics::OnArpRequestTimeout,
571            weak_ptr_factory_.GetWeakPtr()));
572   dispatcher_->PostDelayedTask(arp_reply_timeout_callback_.callback(),
573                                kArpReplyTimeoutSeconds * 1000);
574   AddEvent(kTypeIPCollisionCheck, kPhaseStart, kResultSuccess);
575 }
576 
PingHost(const IPAddress & address)577 void ConnectionDiagnostics::PingHost(const IPAddress& address) {
578   SLOG(this, 3) << __func__;
579 
580   Type event_type = address.Equals(connection_->gateway())
581                         ? kTypePingGateway
582                         : kTypePingTargetServer;
583   if (!icmp_session_->Start(
584           address, Bind(&ConnectionDiagnostics::OnPingHostComplete,
585                         weak_ptr_factory_.GetWeakPtr(), event_type, address))) {
586     LOG(ERROR) << __func__ << ": failed to start ICMP session with "
587                << address.ToString();
588     AddEventWithMessage(event_type, kPhaseStart, kResultFailure,
589                         StringPrintf("Failed to start ICMP session with %s",
590                                      address.ToString().c_str()));
591     ReportResultAndStop(kIssueInternalError);
592     return;
593   }
594 
595   AddEventWithMessage(event_type, kPhaseStart, kResultSuccess,
596                       StringPrintf("Pinging %s", address.ToString().c_str()));
597 }
598 
OnPingDNSServerComplete(int dns_server_index,const vector<base::TimeDelta> & result)599 void ConnectionDiagnostics::OnPingDNSServerComplete(
600     int dns_server_index, const vector<base::TimeDelta>& result) {
601   SLOG(this, 3) << __func__ << "(DNS server index " << dns_server_index << ")";
602 
603   if (!id_to_pending_dns_server_icmp_session_.erase(dns_server_index)) {
604     // This should not happen, since we expect exactly one callback for each
605     // IcmpSession started with a unique |dns_server_index| value in
606     // ConnectionDiagnostics::PingDNSServers. However, if this does happen for
607     // any reason, |id_to_pending_dns_server_icmp_session_| might never become
608     // empty, and we might never move to the next step after pinging DNS
609     // servers. Stop diagnostics immediately to prevent this from happening.
610     LOG(ERROR) << __func__
611                << ": no matching pending DNS server ICMP session found";
612     ReportResultAndStop(kIssueInternalError);
613     return;
614   }
615 
616   if (IcmpSession::AnyRepliesReceived(result)) {
617     pingable_dns_servers_.push_back(
618         connection_->dns_servers()[dns_server_index]);
619   }
620   if (!id_to_pending_dns_server_icmp_session_.empty()) {
621     SLOG(this, 3) << __func__ << ": not yet finished pinging all DNS servers";
622     return;
623   }
624 
625   if (pingable_dns_servers_.empty()) {
626     // Use the first DNS server on the list and diagnose its connectivity.
627     IPAddress first_dns_server_ip_addr(connection_->dns_servers()[0]);
628     if (first_dns_server_ip_addr.family() == IPAddress::kFamilyUnknown) {
629       LOG(ERROR) << __func__ << ": could not parse DNS server IP address "
630                  << connection_->dns_servers()[0];
631       AddEventWithMessage(kTypePingDNSServers, kPhaseEnd, kResultFailure,
632                           StringPrintf("Could not parse DNS "
633                                        "server IP address %s",
634                                        connection_->dns_servers()[0].c_str()));
635       ReportResultAndStop(kIssueInternalError);
636       return;
637     }
638     AddEventWithMessage(
639         kTypePingDNSServers, kPhaseEnd, kResultFailure,
640         StringPrintf(
641             "No DNS servers responded to pings. Pinging first DNS server at %s",
642             first_dns_server_ip_addr.ToString().c_str()));
643     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindRouteToHost,
644                                weak_ptr_factory_.GetWeakPtr(),
645                                first_dns_server_ip_addr));
646     return;
647   }
648 
649   if (pingable_dns_servers_.size() != connection_->dns_servers().size()) {
650     AddEventWithMessage(kTypePingDNSServers, kPhaseEnd, kResultSuccess,
651                         "Pinged some, but not all, DNS servers successfully");
652   } else {
653     AddEventWithMessage(kTypePingDNSServers, kPhaseEnd, kResultSuccess,
654                         "Pinged all DNS servers successfully");
655   }
656 
657   if (num_dns_attempts_ < kMaxDNSRetries) {
658     dispatcher_->PostTask(
659         Bind(&ConnectionDiagnostics::ResolveTargetServerIPAddress,
660              weak_ptr_factory_.GetWeakPtr(), pingable_dns_servers_));
661   } else {
662     SLOG(this, 3) << __func__ << ": max DNS resolution attempts reached";
663     ReportResultAndStop(kIssueDNSServerNoResponse);
664   }
665 }
666 
OnDNSResolutionComplete(const Error & error,const IPAddress & address)667 void ConnectionDiagnostics::OnDNSResolutionComplete(const Error& error,
668                                                     const IPAddress& address) {
669   SLOG(this, 3) << __func__;
670 
671   if (error.IsSuccess()) {
672     AddEventWithMessage(
673         kTypeResolveTargetServerIP, kPhaseEnd, kResultSuccess,
674         StringPrintf("Target address is %s", address.ToString().c_str()));
675     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::PingHost,
676                                weak_ptr_factory_.GetWeakPtr(), address));
677   } else if (error.type() == Error::kOperationTimeout) {
678     AddEventWithMessage(
679         kTypeResolveTargetServerIP, kPhaseEnd, kResultTimeout,
680         StringPrintf("DNS resolution timed out: %s", error.message().c_str()));
681     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::PingDNSServers,
682                                weak_ptr_factory_.GetWeakPtr()));
683   } else {
684     AddEventWithMessage(
685         kTypeResolveTargetServerIP, kPhaseEnd, kResultFailure,
686         StringPrintf("DNS resolution failed: %s", error.message().c_str()));
687     ReportResultAndStop(kIssueDNSServerMisconfig);
688   }
689 }
690 
OnPingHostComplete(Type ping_event_type,const IPAddress & address_pinged,const vector<base::TimeDelta> & result)691 void ConnectionDiagnostics::OnPingHostComplete(
692     Type ping_event_type, const IPAddress& address_pinged,
693     const vector<base::TimeDelta>& result) {
694   SLOG(this, 3) << __func__;
695 
696   string message(StringPrintf("Destination: %s,  Latencies: ",
697                               address_pinged.ToString().c_str()));
698   for (const auto& latency : result) {
699     if (latency.is_zero()) {
700       message.append("NA ");
701     } else {
702       message.append(StringPrintf("%4.2fms ", latency.InMillisecondsF()));
703     }
704   }
705 
706   Result result_type =
707       IcmpSession::AnyRepliesReceived(result) ? kResultSuccess : kResultFailure;
708   if (IcmpSession::IsPacketLossPercentageGreaterThan(result, 50)) {
709     LOG(WARNING) << __func__ << ": high packet loss when pinging "
710                  << address_pinged.ToString();
711   }
712   AddEventWithMessage(ping_event_type, kPhaseEnd, result_type, message);
713   if (result_type == kResultSuccess) {
714     // If pinging the target web server succeeded, we have found a HTTP issue or
715     // broken portal. Otherwise, if pinging the gateway succeeded, we have found
716     // an upstream connectivity problem or gateway issue.
717     ReportResultAndStop(ping_event_type == kTypePingGateway
718                             ? kIssueGatewayUpstream
719                             : kIssueHTTPBrokenPortal);
720   } else if (result_type == kResultFailure &&
721              ping_event_type == kTypePingTargetServer) {
722     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindRouteToHost,
723                                weak_ptr_factory_.GetWeakPtr(), address_pinged));
724   } else if (result_type == kResultFailure &&
725              ping_event_type == kTypePingGateway &&
726              address_pinged.family() == IPAddress::kFamilyIPv4) {
727     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindArpTableEntry,
728                                weak_ptr_factory_.GetWeakPtr(), address_pinged));
729   } else {
730     // We failed to ping an IPv6 gateway. Check for neighbor table entry for
731     // this gateway.
732     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindNeighborTableEntry,
733                                weak_ptr_factory_.GetWeakPtr(), address_pinged));
734   }
735 }
736 
OnArpReplyReceived(int fd)737 void ConnectionDiagnostics::OnArpReplyReceived(int fd) {
738   SLOG(this, 3) << __func__ << "(fd " << fd << ")";
739 
740   ArpPacket packet;
741   ByteString sender;
742   if (!arp_client_->ReceivePacket(&packet, &sender)) {
743     return;
744   }
745 
746   if (!packet.IsReply()) {
747     SLOG(this, 4) << __func__ << ": this is not a reply packet. Ignoring.";
748     return;
749   }
750 
751   if (!connection_->local().address().Equals(
752           packet.remote_ip_address().address())) {
753     SLOG(this, 4) << __func__ << ": response is not for our IP address.";
754     return;
755   }
756 
757   if (!local_mac_address_.Equals(packet.remote_mac_address())) {
758     SLOG(this, 4) << __func__ << ": response is not for our MAC address.";
759     return;
760   }
761 
762   if (connection_->local().address().Equals(
763           packet.local_ip_address().address())) {
764     arp_reply_timeout_callback_.Cancel();
765     AddEventWithMessage(kTypeIPCollisionCheck, kPhaseEnd, kResultSuccess,
766                         "IP collision found");
767     ReportResultAndStop(kIssueIPCollision);
768   }
769 }
770 
OnArpRequestTimeout()771 void ConnectionDiagnostics::OnArpRequestTimeout() {
772   SLOG(this, 3) << __func__;
773 
774   AddEventWithMessage(kTypeIPCollisionCheck, kPhaseEnd, kResultFailure,
775                       "No IP collision found");
776   // TODO(samueltan): perform link-level diagnostics.
777   if (DoesPreviousEventMatch(
778           kTypePingGateway, kPhaseEnd, kResultFailure,
779           kNumEventsFromPingGatewayEndToIpCollisionCheckEnd)) {
780     // We came here from failing to ping the gateway.
781     ReportResultAndStop(kIssueGatewayArpFailed);
782   } else {
783     // Otherwise, we must have come here from failing to ping the target web
784     // server and successfully finding a route.
785     ReportResultAndStop(kIssueServerArpFailed);
786   }
787 }
788 
OnNeighborMsgReceived(const IPAddress & address_queried,const RTNLMessage & msg)789 void ConnectionDiagnostics::OnNeighborMsgReceived(
790     const IPAddress& address_queried, const RTNLMessage& msg) {
791   SLOG(this, 3) << __func__;
792 
793   DCHECK(msg.type() == RTNLMessage::kTypeNeighbor);
794   const RTNLMessage::NeighborStatus& neighbor = msg.neighbor_status();
795 
796   if (neighbor.type != NDA_DST || !msg.HasAttribute(NDA_DST)) {
797     SLOG(this, 4) << __func__ << ": neighbor message has no destination";
798     return;
799   }
800 
801   IPAddress address(msg.family(), msg.GetAttribute(NDA_DST));
802   if (!address.Equals(address_queried)) {
803     SLOG(this, 4) << __func__ << ": destination address (" << address.ToString()
804                   << ") does not match address queried ("
805                   << address_queried.ToString() << ")";
806     return;
807   }
808 
809   neighbor_request_timeout_callback_.Cancel();
810   if (!(neighbor.state & (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE))) {
811     AddEventWithMessage(
812         kTypeNeighborTableLookup, kPhaseEnd, kResultFailure,
813         StringPrintf("Neighbor table entry for %s is not in a connected state "
814                      "(actual state = 0x%2x)",
815                      address_queried.ToString().c_str(), neighbor.state));
816     ReportResultAndStop(address_queried.Equals(connection_->gateway())
817                             ? kIssueGatewayNeighborEntryNotConnected
818                             : kIssueServerNeighborEntryNotConnected);
819     return;
820   }
821 
822   AddEventWithMessage(kTypeNeighborTableLookup, kPhaseEnd, kResultSuccess,
823                       StringPrintf("Neighbor table entry found for %s",
824                                    address_queried.ToString().c_str()));
825   ReportResultAndStop(address_queried.Equals(connection_->gateway())
826                           ? kIssueGatewayNotResponding
827                           : kIssueServerNotResponding);
828 }
829 
OnNeighborTableRequestTimeout(const IPAddress & address_queried)830 void ConnectionDiagnostics::OnNeighborTableRequestTimeout(
831     const IPAddress& address_queried) {
832   SLOG(this, 3) << __func__;
833 
834   AddEventWithMessage(kTypeNeighborTableLookup, kPhaseEnd, kResultFailure,
835                       StringPrintf("Failed to find neighbor table entry for %s",
836                                    address_queried.ToString().c_str()));
837   ReportResultAndStop(address_queried.Equals(connection_->gateway())
838                           ? kIssueGatewayNoNeighborEntry
839                           : kIssueServerNoNeighborEntry);
840 }
841 
OnRouteQueryResponse(int interface_index,const RoutingTableEntry & entry)842 void ConnectionDiagnostics::OnRouteQueryResponse(
843     int interface_index, const RoutingTableEntry& entry) {
844   SLOG(this, 3) << __func__ << "(interface " << interface_index << ")";
845 
846   if (interface_index != connection_->interface_index()) {
847     SLOG(this, 3) << __func__
848                   << ": route query response not meant for this interface";
849     return;
850   }
851 
852   route_query_timeout_callback_.Cancel();
853   AddEventWithMessage(
854       kTypeFindRoute, kPhaseEnd, kResultSuccess,
855       StringPrintf("Found route to %s (%s)", entry.dst.ToString().c_str(),
856                    entry.gateway.IsDefault() ? "remote" : "local"));
857   if (!entry.gateway.IsDefault()) {
858     // We have a route to a remote destination, so ping the route gateway to
859     // check if we have a means of reaching this host.
860     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::PingHost,
861                                weak_ptr_factory_.GetWeakPtr(), entry.gateway));
862   } else if (entry.dst.family() == IPAddress::kFamilyIPv4) {
863     // We have a route to a local IPv4 destination, so check for an ARP table
864     // entry.
865     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindArpTableEntry,
866                                weak_ptr_factory_.GetWeakPtr(), entry.dst));
867   } else {
868     // We have a route to a local IPv6 destination, so check for a neighbor
869     // table entry.
870     dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindNeighborTableEntry,
871                                weak_ptr_factory_.GetWeakPtr(), entry.dst));
872   }
873 }
874 
OnRouteQueryTimeout()875 void ConnectionDiagnostics::OnRouteQueryTimeout() {
876   SLOG(this, 3) << __func__;
877 
878   AddEvent(kTypeFindRoute, kPhaseEnd, kResultFailure);
879   ReportResultAndStop(kIssueRouting);
880 }
881 
DoesPreviousEventMatch(Type type,Phase phase,Result result,size_t num_events_ago)882 bool ConnectionDiagnostics::DoesPreviousEventMatch(Type type, Phase phase,
883                                                    Result result,
884                                                    size_t num_events_ago) {
885   int event_index = diagnostic_events_.size() - 1 - num_events_ago;
886   if (event_index < 0) {
887     LOG(ERROR) << __func__ << ": requested event " << num_events_ago
888                << " before the last event, but we only have "
889                << diagnostic_events_.size() << " logged";
890     return false;
891   }
892 
893   return (diagnostic_events_[event_index].type == type &&
894           diagnostic_events_[event_index].phase == phase &&
895           diagnostic_events_[event_index].result == result);
896 }
897 
898 }  // namespace shill
899