1 //
2 // Copyright (C) 2015 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "shill/connection_diagnostics.h"
18
19 #include <base/bind.h>
20 #include <base/strings/stringprintf.h>
21
22 #include "shill/arp_client.h"
23 #include "shill/arp_packet.h"
24 #include "shill/connection.h"
25 #include "shill/connectivity_trial.h"
26 #include "shill/device_info.h"
27 #include "shill/dns_client.h"
28 #include "shill/dns_client_factory.h"
29 #include "shill/error.h"
30 #include "shill/event_dispatcher.h"
31 #include "shill/http_url.h"
32 #include "shill/icmp_session.h"
33 #include "shill/icmp_session_factory.h"
34 #include "shill/logging.h"
35 #include "shill/metrics.h"
36 #include "shill/net/byte_string.h"
37 #include "shill/net/rtnl_handler.h"
38 #include "shill/net/rtnl_listener.h"
39 #include "shill/net/rtnl_message.h"
40 #include "shill/routing_table.h"
41 #include "shill/routing_table_entry.h"
42
43 using base::Bind;
44 using base::StringPrintf;
45 using std::string;
46 using std::vector;
47
namespace {
// Display names for diagnostic event types, indexed by
// ConnectionDiagnostics::Type. Any changes to this array should be synced
// with ConnectionDiagnostics::Type.
const char* const kEventNames[] = {
    "Portal detection",
    "Ping DNS servers",
    "DNS resolution",
    "Ping (target web server)",
    "Ping (gateway)",
    "Find route",
    "ARP table lookup",
    "Neighbor table lookup",
    "IP collision check"
};

// Display names for diagnostic phases, indexed by
// ConnectionDiagnostics::Phase. Any changes to this array should be synced
// with ConnectionDiagnostics::Phase.
const char* const kPhaseNames[] = {
    "Start",
    "End",
    "End (Content)",
    "End (DNS)",
    "End (HTTP/CXN)"
};

// Display names for diagnostic results, indexed by
// ConnectionDiagnostics::Result. Any changes to this array should be synced
// with ConnectionDiagnostics::Result.
const char* const kResultNames[] = {
    "Success",
    "Failure",
    "Timeout"
};

// After we fail to ping the gateway, we 1) start ARP lookup, 2) fail ARP
// lookup, 3) start IP collision check, 4) end IP collision check.
const int kNumEventsFromPingGatewayEndToIpCollisionCheckEnd = 4;
}  // namespace
82
83 namespace shill {
84
85 namespace Logging {
86 static auto kModuleLogScope = ScopeLogger::kWiFi;
ObjectID(ConnectionDiagnostics * n)87 static string ObjectID(ConnectionDiagnostics* n) {
88 return "(connection_diagnostics)";
89 }
90 }
91
92 const char ConnectionDiagnostics::kIssueIPCollision[] =
93 "IP collision detected. Another host on the local network has been "
94 "assigned the same IP address.";
95 const char ConnectionDiagnostics::kIssueRouting[] = "Routing problem detected.";
96 const char ConnectionDiagnostics::kIssueHTTPBrokenPortal[] =
97 "Target URL is pingable. Connectivity problems might be caused by HTTP "
98 "issues on the server or a broken portal.";
99 const char ConnectionDiagnostics::kIssueDNSServerMisconfig[] =
100 "DNS servers responding to DNS queries, but sending invalid responses. "
101 "DNS servers might be misconfigured.";
102 const char ConnectionDiagnostics::kIssueDNSServerNoResponse[] =
103 "At least one DNS server is pingable, but is not responding to DNS "
104 "requests. DNS server issue detected.";
105 const char ConnectionDiagnostics::kIssueNoDNSServersConfigured[] =
106 "No DNS servers have been configured for this connection -- either the "
107 "DHCP server or user configuration is invalid.";
108 const char ConnectionDiagnostics::kIssueDNSServersInvalid[] =
109 "All configured DNS server addresses are invalid.";
110 const char ConnectionDiagnostics::kIssueNone[] =
111 "No connection issue detected.";
112 const char ConnectionDiagnostics::kIssueCaptivePortal[] =
113 "Trapped in captive portal.";
114 const char ConnectionDiagnostics::kIssueGatewayUpstream[] =
115 "We can find a route to the target web server at a remote IP address, "
116 "and the local gateway is pingable. Gatway issue or upstream "
117 "connectivity problem detected.";
118 const char ConnectionDiagnostics::kIssueGatewayNotResponding[] =
119 "This gateway appears to be on the local network, but is not responding to "
120 "pings.";
121 const char ConnectionDiagnostics::kIssueServerNotResponding[] =
122 "This web server appears to be on the local network, but is not responding "
123 "to pings.";
124 const char ConnectionDiagnostics::kIssueGatewayArpFailed[] =
125 "No ARP entry for the gateway. Either the gateway does not exist on the "
126 "local network, or there are link layer issues.";
127 const char ConnectionDiagnostics::kIssueServerArpFailed[] =
128 "No ARP entry for the web server. Either the web server does not exist on "
129 "the local network, or there are link layer issues.";
130 const char ConnectionDiagnostics::kIssueInternalError[] =
131 "The connection diagnostics encountered an internal failure.";
132 const char ConnectionDiagnostics::kIssueGatewayNoNeighborEntry[] =
133 "No neighbor table entry for the gateway. Either the gateway does not "
134 "exist on the local network, or there are link layer issues.";
135 const char ConnectionDiagnostics::kIssueServerNoNeighborEntry[] =
136 "No neighbor table entry for the web server. Either the web server does "
137 "not exist on the local network, or there are link layer issues.";
138 const char ConnectionDiagnostics::kIssueGatewayNeighborEntryNotConnected[] =
139 "Neighbor table entry for the gateway is not in a connected state. Either "
140 "the web server does not exist on the local network, or there are link "
141 "layer issues.";
142 const char ConnectionDiagnostics::kIssueServerNeighborEntryNotConnected[] =
143 "Neighbor table entry for the web server is not in a connected state. "
144 "Either the web server does not exist on the local network, or there are "
145 "link layer issues.";
146 const int ConnectionDiagnostics::kMaxDNSRetries = 2;
147 const int ConnectionDiagnostics::kRouteQueryTimeoutSeconds = 1;
148 const int ConnectionDiagnostics::kArpReplyTimeoutSeconds = 1;
149 const int ConnectionDiagnostics::kNeighborTableRequestTimeoutSeconds = 1;
150 const int ConnectionDiagnostics::kDNSTimeoutSeconds = 3;
151
ConnectionDiagnostics(ConnectionRefPtr connection,EventDispatcher * dispatcher,Metrics * metrics,const DeviceInfo * device_info,const ResultCallback & result_callback)152 ConnectionDiagnostics::ConnectionDiagnostics(
153 ConnectionRefPtr connection, EventDispatcher* dispatcher, Metrics* metrics,
154 const DeviceInfo* device_info, const ResultCallback& result_callback)
155 : weak_ptr_factory_(this),
156 dispatcher_(dispatcher),
157 metrics_(metrics),
158 routing_table_(RoutingTable::GetInstance()),
159 rtnl_handler_(RTNLHandler::GetInstance()),
160 connection_(connection),
161 device_info_(device_info),
162 dns_client_factory_(DNSClientFactory::GetInstance()),
163 portal_detector_(new PortalDetector(
164 connection_, dispatcher_,
165 Bind(&ConnectionDiagnostics::StartAfterPortalDetectionInternal,
166 weak_ptr_factory_.GetWeakPtr()))),
167 arp_client_(new ArpClient(connection_->interface_index())),
168 icmp_session_(new IcmpSession(dispatcher_)),
169 icmp_session_factory_(IcmpSessionFactory::GetInstance()),
170 num_dns_attempts_(0),
171 running_(false),
172 result_callback_(result_callback) {}
173
~ConnectionDiagnostics()174 ConnectionDiagnostics::~ConnectionDiagnostics() {
175 Stop();
176 }
177
Start(const string & url_string)178 bool ConnectionDiagnostics::Start(const string& url_string) {
179 SLOG(this, 3) << __func__ << "(" << url_string << ")";
180
181 if (running()) {
182 LOG(ERROR) << "Connection diagnostics already started";
183 return false;
184 }
185
186 target_url_.reset(new HTTPURL());
187 if (!target_url_->ParseFromString(url_string)) {
188 LOG(ERROR) << "Failed to parse URL string: " << url_string;
189 Stop();
190 return false;
191 }
192
193 if (!portal_detector_->Start(url_string)) {
194 Stop();
195 return false;
196 }
197
198 running_ = true;
199 AddEvent(kTypePortalDetection, kPhaseStart, kResultSuccess);
200 return true;
201 }
202
StartAfterPortalDetection(const string & url_string,const PortalDetector::Result & result)203 bool ConnectionDiagnostics::StartAfterPortalDetection(
204 const string& url_string, const PortalDetector::Result& result) {
205 SLOG(this, 3) << __func__ << "(" << url_string << ")";
206
207 if (running()) {
208 LOG(ERROR) << "Connection diagnostics already started";
209 return false;
210 }
211
212 target_url_.reset(new HTTPURL());
213 if (!target_url_->ParseFromString(url_string)) {
214 LOG(ERROR) << "Failed to parse URL string: " << url_string;
215 Stop();
216 return false;
217 }
218
219 running_ = true;
220 dispatcher_->PostTask(
221 Bind(&ConnectionDiagnostics::StartAfterPortalDetectionInternal,
222 weak_ptr_factory_.GetWeakPtr(), result));
223 return true;
224 }
225
Stop()226 void ConnectionDiagnostics::Stop() {
227 SLOG(this, 3) << __func__;
228
229 running_ = false;
230 num_dns_attempts_ = 0;
231 diagnostic_events_.clear();
232 dns_client_.reset();
233 arp_client_->Stop();
234 icmp_session_->Stop();
235 portal_detector_.reset();
236 receive_response_handler_.reset();
237 neighbor_msg_listener_.reset();
238 id_to_pending_dns_server_icmp_session_.clear();
239 target_url_.reset();
240 route_query_callback_.Cancel();
241 route_query_timeout_callback_.Cancel();
242 arp_reply_timeout_callback_.Cancel();
243 neighbor_request_timeout_callback_.Cancel();
244 }
245
246 // static
EventToString(const Event & event)247 string ConnectionDiagnostics::EventToString(const Event& event) {
248 string message("");
249 message.append(StringPrintf("Event: %-26sPhase: %-17sResult: %-10s",
250 kEventNames[event.type], kPhaseNames[event.phase],
251 kResultNames[event.result]));
252 if (!event.message.empty()) {
253 message.append(StringPrintf("Msg: %s", event.message.c_str()));
254 }
255 return message;
256 }
257
AddEvent(Type type,Phase phase,Result result)258 void ConnectionDiagnostics::AddEvent(Type type, Phase phase, Result result) {
259 AddEventWithMessage(type, phase, result, "");
260 }
261
AddEventWithMessage(Type type,Phase phase,Result result,const string & message)262 void ConnectionDiagnostics::AddEventWithMessage(Type type, Phase phase,
263 Result result,
264 const string& message) {
265 diagnostic_events_.push_back(Event(type, phase, result, message));
266 }
267
ReportResultAndStop(const string & issue)268 void ConnectionDiagnostics::ReportResultAndStop(const string& issue) {
269 SLOG(this, 3) << __func__;
270
271 metrics_->NotifyConnectionDiagnosticsIssue(issue);
272 if (!result_callback_.is_null()) {
273 LOG(INFO) << "Connection diagnostics events:";
274 for (size_t i = 0; i < diagnostic_events_.size(); ++i) {
275 LOG(INFO) << " #" << i << ": "
276 << EventToString(diagnostic_events_[i]);
277 }
278 LOG(INFO) << "Connection diagnostics completed. Connection issue: "
279 << issue;
280 result_callback_.Run(issue, diagnostic_events_);
281 }
282 Stop();
283 }
284
StartAfterPortalDetectionInternal(const PortalDetector::Result & result)285 void ConnectionDiagnostics::StartAfterPortalDetectionInternal(
286 const PortalDetector::Result& result) {
287 SLOG(this, 3) << __func__;
288
289 Result result_type;
290 if (result.trial_result.status == ConnectivityTrial::kStatusSuccess) {
291 result_type = kResultSuccess;
292 } else if (result.trial_result.status == ConnectivityTrial::kStatusTimeout) {
293 result_type = kResultTimeout;
294 } else {
295 result_type = kResultFailure;
296 }
297
298 switch (result.trial_result.phase) {
299 case ConnectivityTrial::kPhaseContent: {
300 AddEvent(kTypePortalDetection, kPhasePortalDetectionEndContent,
301 result_type);
302 // We have found the issue if we end in the content phase.
303 ReportResultAndStop(result_type == kResultSuccess ? kIssueNone
304 : kIssueCaptivePortal);
305 break;
306 }
307 case ConnectivityTrial::kPhaseDNS: {
308 AddEvent(kTypePortalDetection, kPhasePortalDetectionEndDNS, result_type);
309 if (result.trial_result.status == ConnectivityTrial::kStatusSuccess) {
310 LOG(ERROR) << __func__ << ": portal detection should not end with "
311 "success status in DNS phase";
312 ReportResultAndStop(kIssueInternalError);
313 } else if (result.trial_result.status ==
314 ConnectivityTrial::kStatusTimeout) {
315 // DNS timeout occurred in portal detection. Ping DNS servers to make
316 // sure they are reachable.
317 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::PingDNSServers,
318 weak_ptr_factory_.GetWeakPtr()));
319 } else {
320 ReportResultAndStop(kIssueDNSServerMisconfig);
321 }
322 break;
323 }
324 case ConnectivityTrial::kPhaseConnection:
325 case ConnectivityTrial::kPhaseHTTP:
326 case ConnectivityTrial::kPhaseUnknown:
327 default: {
328 AddEvent(kTypePortalDetection, kPhasePortalDetectionEndOther,
329 result_type);
330 if (result.trial_result.status == ConnectivityTrial::kStatusSuccess) {
331 LOG(ERROR) << __func__
332 << ": portal detection should not end with success status in"
333 " Connection/HTTP/Unknown phase";
334 ReportResultAndStop(kIssueInternalError);
335 } else {
336 dispatcher_->PostTask(
337 Bind(&ConnectionDiagnostics::ResolveTargetServerIPAddress,
338 weak_ptr_factory_.GetWeakPtr(), connection_->dns_servers()));
339 }
340 break;
341 }
342 }
343 }
344
ResolveTargetServerIPAddress(const vector<string> & dns_servers)345 void ConnectionDiagnostics::ResolveTargetServerIPAddress(
346 const vector<string>& dns_servers) {
347 SLOG(this, 3) << __func__;
348
349 Error e;
350 dns_client_.reset(dns_client_factory_->CreateDNSClient(
351 connection_->IsIPv6() ? IPAddress::kFamilyIPv6 : IPAddress::kFamilyIPv4,
352 connection_->interface_name(), dns_servers, kDNSTimeoutSeconds * 1000,
353 dispatcher_, Bind(&ConnectionDiagnostics::OnDNSResolutionComplete,
354 weak_ptr_factory_.GetWeakPtr())));
355 if (!dns_client_->Start(target_url_->host(), &e)) {
356 LOG(ERROR) << __func__ << ": could not start DNS -- " << e.message();
357 AddEventWithMessage(kTypeResolveTargetServerIP, kPhaseStart, kResultFailure,
358 e.message().c_str());
359 ReportResultAndStop(kIssueInternalError);
360 return;
361 }
362
363 AddEventWithMessage(kTypeResolveTargetServerIP, kPhaseStart, kResultSuccess,
364 StringPrintf("Attempt #%d", num_dns_attempts_));
365 SLOG(this, 3) << __func__ << ": looking up " << target_url_->host()
366 << " (attempt " << num_dns_attempts_ << ")";
367 ++num_dns_attempts_;
368 }
369
PingDNSServers()370 void ConnectionDiagnostics::PingDNSServers() {
371 SLOG(this, 3) << __func__;
372
373 if (connection_->dns_servers().empty()) {
374 LOG(ERROR) << __func__ << ": no DNS servers for this connection";
375 AddEventWithMessage(kTypePingDNSServers, kPhaseStart, kResultFailure,
376 "No DNS servers for this connection");
377 ReportResultAndStop(kIssueNoDNSServersConfigured);
378 return;
379 }
380
381 id_to_pending_dns_server_icmp_session_.clear();
382 pingable_dns_servers_.clear();
383 size_t num_invalid_dns_server_addr = 0;
384 size_t num_failed_icmp_session_start = 0;
385 for (size_t i = 0; i < connection_->dns_servers().size(); ++i) {
386 // If we encounter any errors starting ping for any DNS server, carry on
387 // attempting to ping the other DNS servers rather than failing. We only
388 // need to successfully ping a single DNS server to decide whether or not
389 // DNS servers can be reached.
390 IPAddress dns_server_ip_addr(connection_->dns_servers()[i]);
391 if (dns_server_ip_addr.family() == IPAddress::kFamilyUnknown) {
392 LOG(ERROR) << __func__
393 << ": could not parse DNS server IP address from string";
394 ++num_invalid_dns_server_addr;
395 continue;
396 }
397
398 bool emplace_success =
399 (id_to_pending_dns_server_icmp_session_.emplace(
400 i, std::unique_ptr<IcmpSession>(
401 icmp_session_factory_->CreateIcmpSession(dispatcher_))))
402 .second;
403 if (emplace_success &&
404 id_to_pending_dns_server_icmp_session_.at(i)
405 ->Start(dns_server_ip_addr,
406 Bind(&ConnectionDiagnostics::OnPingDNSServerComplete,
407 weak_ptr_factory_.GetWeakPtr(), i))) {
408 SLOG(this, 3) << __func__ << ": pinging DNS server at "
409 << dns_server_ip_addr.ToString();
410 } else {
411 LOG(ERROR) << "Failed to initiate ping for DNS server at "
412 << dns_server_ip_addr.ToString();
413 ++num_failed_icmp_session_start;
414 if (emplace_success) {
415 id_to_pending_dns_server_icmp_session_.erase(i);
416 }
417 }
418 }
419
420 if (id_to_pending_dns_server_icmp_session_.empty()) {
421 AddEventWithMessage(
422 kTypePingDNSServers, kPhaseStart, kResultFailure,
423 "Could not start ping for any of the given DNS servers");
424 if (num_invalid_dns_server_addr == connection_->dns_servers().size()) {
425 ReportResultAndStop(kIssueDNSServersInvalid);
426 } else if (num_failed_icmp_session_start ==
427 connection_->dns_servers().size()) {
428 ReportResultAndStop(kIssueInternalError);
429 }
430 } else {
431 AddEvent(kTypePingDNSServers, kPhaseStart, kResultSuccess);
432 }
433 }
434
FindRouteToHost(const IPAddress & address)435 void ConnectionDiagnostics::FindRouteToHost(const IPAddress& address) {
436 SLOG(this, 3) << __func__;
437
438 RoutingTableEntry entry;
439 route_query_callback_.Reset(Bind(&ConnectionDiagnostics::OnRouteQueryResponse,
440 weak_ptr_factory_.GetWeakPtr()));
441 if (!routing_table_->RequestRouteToHost(
442 address, connection_->interface_index(), -1,
443 route_query_callback_.callback(), connection_->table_id())) {
444 route_query_callback_.Cancel();
445 LOG(ERROR) << __func__ << ": could not request route to "
446 << address.ToString();
447 AddEventWithMessage(kTypeFindRoute, kPhaseStart, kResultFailure,
448 StringPrintf("Could not request route to %s",
449 address.ToString().c_str()));
450 ReportResultAndStop(kIssueInternalError);
451 return;
452 }
453
454 // RoutingTable implementation does not have a built-in timeout mechanism
455 // for un-replied route requests, so use our own.
456 route_query_timeout_callback_.Reset(
457 Bind(&ConnectionDiagnostics::OnRouteQueryTimeout,
458 weak_ptr_factory_.GetWeakPtr()));
459 dispatcher_->PostDelayedTask(route_query_timeout_callback_.callback(),
460 kRouteQueryTimeoutSeconds * 1000);
461 AddEventWithMessage(
462 kTypeFindRoute, kPhaseStart, kResultSuccess,
463 StringPrintf("Requesting route to %s", address.ToString().c_str()));
464 }
465
FindArpTableEntry(const IPAddress & address)466 void ConnectionDiagnostics::FindArpTableEntry(const IPAddress& address) {
467 SLOG(this, 3) << __func__;
468
469 if (address.family() != IPAddress::kFamilyIPv4) {
470 // We only perform ARP table lookups for IPv4 addresses.
471 LOG(ERROR) << __func__ << ": " << address.ToString()
472 << " is not an IPv4 address";
473 AddEventWithMessage(
474 kTypeArpTableLookup, kPhaseStart, kResultFailure,
475 StringPrintf("%s is not an IPv4 address", address.ToString().c_str()));
476 ReportResultAndStop(kIssueInternalError);
477 return;
478 }
479
480 AddEventWithMessage(kTypeArpTableLookup, kPhaseStart, kResultSuccess,
481 StringPrintf("Finding ARP table entry for %s",
482 address.ToString().c_str()));
483 ByteString target_mac_address;
484 if (device_info_->GetMACAddressOfPeer(connection_->interface_index(), address,
485 &target_mac_address)) {
486 AddEventWithMessage(kTypeArpTableLookup, kPhaseEnd, kResultSuccess,
487 StringPrintf("Found ARP table entry for %s",
488 address.ToString().c_str()));
489 ReportResultAndStop(address.Equals(connection_->gateway())
490 ? kIssueGatewayNotResponding
491 : kIssueServerNotResponding);
492 return;
493 }
494
495 AddEventWithMessage(kTypeArpTableLookup, kPhaseEnd, kResultFailure,
496 StringPrintf("Could not find ARP table entry for %s",
497 address.ToString().c_str()));
498 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::CheckIpCollision,
499 weak_ptr_factory_.GetWeakPtr()));
500 }
501
FindNeighborTableEntry(const IPAddress & address)502 void ConnectionDiagnostics::FindNeighborTableEntry(const IPAddress& address) {
503 SLOG(this, 3) << __func__;
504
505 if (address.family() != IPAddress::kFamilyIPv6) {
506 // We only perform neighbor table lookups for IPv6 addresses.
507 LOG(ERROR) << __func__ << ": " << address.ToString()
508 << " is not an IPv6 address";
509 AddEventWithMessage(
510 kTypeNeighborTableLookup, kPhaseStart, kResultFailure,
511 StringPrintf("%s is not an IPv6 address", address.ToString().c_str()));
512 ReportResultAndStop(kIssueInternalError);
513 return;
514 }
515
516 neighbor_msg_listener_.reset(
517 new RTNLListener(RTNLHandler::kRequestNeighbor,
518 Bind(&ConnectionDiagnostics::OnNeighborMsgReceived,
519 weak_ptr_factory_.GetWeakPtr(), address)));
520 rtnl_handler_->RequestDump(RTNLHandler::kRequestNeighbor);
521
522 neighbor_request_timeout_callback_.Reset(
523 Bind(&ConnectionDiagnostics::OnNeighborTableRequestTimeout,
524 weak_ptr_factory_.GetWeakPtr(), address));
525 dispatcher_->PostDelayedTask(route_query_timeout_callback_.callback(),
526 kNeighborTableRequestTimeoutSeconds * 1000);
527 AddEventWithMessage(kTypeNeighborTableLookup, kPhaseStart, kResultSuccess,
528 StringPrintf("Finding neighbor table entry for %s",
529 address.ToString().c_str()));
530 }
531
CheckIpCollision()532 void ConnectionDiagnostics::CheckIpCollision() {
533 SLOG(this, 3) << __func__;
534
535 if (!device_info_->GetMACAddress(connection_->interface_index(),
536 &local_mac_address_)) {
537 LOG(ERROR) << __func__ << ": could not get local MAC address";
538 AddEventWithMessage(kTypeIPCollisionCheck, kPhaseStart, kResultFailure,
539 "Could not get local MAC address");
540 ReportResultAndStop(kIssueInternalError);
541 return;
542 }
543
544 if (!arp_client_->StartReplyListener()) {
545 LOG(ERROR) << __func__ << ": failed to start ARP client";
546 AddEventWithMessage(kTypeIPCollisionCheck, kPhaseStart, kResultFailure,
547 "Failed to start ARP client");
548 ReportResultAndStop(kIssueInternalError);
549 return;
550 }
551
552 receive_response_handler_.reset(dispatcher_->CreateReadyHandler(
553 arp_client_->socket(), IOHandler::kModeInput,
554 Bind(&ConnectionDiagnostics::OnArpReplyReceived,
555 weak_ptr_factory_.GetWeakPtr())));
556
557 ArpPacket request(connection_->local(), connection_->local(),
558 local_mac_address_, ByteString());
559 if (!arp_client_->TransmitRequest(request)) {
560 LOG(ERROR) << __func__ << ": failed to send ARP request";
561 AddEventWithMessage(kTypeIPCollisionCheck, kPhaseStart, kResultFailure,
562 "Failed to send ARP request");
563 arp_client_->Stop();
564 receive_response_handler_.reset();
565 ReportResultAndStop(kIssueInternalError);
566 return;
567 }
568
569 arp_reply_timeout_callback_.Reset(
570 Bind(&ConnectionDiagnostics::OnArpRequestTimeout,
571 weak_ptr_factory_.GetWeakPtr()));
572 dispatcher_->PostDelayedTask(arp_reply_timeout_callback_.callback(),
573 kArpReplyTimeoutSeconds * 1000);
574 AddEvent(kTypeIPCollisionCheck, kPhaseStart, kResultSuccess);
575 }
576
PingHost(const IPAddress & address)577 void ConnectionDiagnostics::PingHost(const IPAddress& address) {
578 SLOG(this, 3) << __func__;
579
580 Type event_type = address.Equals(connection_->gateway())
581 ? kTypePingGateway
582 : kTypePingTargetServer;
583 if (!icmp_session_->Start(
584 address, Bind(&ConnectionDiagnostics::OnPingHostComplete,
585 weak_ptr_factory_.GetWeakPtr(), event_type, address))) {
586 LOG(ERROR) << __func__ << ": failed to start ICMP session with "
587 << address.ToString();
588 AddEventWithMessage(event_type, kPhaseStart, kResultFailure,
589 StringPrintf("Failed to start ICMP session with %s",
590 address.ToString().c_str()));
591 ReportResultAndStop(kIssueInternalError);
592 return;
593 }
594
595 AddEventWithMessage(event_type, kPhaseStart, kResultSuccess,
596 StringPrintf("Pinging %s", address.ToString().c_str()));
597 }
598
OnPingDNSServerComplete(int dns_server_index,const vector<base::TimeDelta> & result)599 void ConnectionDiagnostics::OnPingDNSServerComplete(
600 int dns_server_index, const vector<base::TimeDelta>& result) {
601 SLOG(this, 3) << __func__ << "(DNS server index " << dns_server_index << ")";
602
603 if (!id_to_pending_dns_server_icmp_session_.erase(dns_server_index)) {
604 // This should not happen, since we expect exactly one callback for each
605 // IcmpSession started with a unique |dns_server_index| value in
606 // ConnectionDiagnostics::PingDNSServers. However, if this does happen for
607 // any reason, |id_to_pending_dns_server_icmp_session_| might never become
608 // empty, and we might never move to the next step after pinging DNS
609 // servers. Stop diagnostics immediately to prevent this from happening.
610 LOG(ERROR) << __func__
611 << ": no matching pending DNS server ICMP session found";
612 ReportResultAndStop(kIssueInternalError);
613 return;
614 }
615
616 if (IcmpSession::AnyRepliesReceived(result)) {
617 pingable_dns_servers_.push_back(
618 connection_->dns_servers()[dns_server_index]);
619 }
620 if (!id_to_pending_dns_server_icmp_session_.empty()) {
621 SLOG(this, 3) << __func__ << ": not yet finished pinging all DNS servers";
622 return;
623 }
624
625 if (pingable_dns_servers_.empty()) {
626 // Use the first DNS server on the list and diagnose its connectivity.
627 IPAddress first_dns_server_ip_addr(connection_->dns_servers()[0]);
628 if (first_dns_server_ip_addr.family() == IPAddress::kFamilyUnknown) {
629 LOG(ERROR) << __func__ << ": could not parse DNS server IP address "
630 << connection_->dns_servers()[0];
631 AddEventWithMessage(kTypePingDNSServers, kPhaseEnd, kResultFailure,
632 StringPrintf("Could not parse DNS "
633 "server IP address %s",
634 connection_->dns_servers()[0].c_str()));
635 ReportResultAndStop(kIssueInternalError);
636 return;
637 }
638 AddEventWithMessage(
639 kTypePingDNSServers, kPhaseEnd, kResultFailure,
640 StringPrintf(
641 "No DNS servers responded to pings. Pinging first DNS server at %s",
642 first_dns_server_ip_addr.ToString().c_str()));
643 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindRouteToHost,
644 weak_ptr_factory_.GetWeakPtr(),
645 first_dns_server_ip_addr));
646 return;
647 }
648
649 if (pingable_dns_servers_.size() != connection_->dns_servers().size()) {
650 AddEventWithMessage(kTypePingDNSServers, kPhaseEnd, kResultSuccess,
651 "Pinged some, but not all, DNS servers successfully");
652 } else {
653 AddEventWithMessage(kTypePingDNSServers, kPhaseEnd, kResultSuccess,
654 "Pinged all DNS servers successfully");
655 }
656
657 if (num_dns_attempts_ < kMaxDNSRetries) {
658 dispatcher_->PostTask(
659 Bind(&ConnectionDiagnostics::ResolveTargetServerIPAddress,
660 weak_ptr_factory_.GetWeakPtr(), pingable_dns_servers_));
661 } else {
662 SLOG(this, 3) << __func__ << ": max DNS resolution attempts reached";
663 ReportResultAndStop(kIssueDNSServerNoResponse);
664 }
665 }
666
OnDNSResolutionComplete(const Error & error,const IPAddress & address)667 void ConnectionDiagnostics::OnDNSResolutionComplete(const Error& error,
668 const IPAddress& address) {
669 SLOG(this, 3) << __func__;
670
671 if (error.IsSuccess()) {
672 AddEventWithMessage(
673 kTypeResolveTargetServerIP, kPhaseEnd, kResultSuccess,
674 StringPrintf("Target address is %s", address.ToString().c_str()));
675 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::PingHost,
676 weak_ptr_factory_.GetWeakPtr(), address));
677 } else if (error.type() == Error::kOperationTimeout) {
678 AddEventWithMessage(
679 kTypeResolveTargetServerIP, kPhaseEnd, kResultTimeout,
680 StringPrintf("DNS resolution timed out: %s", error.message().c_str()));
681 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::PingDNSServers,
682 weak_ptr_factory_.GetWeakPtr()));
683 } else {
684 AddEventWithMessage(
685 kTypeResolveTargetServerIP, kPhaseEnd, kResultFailure,
686 StringPrintf("DNS resolution failed: %s", error.message().c_str()));
687 ReportResultAndStop(kIssueDNSServerMisconfig);
688 }
689 }
690
OnPingHostComplete(Type ping_event_type,const IPAddress & address_pinged,const vector<base::TimeDelta> & result)691 void ConnectionDiagnostics::OnPingHostComplete(
692 Type ping_event_type, const IPAddress& address_pinged,
693 const vector<base::TimeDelta>& result) {
694 SLOG(this, 3) << __func__;
695
696 string message(StringPrintf("Destination: %s, Latencies: ",
697 address_pinged.ToString().c_str()));
698 for (const auto& latency : result) {
699 if (latency.is_zero()) {
700 message.append("NA ");
701 } else {
702 message.append(StringPrintf("%4.2fms ", latency.InMillisecondsF()));
703 }
704 }
705
706 Result result_type =
707 IcmpSession::AnyRepliesReceived(result) ? kResultSuccess : kResultFailure;
708 if (IcmpSession::IsPacketLossPercentageGreaterThan(result, 50)) {
709 LOG(WARNING) << __func__ << ": high packet loss when pinging "
710 << address_pinged.ToString();
711 }
712 AddEventWithMessage(ping_event_type, kPhaseEnd, result_type, message);
713 if (result_type == kResultSuccess) {
714 // If pinging the target web server succeeded, we have found a HTTP issue or
715 // broken portal. Otherwise, if pinging the gateway succeeded, we have found
716 // an upstream connectivity problem or gateway issue.
717 ReportResultAndStop(ping_event_type == kTypePingGateway
718 ? kIssueGatewayUpstream
719 : kIssueHTTPBrokenPortal);
720 } else if (result_type == kResultFailure &&
721 ping_event_type == kTypePingTargetServer) {
722 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindRouteToHost,
723 weak_ptr_factory_.GetWeakPtr(), address_pinged));
724 } else if (result_type == kResultFailure &&
725 ping_event_type == kTypePingGateway &&
726 address_pinged.family() == IPAddress::kFamilyIPv4) {
727 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindArpTableEntry,
728 weak_ptr_factory_.GetWeakPtr(), address_pinged));
729 } else {
730 // We failed to ping an IPv6 gateway. Check for neighbor table entry for
731 // this gateway.
732 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindNeighborTableEntry,
733 weak_ptr_factory_.GetWeakPtr(), address_pinged));
734 }
735 }
736
OnArpReplyReceived(int fd)737 void ConnectionDiagnostics::OnArpReplyReceived(int fd) {
738 SLOG(this, 3) << __func__ << "(fd " << fd << ")";
739
740 ArpPacket packet;
741 ByteString sender;
742 if (!arp_client_->ReceivePacket(&packet, &sender)) {
743 return;
744 }
745
746 if (!packet.IsReply()) {
747 SLOG(this, 4) << __func__ << ": this is not a reply packet. Ignoring.";
748 return;
749 }
750
751 if (!connection_->local().address().Equals(
752 packet.remote_ip_address().address())) {
753 SLOG(this, 4) << __func__ << ": response is not for our IP address.";
754 return;
755 }
756
757 if (!local_mac_address_.Equals(packet.remote_mac_address())) {
758 SLOG(this, 4) << __func__ << ": response is not for our MAC address.";
759 return;
760 }
761
762 if (connection_->local().address().Equals(
763 packet.local_ip_address().address())) {
764 arp_reply_timeout_callback_.Cancel();
765 AddEventWithMessage(kTypeIPCollisionCheck, kPhaseEnd, kResultSuccess,
766 "IP collision found");
767 ReportResultAndStop(kIssueIPCollision);
768 }
769 }
770
OnArpRequestTimeout()771 void ConnectionDiagnostics::OnArpRequestTimeout() {
772 SLOG(this, 3) << __func__;
773
774 AddEventWithMessage(kTypeIPCollisionCheck, kPhaseEnd, kResultFailure,
775 "No IP collision found");
776 // TODO(samueltan): perform link-level diagnostics.
777 if (DoesPreviousEventMatch(
778 kTypePingGateway, kPhaseEnd, kResultFailure,
779 kNumEventsFromPingGatewayEndToIpCollisionCheckEnd)) {
780 // We came here from failing to ping the gateway.
781 ReportResultAndStop(kIssueGatewayArpFailed);
782 } else {
783 // Otherwise, we must have come here from failing to ping the target web
784 // server and successfully finding a route.
785 ReportResultAndStop(kIssueServerArpFailed);
786 }
787 }
788
OnNeighborMsgReceived(const IPAddress & address_queried,const RTNLMessage & msg)789 void ConnectionDiagnostics::OnNeighborMsgReceived(
790 const IPAddress& address_queried, const RTNLMessage& msg) {
791 SLOG(this, 3) << __func__;
792
793 DCHECK(msg.type() == RTNLMessage::kTypeNeighbor);
794 const RTNLMessage::NeighborStatus& neighbor = msg.neighbor_status();
795
796 if (neighbor.type != NDA_DST || !msg.HasAttribute(NDA_DST)) {
797 SLOG(this, 4) << __func__ << ": neighbor message has no destination";
798 return;
799 }
800
801 IPAddress address(msg.family(), msg.GetAttribute(NDA_DST));
802 if (!address.Equals(address_queried)) {
803 SLOG(this, 4) << __func__ << ": destination address (" << address.ToString()
804 << ") does not match address queried ("
805 << address_queried.ToString() << ")";
806 return;
807 }
808
809 neighbor_request_timeout_callback_.Cancel();
810 if (!(neighbor.state & (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE))) {
811 AddEventWithMessage(
812 kTypeNeighborTableLookup, kPhaseEnd, kResultFailure,
813 StringPrintf("Neighbor table entry for %s is not in a connected state "
814 "(actual state = 0x%2x)",
815 address_queried.ToString().c_str(), neighbor.state));
816 ReportResultAndStop(address_queried.Equals(connection_->gateway())
817 ? kIssueGatewayNeighborEntryNotConnected
818 : kIssueServerNeighborEntryNotConnected);
819 return;
820 }
821
822 AddEventWithMessage(kTypeNeighborTableLookup, kPhaseEnd, kResultSuccess,
823 StringPrintf("Neighbor table entry found for %s",
824 address_queried.ToString().c_str()));
825 ReportResultAndStop(address_queried.Equals(connection_->gateway())
826 ? kIssueGatewayNotResponding
827 : kIssueServerNotResponding);
828 }
829
OnNeighborTableRequestTimeout(const IPAddress & address_queried)830 void ConnectionDiagnostics::OnNeighborTableRequestTimeout(
831 const IPAddress& address_queried) {
832 SLOG(this, 3) << __func__;
833
834 AddEventWithMessage(kTypeNeighborTableLookup, kPhaseEnd, kResultFailure,
835 StringPrintf("Failed to find neighbor table entry for %s",
836 address_queried.ToString().c_str()));
837 ReportResultAndStop(address_queried.Equals(connection_->gateway())
838 ? kIssueGatewayNoNeighborEntry
839 : kIssueServerNoNeighborEntry);
840 }
841
OnRouteQueryResponse(int interface_index,const RoutingTableEntry & entry)842 void ConnectionDiagnostics::OnRouteQueryResponse(
843 int interface_index, const RoutingTableEntry& entry) {
844 SLOG(this, 3) << __func__ << "(interface " << interface_index << ")";
845
846 if (interface_index != connection_->interface_index()) {
847 SLOG(this, 3) << __func__
848 << ": route query response not meant for this interface";
849 return;
850 }
851
852 route_query_timeout_callback_.Cancel();
853 AddEventWithMessage(
854 kTypeFindRoute, kPhaseEnd, kResultSuccess,
855 StringPrintf("Found route to %s (%s)", entry.dst.ToString().c_str(),
856 entry.gateway.IsDefault() ? "remote" : "local"));
857 if (!entry.gateway.IsDefault()) {
858 // We have a route to a remote destination, so ping the route gateway to
859 // check if we have a means of reaching this host.
860 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::PingHost,
861 weak_ptr_factory_.GetWeakPtr(), entry.gateway));
862 } else if (entry.dst.family() == IPAddress::kFamilyIPv4) {
863 // We have a route to a local IPv4 destination, so check for an ARP table
864 // entry.
865 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindArpTableEntry,
866 weak_ptr_factory_.GetWeakPtr(), entry.dst));
867 } else {
868 // We have a route to a local IPv6 destination, so check for a neighbor
869 // table entry.
870 dispatcher_->PostTask(Bind(&ConnectionDiagnostics::FindNeighborTableEntry,
871 weak_ptr_factory_.GetWeakPtr(), entry.dst));
872 }
873 }
874
OnRouteQueryTimeout()875 void ConnectionDiagnostics::OnRouteQueryTimeout() {
876 SLOG(this, 3) << __func__;
877
878 AddEvent(kTypeFindRoute, kPhaseEnd, kResultFailure);
879 ReportResultAndStop(kIssueRouting);
880 }
881
DoesPreviousEventMatch(Type type,Phase phase,Result result,size_t num_events_ago)882 bool ConnectionDiagnostics::DoesPreviousEventMatch(Type type, Phase phase,
883 Result result,
884 size_t num_events_ago) {
885 int event_index = diagnostic_events_.size() - 1 - num_events_ago;
886 if (event_index < 0) {
887 LOG(ERROR) << __func__ << ": requested event " << num_events_ago
888 << " before the last event, but we only have "
889 << diagnostic_events_.size() << " logged";
890 return false;
891 }
892
893 return (diagnostic_events_[event_index].type == type &&
894 diagnostic_events_[event_index].phase == phase &&
895 diagnostic_events_[event_index].result == result);
896 }
897
898 } // namespace shill
899