1 //
2 // Copyright (C) 2015 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "shill/active_link_monitor.h"
18
19 #include <string>
20 #include <vector>
21
22 #include <base/bind.h>
23 #include <base/strings/stringprintf.h>
24 #include <base/strings/string_util.h>
25
26 #include "shill/arp_client.h"
27 #include "shill/arp_packet.h"
28 #include "shill/connection.h"
29 #include "shill/device_info.h"
30 #include "shill/event_dispatcher.h"
31 #include "shill/logging.h"
32 #include "shill/metrics.h"
33 #include "shill/net/ip_address.h"
34 #include "shill/net/shill_time.h"
35
36 using base::Bind;
37 using base::Unretained;
38 using std::string;
39
40 namespace shill {
41
42 namespace Logging {
43 static auto kModuleLogScope = ScopeLogger::kLink;
ObjectID(Connection * c)44 static string ObjectID(Connection* c) { return c->interface_name(); }
45 }
46
47 const int ActiveLinkMonitor::kDefaultTestPeriodMilliseconds = 5000;
48 const int ActiveLinkMonitor::kFailureThreshold = 5;
49 const int ActiveLinkMonitor::kFastTestPeriodMilliseconds = 200;
50 const int ActiveLinkMonitor::kMaxResponseSampleFilterDepth = 5;
51 const int ActiveLinkMonitor::kUnicastReplyReliabilityThreshold = 10;
52
ActiveLinkMonitor(const ConnectionRefPtr & connection,EventDispatcher * dispatcher,Metrics * metrics,DeviceInfo * device_info,const FailureCallback & failure_callback,const SuccessCallback & success_callback)53 ActiveLinkMonitor::ActiveLinkMonitor(const ConnectionRefPtr& connection,
54 EventDispatcher* dispatcher,
55 Metrics* metrics,
56 DeviceInfo* device_info,
57 const FailureCallback& failure_callback,
58 const SuccessCallback& success_callback)
59 : connection_(connection),
60 dispatcher_(dispatcher),
61 metrics_(metrics),
62 device_info_(device_info),
63 failure_callback_(failure_callback),
64 success_callback_(success_callback),
65 // Connection is not provided when this is used as a mock for testing
66 // purpose.
67 arp_client_(
68 new ArpClient(connection ? connection->interface_index() : 0)),
69 test_period_milliseconds_(kDefaultTestPeriodMilliseconds),
70 broadcast_failure_count_(0),
71 unicast_failure_count_(0),
72 broadcast_success_count_(0),
73 unicast_success_count_(0),
74 is_unicast_(false),
75 gateway_supports_unicast_arp_(false),
76 response_sample_count_(0),
77 response_sample_bucket_(0),
78 time_(Time::GetInstance()) {
79 }
80
~ActiveLinkMonitor()81 ActiveLinkMonitor::~ActiveLinkMonitor() {
82 Stop();
83 }
84
Start(int test_period)85 bool ActiveLinkMonitor::Start(int test_period) {
86 SLOG(connection_.get(), 2) << "In " << __func__ << ".";
87 StopMonitorCycle();
88 return StartInternal(test_period);
89 }
90
Stop()91 void ActiveLinkMonitor::Stop() {
92 SLOG(connection_.get(), 2) << "In " << __func__ << ".";
93 // Stop current cycle.
94 StopMonitorCycle();
95
96 // Clear stats accumulated from previous monitor cycles.
97 local_mac_address_.Clear();
98 gateway_mac_address_.Clear();
99 broadcast_success_count_ = 0;
100 unicast_success_count_ = 0;
101 broadcast_failure_count_ = 0;
102 unicast_failure_count_ = 0;
103 is_unicast_ = false;
104 gateway_supports_unicast_arp_ = false;
105 response_sample_bucket_ = 0;
106 response_sample_count_ = 0;
107 }
108
GetResponseTimeMilliseconds() const109 int ActiveLinkMonitor::GetResponseTimeMilliseconds() const {
110 return response_sample_count_ ?
111 response_sample_bucket_ / response_sample_count_ : 0;
112 }
113
IsGatewayFound() const114 bool ActiveLinkMonitor::IsGatewayFound() const {
115 return !gateway_mac_address_.IsZero();
116 }
117
StartInternal(int probe_period_milliseconds)118 bool ActiveLinkMonitor::StartInternal(int probe_period_milliseconds) {
119 test_period_milliseconds_ = probe_period_milliseconds;
120 if (test_period_milliseconds_ > kDefaultTestPeriodMilliseconds) {
121 LOG(WARNING) << "Long test period; UMA stats will be truncated.";
122 }
123
124 if (!device_info_->GetMACAddress(
125 connection_->interface_index(), &local_mac_address_)) {
126 LOG(ERROR) << "Could not get local MAC address.";
127 metrics_->NotifyLinkMonitorFailure(
128 connection_->technology(),
129 Metrics::kLinkMonitorMacAddressNotFound,
130 0, 0, 0);
131 Stop();
132 return false;
133 }
134
135 if (!StartArpClient()) {
136 LOG(ERROR) << "Failed to start ARP client.";
137 metrics_->NotifyLinkMonitorFailure(
138 connection_->technology(),
139 Metrics::kLinkMonitorClientStartFailure,
140 0, 0, 0);
141 Stop();
142 return false;
143 }
144
145 if (gateway_mac_address_.IsEmpty()) {
146 gateway_mac_address_ = ByteString(local_mac_address_.GetLength());
147 }
148 send_request_callback_.Reset(
149 Bind(&ActiveLinkMonitor::SendRequest, Unretained(this)));
150 // Post a task to send ARP request instead of calling it synchronously, to
151 // maintain consistent expectation in the case of send failures, which will
152 // always invoke failure callback.
153 dispatcher_->PostTask(send_request_callback_.callback());
154 return true;
155 }
156
StopMonitorCycle()157 void ActiveLinkMonitor::StopMonitorCycle() {
158 StopArpClient();
159 send_request_callback_.Cancel();
160 timerclear(&sent_request_at_);
161 }
162
AddResponseTimeSample(int response_time_milliseconds)163 void ActiveLinkMonitor::AddResponseTimeSample(int response_time_milliseconds) {
164 SLOG(connection_.get(), 2) << "In " << __func__ << " with sample "
165 << response_time_milliseconds << ".";
166 metrics_->NotifyLinkMonitorResponseTimeSampleAdded(
167 connection_->technology(), response_time_milliseconds);
168 response_sample_bucket_ += response_time_milliseconds;
169 if (response_sample_count_ < kMaxResponseSampleFilterDepth) {
170 ++response_sample_count_;
171 } else {
172 response_sample_bucket_ =
173 response_sample_bucket_ * kMaxResponseSampleFilterDepth /
174 (kMaxResponseSampleFilterDepth + 1);
175 }
176 }
177
178 // static
HardwareAddressToString(const ByteString & address)179 string ActiveLinkMonitor::HardwareAddressToString(const ByteString& address) {
180 std::vector<string> address_parts;
181 for (size_t i = 0; i < address.GetLength(); ++i) {
182 address_parts.push_back(
183 base::StringPrintf("%02x", address.GetConstData()[i]));
184 }
185 return base::JoinString(address_parts, ":");
186 }
187
StartArpClient()188 bool ActiveLinkMonitor::StartArpClient() {
189 if (!arp_client_->StartReplyListener()) {
190 return false;
191 }
192 SLOG(connection_.get(), 4) << "Created ARP client; listening on socket "
193 << arp_client_->socket() << ".";
194 receive_response_handler_.reset(
195 dispatcher_->CreateReadyHandler(
196 arp_client_->socket(),
197 IOHandler::kModeInput,
198 Bind(&ActiveLinkMonitor::ReceiveResponse, Unretained(this))));
199 return true;
200 }
201
StopArpClient()202 void ActiveLinkMonitor::StopArpClient() {
203 arp_client_->Stop();
204 receive_response_handler_.reset();
205 }
206
AddMissedResponse()207 bool ActiveLinkMonitor::AddMissedResponse() {
208 SLOG(connection_.get(), 2) << "In " << __func__ << ".";
209 AddResponseTimeSample(test_period_milliseconds_);
210
211 if (is_unicast_) {
212 if (gateway_supports_unicast_arp_) {
213 ++unicast_failure_count_;
214 }
215 unicast_success_count_ = 0;
216 } else {
217 ++broadcast_failure_count_;
218 broadcast_success_count_ = 0;
219 }
220
221 if (unicast_failure_count_ + broadcast_failure_count_ >= kFailureThreshold) {
222 LOG(ERROR) << "Link monitor has reached the failure threshold with "
223 << broadcast_failure_count_
224 << " broadcast failures and "
225 << unicast_failure_count_
226 << " unicast failures.";
227 failure_callback_.Run(Metrics::kLinkMonitorFailureThresholdReached,
228 broadcast_failure_count_,
229 unicast_failure_count_);
230 Stop();
231 return true;
232 }
233 is_unicast_ = !is_unicast_;
234 return false;
235 }
236
ReceiveResponse(int fd)237 void ActiveLinkMonitor::ReceiveResponse(int fd) {
238 SLOG(connection_.get(), 2) << "In " << __func__ << ".";
239 ArpPacket packet;
240 ByteString sender;
241 if (!arp_client_->ReceivePacket(&packet, &sender)) {
242 return;
243 }
244
245 if (!packet.IsReply()) {
246 SLOG(connection_.get(), 4) << "This is not a reply packet. Ignoring.";
247 return;
248 }
249
250 if (!connection_->local().address().Equals(
251 packet.remote_ip_address().address())) {
252 SLOG(connection_.get(), 4) << "Response is not for our IP address.";
253 return;
254 }
255
256 if (!local_mac_address_.Equals(packet.remote_mac_address())) {
257 SLOG(connection_.get(), 4) << "Response is not for our MAC address.";
258 return;
259 }
260
261 if (!connection_->gateway().address().Equals(
262 packet.local_ip_address().address())) {
263 SLOG(connection_.get(), 4)
264 << "Response is not from the gateway IP address.";
265 return;
266 }
267
268 struct timeval now, elapsed_time;
269 time_->GetTimeMonotonic(&now);
270 timersub(&now, &sent_request_at_, &elapsed_time);
271
272 AddResponseTimeSample(elapsed_time.tv_sec * 1000 +
273 elapsed_time.tv_usec / 1000);
274
275 if (is_unicast_) {
276 ++unicast_success_count_;
277 unicast_failure_count_ = 0;
278 if (unicast_success_count_ >= kUnicastReplyReliabilityThreshold) {
279 SLOG_IF(Link, 2, !gateway_supports_unicast_arp_)
280 << "Gateway is now considered a reliable unicast responder. "
281 "Unicast failures will now count.";
282 gateway_supports_unicast_arp_ = true;
283 }
284 } else {
285 ++broadcast_success_count_;
286 broadcast_failure_count_ = 0;
287 }
288
289 if (!gateway_mac_address_.Equals(packet.local_mac_address())) {
290 const ByteString& new_mac_address = packet.local_mac_address();
291 if (!IsGatewayFound()) {
292 SLOG(connection_.get(), 2) << "Found gateway at "
293 << HardwareAddressToString(new_mac_address);
294 } else {
295 SLOG(connection_.get(), 2) << "Gateway MAC address changed.";
296 }
297 gateway_mac_address_ = new_mac_address;
298 }
299
300 is_unicast_ = !is_unicast_;
301
302 // Stop the current cycle, and invoke the success callback. All the
303 // accumulated stats regarding the gateway are not cleared.
304 StopMonitorCycle();
305 success_callback_.Run();
306 }
307
SendRequest()308 void ActiveLinkMonitor::SendRequest() {
309 SLOG(connection_.get(), 2) << "In " << __func__ << ".";
310
311 // Timeout waiting for ARP reply and exceed the failure threshold.
312 if (timerisset(&sent_request_at_) && AddMissedResponse()) {
313 return;
314 }
315
316 ByteString destination_mac_address(gateway_mac_address_.GetLength());
317 if (!IsGatewayFound()) {
318 // The remote MAC addess is set by convention to be all-zeroes in the
319 // ARP header if not known. The ArpClient will translate an all-zeroes
320 // remote address into a send to the broadcast (all-ones) address in
321 // the Ethernet frame header.
322 SLOG_IF(Link, 2, is_unicast_) << "Sending broadcast since "
323 << "gateway MAC is unknown";
324 is_unicast_ = false;
325 } else if (is_unicast_) {
326 destination_mac_address = gateway_mac_address_;
327 }
328
329 ArpPacket request(connection_->local(), connection_->gateway(),
330 local_mac_address_, destination_mac_address);
331 if (!arp_client_->TransmitRequest(request)) {
332 LOG(ERROR) << "Failed to send ARP request. Stopping.";
333 failure_callback_.Run(Metrics::kLinkMonitorTransmitFailure,
334 broadcast_failure_count_,
335 unicast_failure_count_);
336 Stop();
337 return;
338 }
339
340 time_->GetTimeMonotonic(&sent_request_at_);
341
342 dispatcher_->PostDelayedTask(send_request_callback_.callback(),
343 test_period_milliseconds_);
344 }
345
346 } // namespace shill
347