1 //
2 // Copyright (C) 2013 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "shill/traffic_monitor.h"
18 
19 #include <base/bind.h>
20 #include <base/strings/stringprintf.h>
21 #include <netinet/in.h>
22 
23 #include "shill/device.h"
24 #include "shill/device_info.h"
25 #include "shill/event_dispatcher.h"
26 #include "shill/logging.h"
27 #include "shill/socket_info_reader.h"
28 
29 using base::StringPrintf;
30 using std::string;
31 using std::vector;
32 
33 namespace shill {
34 
35 namespace Logging {
36 static auto kModuleLogScope = ScopeLogger::kLink;
ObjectID(Device * d)37 static string ObjectID(Device* d) { return d->link_name(); }
38 }
39 
40 // static
41 const uint16_t TrafficMonitor::kDnsPort = 53;
42 const int64_t TrafficMonitor::kDnsTimedOutThresholdSeconds = 15;
43 const int TrafficMonitor::kMinimumFailedSamplesToTrigger = 2;
44 const int64_t TrafficMonitor::kSamplingIntervalMilliseconds = 5000;
45 
TrafficMonitor(const DeviceRefPtr & device,EventDispatcher * dispatcher)46 TrafficMonitor::TrafficMonitor(const DeviceRefPtr& device,
47                                EventDispatcher* dispatcher)
48     : device_(device),
49       dispatcher_(dispatcher),
50       socket_info_reader_(new SocketInfoReader),
51       accummulated_congested_tx_queues_samples_(0),
52       connection_info_reader_(new ConnectionInfoReader),
53       accummulated_dns_failures_samples_(0) {
54 }
55 
~TrafficMonitor()56 TrafficMonitor::~TrafficMonitor() {
57   Stop();
58 }
59 
Start()60 void TrafficMonitor::Start() {
61   SLOG(device_.get(), 2) << __func__;
62   Stop();
63 
64   sample_traffic_callback_.Reset(base::Bind(&TrafficMonitor::SampleTraffic,
65                                             base::Unretained(this)));
66   dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(),
67                                kSamplingIntervalMilliseconds);
68 }
69 
Stop()70 void TrafficMonitor::Stop() {
71   SLOG(device_.get(), 2) << __func__;
72   sample_traffic_callback_.Cancel();
73   ResetCongestedTxQueuesStats();
74   ResetDnsFailingStats();
75 }
76 
ResetCongestedTxQueuesStats()77 void TrafficMonitor::ResetCongestedTxQueuesStats() {
78   accummulated_congested_tx_queues_samples_ = 0;
79 }
80 
ResetCongestedTxQueuesStatsWithLogging()81 void TrafficMonitor::ResetCongestedTxQueuesStatsWithLogging() {
82   SLOG(device_.get(), 2) << __func__ << ": Tx-queues decongested";
83   ResetCongestedTxQueuesStats();
84 }
85 
BuildIPPortToTxQueueLength(const vector<SocketInfo> & socket_infos,IPPortToTxQueueLengthMap * tx_queue_lengths)86 void TrafficMonitor::BuildIPPortToTxQueueLength(
87     const vector<SocketInfo>& socket_infos,
88     IPPortToTxQueueLengthMap* tx_queue_lengths) {
89   SLOG(device_.get(), 3) << __func__;
90   string device_ip_address = device_->ipconfig()->properties().address;
91   for (const auto& info : socket_infos) {
92     SLOG(device_.get(), 4) << "SocketInfo(IP="
93                            << info.local_ip_address().ToString()
94                            << ", TX=" << info.transmit_queue_value()
95                            << ", State=" << info.connection_state()
96                            << ", TimerState=" << info.timer_state();
97     if (info.local_ip_address().ToString() != device_ip_address ||
98         info.transmit_queue_value() == 0 ||
99         info.connection_state() != SocketInfo::kConnectionStateEstablished ||
100         (info.timer_state() != SocketInfo::kTimerStateRetransmitTimerPending &&
101          info.timer_state() !=
102             SocketInfo::kTimerStateZeroWindowProbeTimerPending)) {
103       SLOG(device_.get(), 4) << "Connection Filtered.";
104       continue;
105     }
106     SLOG(device_.get(), 3) << "Monitoring connection: TX="
107                            << info.transmit_queue_value()
108                            << " TimerState=" << info.timer_state();
109 
110     string local_ip_port =
111         StringPrintf("%s:%d",
112                      info.local_ip_address().ToString().c_str(),
113                      info.local_port());
114     (*tx_queue_lengths)[local_ip_port] = info.transmit_queue_value();
115   }
116 }
117 
IsCongestedTxQueues()118 bool TrafficMonitor::IsCongestedTxQueues() {
119   SLOG(device_.get(), 4) << __func__;
120   vector<SocketInfo> socket_infos;
121   if (!socket_info_reader_->LoadTcpSocketInfo(&socket_infos) ||
122       socket_infos.empty()) {
123     SLOG(device_.get(), 3) << __func__ << ": Empty socket info";
124     ResetCongestedTxQueuesStatsWithLogging();
125     return false;
126   }
127   bool congested_tx_queues = true;
128   IPPortToTxQueueLengthMap curr_tx_queue_lengths;
129   BuildIPPortToTxQueueLength(socket_infos, &curr_tx_queue_lengths);
130   if (curr_tx_queue_lengths.empty()) {
131     SLOG(device_.get(), 3) << __func__ << ": No interesting socket info";
132     ResetCongestedTxQueuesStatsWithLogging();
133   } else {
134     for (const auto& length_entry : old_tx_queue_lengths_) {
135       IPPortToTxQueueLengthMap::iterator curr_tx_queue_it =
136           curr_tx_queue_lengths.find(length_entry.first);
137       if (curr_tx_queue_it == curr_tx_queue_lengths.end() ||
138           curr_tx_queue_it->second < length_entry.second) {
139         congested_tx_queues = false;
140         // TODO(armansito): If we had a false positive earlier, we may
141         // want to correct it here by invoking a "connection back to normal
142         // callback", so that the OutOfCredits property can be set to
143         // false.
144         break;
145       }
146     }
147     if (congested_tx_queues) {
148       ++accummulated_congested_tx_queues_samples_;
149       SLOG(device_.get(), 2) << __func__
150                              << ": Congested tx-queues detected ("
151                              << accummulated_congested_tx_queues_samples_
152                              << ")";
153     }
154   }
155   old_tx_queue_lengths_ = curr_tx_queue_lengths;
156 
157   return congested_tx_queues;
158 }
159 
ResetDnsFailingStats()160 void TrafficMonitor::ResetDnsFailingStats() {
161   accummulated_dns_failures_samples_ = 0;
162 }
163 
ResetDnsFailingStatsWithLogging()164 void TrafficMonitor::ResetDnsFailingStatsWithLogging() {
165   SLOG(device_.get(), 2) << __func__ << ": DNS queries restored";
166   ResetDnsFailingStats();
167 }
168 
IsDnsFailing()169 bool TrafficMonitor::IsDnsFailing() {
170   SLOG(device_.get(), 4) << __func__;
171   vector<ConnectionInfo> connection_infos;
172   if (!connection_info_reader_->LoadConnectionInfo(&connection_infos) ||
173       connection_infos.empty()) {
174     SLOG(device_.get(), 3) << __func__ << ": Empty connection info";
175   } else {
176     // The time-to-expire counter is used to determine when a DNS request
177     // has timed out.  This counter is the number of seconds remaining until
178     // the entry is removed from the system IP connection tracker.  The
179     // default time is 30 seconds.  This is too long of a wait.  Instead, we
180     // want to time out at |kDnsTimedOutThresholdSeconds|.  Unfortunately,
181     // we cannot simply look for entries less than
182     // |kDnsTimedOutThresholdSeconds| because we will count the entry
183     // multiple times once its time-to-expire is less than
184     // |kDnsTimedOutThresholdSeconds|.  To ensure that we only count an
185     // entry once, we look for entries in this time window between
186     // |kDnsTimedOutThresholdSeconds| and |kDnsTimedOutLowerThresholdSeconds|.
187     const int64_t kDnsTimedOutLowerThresholdSeconds =
188         kDnsTimedOutThresholdSeconds - kSamplingIntervalMilliseconds / 1000;
189     string device_ip_address = device_->ipconfig()->properties().address;
190     for (const auto& info : connection_infos) {
191       if (info.protocol() != IPPROTO_UDP ||
192           info.time_to_expire_seconds() > kDnsTimedOutThresholdSeconds ||
193           info.time_to_expire_seconds() <= kDnsTimedOutLowerThresholdSeconds ||
194           !info.is_unreplied() ||
195           info.original_source_ip_address().ToString() != device_ip_address ||
196           info.original_destination_port() != kDnsPort)
197         continue;
198 
199       ++accummulated_dns_failures_samples_;
200       SLOG(device_.get(), 2) << __func__
201                              << ": DNS failures detected ("
202                              << accummulated_dns_failures_samples_ << ")";
203       return true;
204     }
205   }
206   ResetDnsFailingStatsWithLogging();
207   return false;
208 }
209 
SampleTraffic()210 void TrafficMonitor::SampleTraffic() {
211   SLOG(device_.get(), 3) << __func__;
212 
213   // Schedule the sample callback first, so it is possible for the network
214   // problem callback to stop the traffic monitor.
215   dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(),
216                                kSamplingIntervalMilliseconds);
217 
218   if (IsCongestedTxQueues() &&
219       accummulated_congested_tx_queues_samples_ ==
220           kMinimumFailedSamplesToTrigger) {
221     LOG(WARNING) << "Congested tx queues detected, out-of-credits?";
222     network_problem_detected_callback_.Run(kNetworkProblemCongestedTxQueue);
223   } else if (IsDnsFailing() &&
224              accummulated_dns_failures_samples_ ==
225                  kMinimumFailedSamplesToTrigger) {
226     LOG(WARNING) << "DNS queries failing, out-of-credits?";
227     network_problem_detected_callback_.Run(kNetworkProblemDNSFailure);
228   }
229 }
230 
231 }  // namespace shill
232