1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #define LOG_TAG "resolv"
17
18 #include <arpa/nameser.h>
19 #include <stdbool.h>
20 #include <string.h>
21
22 #include <android-base/logging.h>
23
24 #include "stats.h"
25
// Calculates the round-trip time, in milliseconds, between start time t0 and end time t1.
// Each timestamp is truncated to whole milliseconds before the subtraction, so the result is
// negative when t1 lies before t0.
int res_stats_calculate_rtt(const timespec* t1, const timespec* t0) {
    // Widen to long long before multiplying: where tv_sec is only 32 bits wide, tv_sec * 1000
    // overflows (undefined behavior for signed types) once tv_sec exceeds roughly 2.1 million.
    // Seconds are multiplied by 1000 and nanoseconds divided by one million to get milliseconds.
    const long long ms0 = (long long) t0->tv_sec * 1000 + t0->tv_nsec / 1000000;
    const long long ms1 = (long long) t1->tv_sec * 1000 + t1->tv_nsec / 1000000;
    return (int) (ms1 - ms0);
}
33
34 // Create a sample for calculating server reachability statistics.
res_stats_set_sample(res_sample * sample,time_t now,int rcode,int rtt)35 void res_stats_set_sample(res_sample* sample, time_t now, int rcode, int rtt) {
36 LOG(INFO) << __func__ << ": rcode = " << rcode << ", sec = " << rtt;
37 sample->at = now;
38 sample->rcode = rcode;
39 sample->rtt = rtt;
40 }
41
42 /* Clears all stored samples for the given server. */
_res_stats_clear_samples(res_stats * stats)43 void _res_stats_clear_samples(res_stats* stats) {
44 stats->sample_count = stats->sample_next = 0;
45 }
46
47 /* Aggregates the reachability statistics for the given server based on on the stored samples. */
android_net_res_stats_aggregate(res_stats * stats,int * successes,int * errors,int * timeouts,int * internal_errors,int * rtt_avg,time_t * last_sample_time)48 void android_net_res_stats_aggregate(res_stats* stats, int* successes, int* errors, int* timeouts,
49 int* internal_errors, int* rtt_avg, time_t* last_sample_time) {
50 int s = 0; // successes
51 int e = 0; // errors
52 int t = 0; // timouts
53 int ie = 0; // internal errors
54 long rtt_sum = 0;
55 time_t last = 0;
56 int rtt_count = 0;
57 for (int i = 0; i < stats->sample_count; ++i) {
58 // Treat everything as an error that the code in send_dg() already considers a
59 // rejection by the server, i.e. SERVFAIL, NOTIMP and REFUSED. Assume that NXDOMAIN
60 // and NOTAUTH can actually occur for user queries. NOERROR with empty answer section
61 // is not treated as an error here either. FORMERR seems to sometimes be returned by
62 // some versions of BIND in response to DNSSEC or EDNS0. Whether to treat such responses
63 // as an indication of a broken server is unclear, though. For now treat such responses,
64 // as well as unknown codes as errors.
65 switch (stats->samples[i].rcode) {
66 case NOERROR:
67 case NOTAUTH:
68 case NXDOMAIN:
69 ++s;
70 rtt_sum += stats->samples[i].rtt;
71 ++rtt_count;
72 break;
73 case RCODE_TIMEOUT:
74 ++t;
75 break;
76 case RCODE_INTERNAL_ERROR:
77 ++ie;
78 break;
79 case SERVFAIL:
80 case NOTIMP:
81 case REFUSED:
82 default:
83 ++e;
84 break;
85 }
86 }
87 *successes = s;
88 *errors = e;
89 *timeouts = t;
90 *internal_errors = ie;
91 /* If there was at least one successful sample, calculate average RTT. */
92 if (rtt_count) {
93 *rtt_avg = rtt_sum / rtt_count;
94 } else {
95 *rtt_avg = -1;
96 }
97 /* If we had at least one sample, populate last sample time. */
98 if (stats->sample_count > 0) {
99 if (stats->sample_next > 0) {
100 last = stats->samples[stats->sample_next - 1].at;
101 } else {
102 last = stats->samples[stats->sample_count - 1].at;
103 }
104 }
105 *last_sample_time = last;
106 }
107
108 // Returns true if the server is considered usable, i.e. if the success rate is not lower than the
109 // threshold for the stored stored samples. If not enough samples are stored, the server is
110 // considered usable.
res_stats_usable_server(const res_params * params,res_stats * stats)111 static bool res_stats_usable_server(const res_params* params, res_stats* stats) {
112 int successes = -1;
113 int errors = -1;
114 int timeouts = -1;
115 int internal_errors = -1;
116 int rtt_avg = -1;
117 time_t last_sample_time = 0;
118 android_net_res_stats_aggregate(stats, &successes, &errors, &timeouts, &internal_errors,
119 &rtt_avg, &last_sample_time);
120 if (successes >= 0 && errors >= 0 && timeouts >= 0) {
121 int total = successes + errors + timeouts + internal_errors;
122 LOG(INFO) << __func__ << ": NS stats: S " << successes << " + E " << errors << " + T "
123 << timeouts << " + I " << internal_errors << " = " << total
124 << ", rtt = " << rtt_avg << ", min_samples = " << unsigned(params->min_samples);
125 if (total >= params->min_samples) {
126 int success_rate = successes * 100 / total;
127 LOG(INFO) << __func__ << ": success rate " << success_rate;
128 if (success_rate < params->success_threshold) {
129 time_t now = time(NULL);
130 if (now - last_sample_time > params->sample_validity) {
131 // Note: It might be worth considering to expire old servers after their expiry
132 // date has been reached, however the code for returning the ring buffer to its
133 // previous non-circular state would induce additional complexity.
134 LOG(INFO) << __func__ << ": samples stale, retrying server";
135 _res_stats_clear_samples(stats);
136 } else {
137 LOG(INFO) << __func__ << ": too many resolution errors, ignoring server";
138 return 0;
139 }
140 }
141 }
142 }
143 return 1;
144 }
145
android_net_res_stats_get_usable_servers(const res_params * params,res_stats stats[],int nscount,bool usable_servers[])146 int android_net_res_stats_get_usable_servers(const res_params* params, res_stats stats[],
147 int nscount, bool usable_servers[]) {
148 unsigned usable_servers_found = 0;
149 for (int ns = 0; ns < nscount; ns++) {
150 bool usable = res_stats_usable_server(params, &stats[ns]);
151 if (usable) {
152 ++usable_servers_found;
153 }
154 usable_servers[ns] = usable;
155 }
156 // If there are no usable servers, consider all of them usable.
157 // TODO: Explore other possibilities, such as enabling only the best N servers, etc.
158 if (usable_servers_found == 0) {
159 for (int ns = 0; ns < nscount; ns++) {
160 usable_servers[ns] = true;
161 }
162 }
163 return (usable_servers_found == 0) ? nscount : usable_servers_found;
164 }
165