1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "resolv"
18 
19 #include "DnsTlsDispatcher.h"
20 
21 #include <netdutils/Stopwatch.h>
22 
23 #include "DnsTlsSocketFactory.h"
24 #include "Experiments.h"
25 #include "PrivateDnsConfiguration.h"
26 #include "resolv_cache.h"
27 #include "resolv_private.h"
28 #include "stats.pb.h"
29 
30 #include <android-base/logging.h>
31 
32 namespace android {
33 namespace net {
34 
35 using android::netdutils::IPSockAddr;
36 using android::netdutils::Stopwatch;
37 using netdutils::Slice;
38 
// static
// Definition of the class-wide mutex. Within this file it is held while transports are
// looked up, added or erased, and while their per-transport counters are updated.
std::mutex DnsTlsDispatcher::sLock;
41 
DnsTlsDispatcher()42 DnsTlsDispatcher::DnsTlsDispatcher() {
43     mFactory.reset(new DnsTlsSocketFactory());
44 }
45 
getInstance()46 DnsTlsDispatcher& DnsTlsDispatcher::getInstance() {
47     static DnsTlsDispatcher instance;
48     return instance;
49 }
50 
getOrderedAndUsableServerList(const std::list<DnsTlsServer> & tlsServers,unsigned netId,unsigned mark)51 std::list<DnsTlsServer> DnsTlsDispatcher::getOrderedAndUsableServerList(
52         const std::list<DnsTlsServer>& tlsServers, unsigned netId, unsigned mark) {
53     // Our preferred DnsTlsServer order is:
54     //     1) reuse existing IPv6 connections
55     //     2) reuse existing IPv4 connections
56     //     3) establish new IPv6 connections
57     //     4) establish new IPv4 connections
58     std::list<DnsTlsServer> existing6;
59     std::list<DnsTlsServer> existing4;
60     std::list<DnsTlsServer> new6;
61     std::list<DnsTlsServer> new4;
62 
63     // Pull out any servers for which we might have existing connections and
64     // place them at the from the list of servers to try.
65     {
66         std::lock_guard guard(sLock);
67 
68         for (const auto& tlsServer : tlsServers) {
69             const Key key = std::make_pair(mark, tlsServer);
70             if (Transport* xport = getTransport(key); xport != nullptr) {
71                 if (!xport->usable()) {
72                     // Don't use this xport. It will be removed after timeout
73                     // (IDLE_TIMEOUT minutes).
74                     LOG(DEBUG) << "Skip using DoT server " << tlsServer.toString() << " on "
75                                << netId;
76                     continue;
77                 }
78 
79                 switch (tlsServer.ss.ss_family) {
80                     case AF_INET:
81                         existing4.push_back(tlsServer);
82                         break;
83                     case AF_INET6:
84                         existing6.push_back(tlsServer);
85                         break;
86                 }
87             } else {
88                 switch (tlsServer.ss.ss_family) {
89                     case AF_INET:
90                         new4.push_back(tlsServer);
91                         break;
92                     case AF_INET6:
93                         new6.push_back(tlsServer);
94                         break;
95                 }
96             }
97         }
98     }
99 
100     auto& out = existing6;
101     out.splice(out.cend(), existing4);
102     out.splice(out.cend(), new6);
103     out.splice(out.cend(), new4);
104     return out;
105 }
106 
query(const std::list<DnsTlsServer> & tlsServers,ResState * statp,const Slice query,const Slice ans,int * resplen,bool dotQuickFallback)107 DnsTlsTransport::Response DnsTlsDispatcher::query(const std::list<DnsTlsServer>& tlsServers,
108                                                   ResState* statp, const Slice query,
109                                                   const Slice ans, int* resplen,
110                                                   bool dotQuickFallback) {
111     const std::list<DnsTlsServer> servers(
112             getOrderedAndUsableServerList(tlsServers, statp->netid, statp->mark));
113 
114     if (servers.empty()) {
115         LOG(WARNING) << "No usable DnsTlsServers";
116 
117         // Call maybeCleanup so the expired Transports can be removed as expected.
118         std::lock_guard guard(sLock);
119         maybeCleanup(std::chrono::steady_clock::now());
120     }
121 
122     DnsTlsTransport::Response code = DnsTlsTransport::Response::internal_error;
123     int serverCount = 0;
124     for (const auto& server : servers) {
125         DnsQueryEvent* dnsQueryEvent =
126                 statp->event->mutable_dns_query_events()->add_dns_query_event();
127 
128         bool connectTriggered = false;
129         Stopwatch queryStopwatch;
130         code = this->query(server, statp->netid, statp->mark, query, ans, resplen,
131                            &connectTriggered);
132 
133         dnsQueryEvent->set_latency_micros(saturate_cast<int32_t>(queryStopwatch.timeTakenUs()));
134         dnsQueryEvent->set_dns_server_index(serverCount++);
135         dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(server.ss.ss_family));
136         dnsQueryEvent->set_protocol(PROTO_DOT);
137         std::span<const uint8_t> msg(query.base(), query.size());
138         dnsQueryEvent->set_type(getQueryType(msg));
139         dnsQueryEvent->set_connected(connectTriggered);
140 
141         switch (code) {
142             // These response codes are valid responses and not expected to
143             // change if another server is queried.
144             case DnsTlsTransport::Response::success:
145                 dnsQueryEvent->set_rcode(
146                         static_cast<NsRcode>(reinterpret_cast<HEADER*>(ans.base())->rcode));
147                 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
148                 return code;
149             case DnsTlsTransport::Response::limit_error:
150                 dnsQueryEvent->set_rcode(NS_R_INTERNAL_ERROR);
151                 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
152                 return code;
153             // These response codes might differ when trying other servers, so
154             // keep iterating to see if we can get a different (better) result.
155             case DnsTlsTransport::Response::network_error:
156                 // Sync from res_tls_send in res_send.cpp
157                 dnsQueryEvent->set_rcode(NS_R_TIMEOUT);
158                 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
159                 if (dotQuickFallback) {
160                     return code;
161                 }
162                 break;
163             case DnsTlsTransport::Response::internal_error:
164                 dnsQueryEvent->set_rcode(NS_R_INTERNAL_ERROR);
165                 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
166                 break;
167             // No "default" statement.
168         }
169     }
170 
171     return code;
172 }
173 
query(const DnsTlsServer & server,unsigned netId,unsigned mark,const Slice query,const Slice ans,int * resplen,bool * connectTriggered)174 DnsTlsTransport::Response DnsTlsDispatcher::query(const DnsTlsServer& server, unsigned netId,
175                                                   unsigned mark, const Slice query, const Slice ans,
176                                                   int* resplen, bool* connectTriggered) {
177     // TODO: This can cause the resolver to create multiple connections to the same DoT server
178     // merely due to different mark, such as the bit explicitlySelected unset.
179     // See if we can save them and just create one connection for one DoT server.
180     const Key key = std::make_pair(mark, server);
181     Transport* xport;
182     {
183         std::lock_guard guard(sLock);
184         if (xport = getTransport(key); xport == nullptr) {
185             xport = addTransport(server, mark, netId);
186         }
187         ++xport->useCount;
188     }
189 
190     // Don't call this function and hold sLock at the same time because of the following reason:
191     // TLS handshake requires a lock which is also needed by this function, if the handshake gets
192     // stuck, this function also gets blocked.
193     const int connectCounter = xport->transport.getConnectCounter();
194 
195     const auto& result = queryInternal(*xport, query);
196     *connectTriggered = (xport->transport.getConnectCounter() > connectCounter);
197 
198     DnsTlsTransport::Response code = result.code;
199     if (code == DnsTlsTransport::Response::success) {
200         if (result.response.size() > ans.size()) {
201             LOG(DEBUG) << "Response too large: " << result.response.size() << " > " << ans.size();
202             code = DnsTlsTransport::Response::limit_error;
203         } else {
204             LOG(DEBUG) << "Got response successfully";
205             *resplen = result.response.size();
206             netdutils::copy(ans, netdutils::makeSlice(result.response));
207         }
208     } else {
209         LOG(DEBUG) << "Query failed: " << (unsigned int)code;
210     }
211 
212     auto now = std::chrono::steady_clock::now();
213     {
214         std::lock_guard guard(sLock);
215         --xport->useCount;
216         xport->lastUsed = now;
217         if (code == DnsTlsTransport::Response::network_error) {
218             xport->continuousfailureCount++;
219         } else {
220             xport->continuousfailureCount = 0;
221         }
222 
223         // DoT revalidation specific feature.
224         if (xport->checkRevalidationNecessary()) {
225             // Even if the revalidation passes, it doesn't guarantee that DoT queries
226             // to the xport can stop failing because revalidation creates a new connection
227             // to probe while the xport still uses an existing connection. So far, there isn't
228             // a feasible way to force the xport to disconnect the connection. If the case
229             // happens, the xport will be marked as unusable and DoT queries won't be sent to
230             // it anymore. Eventually, after IDLE_TIMEOUT, the xport will be destroyed, and
231             // a new xport will be created.
232             const auto result = PrivateDnsConfiguration::getInstance().requestDotValidation(
233                     netId, PrivateDnsConfiguration::ServerIdentity{server}, mark);
234             LOG(WARNING) << "Requested validation for " << server.toString() << " with mark 0x"
235                          << std::hex << mark << ", "
236                          << (result.ok() ? "succeeded" : "failed: " + result.error().message());
237         }
238 
239         maybeCleanup(now);
240     }
241     return code;
242 }
243 
forceCleanup(unsigned netId)244 void DnsTlsDispatcher::forceCleanup(unsigned netId) {
245     std::lock_guard guard(sLock);
246     cleanup(std::chrono::steady_clock::now(), netId);
247 }
248 
queryInternal(Transport & xport,const netdutils::Slice query)249 DnsTlsTransport::Result DnsTlsDispatcher::queryInternal(Transport& xport,
250                                                         const netdutils::Slice query) {
251     LOG(DEBUG) << "Sending query of length " << query.size();
252 
253     // If dot_async_handshake is not set, the call might block in some cases; otherwise,
254     // the call should return very soon.
255     auto res = xport.transport.query(query);
256     LOG(DEBUG) << "Awaiting response";
257 
258     if (xport.timeout().count() == -1) {
259         // Infinite timeout.
260         return res.get();
261     }
262 
263     const auto status = res.wait_for(xport.timeout());
264     if (status == std::future_status::timeout) {
265         // TODO(b/186613628): notify the Transport to remove this query.
266         LOG(WARNING) << "DoT query timed out after " << xport.timeout().count() << " ms";
267         return DnsTlsTransport::Result{
268                 .code = DnsTlsTransport::Response::network_error,
269                 .response = {},
270         };
271     }
272 
273     return res.get();
274 }
275 
// This timeout effectively controls how long to keep SSL session tickets.
// cleanup() erases transports that are not in use and were last used longer ago than this;
// maybeCleanup() also uses it as the minimum interval between two cleanup scans.
static constexpr std::chrono::minutes IDLE_TIMEOUT(5);
maybeCleanup(std::chrono::time_point<std::chrono::steady_clock> now)278 void DnsTlsDispatcher::maybeCleanup(std::chrono::time_point<std::chrono::steady_clock> now) {
279     // To avoid scanning mStore after every query, return early if a cleanup has been
280     // performed recently.
281     if (now - mLastCleanup < IDLE_TIMEOUT) {
282         return;
283     }
284     cleanup(now, std::nullopt);
285     mLastCleanup = now;
286 }
287 
cleanup(std::chrono::time_point<std::chrono::steady_clock> now,std::optional<unsigned> netId)288 void DnsTlsDispatcher::cleanup(std::chrono::time_point<std::chrono::steady_clock> now,
289                                std::optional<unsigned> netId) {
290     std::erase_if(mStore, [&](const auto& item) REQUIRES(sLock) {
291         auto const& [_, xport] = item;
292         if (xport->useCount == 0) {
293             if (netId.has_value() && xport->mNetId == netId.value()) return true;
294             if (now - xport->lastUsed > IDLE_TIMEOUT) return true;
295         }
296         return false;
297     });
298 }
299 
addTransport(const DnsTlsServer & server,unsigned mark,unsigned netId)300 DnsTlsDispatcher::Transport* DnsTlsDispatcher::addTransport(const DnsTlsServer& server,
301                                                             unsigned mark, unsigned netId) {
302     const Key key = std::make_pair(mark, server);
303     Transport* ret = getTransport(key);
304     if (ret != nullptr) return ret;
305 
306     const Experiments* const instance = Experiments::getInstance();
307     int triggerThr =
308             instance->getFlag("dot_revalidation_threshold", Transport::kDotRevalidationThreshold);
309     int unusableThr = instance->getFlag("dot_xport_unusable_threshold",
310                                         Transport::kDotXportUnusableThreshold);
311     int queryTimeout = instance->getFlag("dot_query_timeout_ms", Transport::kDotQueryTimeoutMs);
312 
313     // Check and adjust the parameters if they are improperly set.
314     const bool isForOpportunisticMode = server.name.empty();
315     if (triggerThr <= 0 || !isForOpportunisticMode) {
316         triggerThr = -1;
317     }
318     if (unusableThr <= 0 || !isForOpportunisticMode) {
319         unusableThr = -1;
320     }
321     if (queryTimeout < 0) {
322         queryTimeout = -1;
323     } else if (queryTimeout < 1000) {
324         queryTimeout = 1000;
325     }
326 
327     ret = new Transport(server, mark, netId, mFactory.get(), triggerThr, unusableThr, queryTimeout);
328     LOG(INFO) << "Transport is initialized with { " << triggerThr << ", " << unusableThr << ", "
329               << queryTimeout << "ms }"
330               << " for server " << server.toString();
331 
332     mStore[key].reset(ret);
333 
334     return ret;
335 }
336 
getTransport(const Key & key)337 DnsTlsDispatcher::Transport* DnsTlsDispatcher::getTransport(const Key& key) {
338     auto it = mStore.find(key);
339     return (it == mStore.end() ? nullptr : it->second.get());
340 }
341 
checkRevalidationNecessary()342 bool DnsTlsDispatcher::Transport::checkRevalidationNecessary() {
343     if (triggerThreshold <= 0) return false;
344     if (continuousfailureCount < triggerThreshold) return false;
345     if (isRevalidationThresholdReached) return false;
346 
347     isRevalidationThresholdReached = true;
348     return true;
349 }
350 
usable()351 bool DnsTlsDispatcher::Transport::usable() {
352     if (unusableThreshold <= 0) return true;
353 
354     if (continuousfailureCount >= unusableThreshold) {
355         // Once reach the threshold, mark this Transport as unusable.
356         isXportUnusableThresholdReached = true;
357     }
358     return !isXportUnusableThresholdReached;
359 }
360 
361 }  // end of namespace net
362 }  // end of namespace android
363