1 /*	$NetBSD: res_send.c,v 1.9 2006/01/24 17:41:25 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1985, 1989, 1993
5  *    The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  * 	This product includes software developed by the University of
18  * 	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * Portions Copyright (c) 1993 by Digital Equipment Corporation.
38  *
39  * Permission to use, copy, modify, and distribute this software for any
40  * purpose with or without fee is hereby granted, provided that the above
41  * copyright notice and this permission notice appear in all copies, and that
42  * the name of Digital Equipment Corporation not be used in advertising or
43  * publicity pertaining to distribution of the document or software without
44  * specific, written prior permission.
45  *
46  * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
47  * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
48  * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
49  * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
50  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
51  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
52  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
53  * SOFTWARE.
54  */
55 
56 /*
57  * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
58  * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
59  *
60  * Permission to use, copy, modify, and distribute this software for any
61  * purpose with or without fee is hereby granted, provided that the above
62  * copyright notice and this permission notice appear in all copies.
63  *
64  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
65  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
66  * MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR
67  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
68  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
69  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
70  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
71  */
72 
73 /*
74  * Send query to name server and wait for reply.
75  */
76 
77 #define LOG_TAG "resolv"
78 
79 #include <chrono>
80 
81 #include <sys/param.h>
82 #include <sys/socket.h>
83 #include <sys/time.h>
84 #include <sys/uio.h>
85 
86 #include <arpa/inet.h>
87 #include <arpa/nameser.h>
88 
89 #include <errno.h>
90 #include <fcntl.h>
91 #include <netdb.h>
92 #include <poll.h>
93 #include <signal.h>
94 #include <stdlib.h>
95 #include <string.h>
96 #include <time.h>
97 #include <unistd.h>
98 
99 #include <android-base/logging.h>
100 #include <android-base/result.h>
101 #include <android/multinetwork.h>  // ResNsendFlags
102 
103 #include <netdutils/Slice.h>
104 #include <netdutils/Stopwatch.h>
105 #include "DnsTlsDispatcher.h"
106 #include "DnsTlsTransport.h"
107 #include "Experiments.h"
108 #include "PrivateDnsConfiguration.h"
109 #include "netd_resolv/resolv.h"
110 #include "private/android_filesystem_config.h"
111 
112 #include "res_comp.h"
113 #include "res_debug.h"
114 #include "resolv_cache.h"
115 #include "stats.h"
116 #include "stats.pb.h"
117 #include "util.h"
118 
119 using namespace std::chrono_literals;
// TODO: move this code into a dedicated namespace (something like android::netd_resolv) for
// libnetd_resolv
121 using android::base::ErrnoError;
122 using android::base::Result;
123 using android::net::CacheStatus;
124 using android::net::DnsQueryEvent;
125 using android::net::DnsTlsDispatcher;
126 using android::net::DnsTlsTransport;
127 using android::net::IpVersion;
128 using android::net::IV_IPV4;
129 using android::net::IV_IPV6;
130 using android::net::IV_UNKNOWN;
131 using android::net::LinuxErrno;
132 using android::net::NetworkDnsEventReported;
133 using android::net::NS_T_INVALID;
134 using android::net::NsRcode;
135 using android::net::NsType;
136 using android::net::PrivateDnsConfiguration;
137 using android::net::PrivateDnsMode;
138 using android::net::PrivateDnsModes;
139 using android::net::PrivateDnsStatus;
140 using android::net::PROTO_TCP;
141 using android::net::PROTO_UDP;
142 using android::netdutils::IPSockAddr;
143 using android::netdutils::Slice;
144 using android::netdutils::Stopwatch;
145 
// Forward declarations of file-local (static) helpers used by the code below.

// Query a single nameserver (index |ns|) over TCP; called from res_nsend() when TCP is in use.
static int send_vc(res_state statp, res_params* params, const uint8_t* buf, int buflen,
                   uint8_t* ans, int anssiz, int* terrno, size_t ns, time_t* at, int* rcode,
                   int* delay);
// Query over UDP; called from res_nsend() otherwise. res_nsend() reads |*ns|, |*v_circuit| and
// |*gotsomewhere| back after the call, so they act as in/out parameters.
static int send_dg(res_state statp, res_params* params, const uint8_t* buf, int buflen,
                   uint8_t* ans, int anssiz, int* terrno, size_t* ns, int* v_circuit,
                   int* gotsomewhere, time_t* at, int* rcode, int* delay);

// Error reporting helper; send_vc() calls it after failed bind()/connect() attempts.
static void dump_error(const char*, const struct sockaddr*, int);

// Socket address comparison; send_vc() uses it to check that a cached TCP socket is still
// connected to the intended nameserver.
static int sock_eq(struct sockaddr*, struct sockaddr*);
// connect() bounded by |timeout|; a negative return is treated as failure by send_vc().
static int connect_with_timeout(int sock, const struct sockaddr* nsap, socklen_t salen,
                                const struct timespec timeout);
// NOTE(review): definition not visible in this part of the file; presumably polls |sock| for
// |events| until |finish| — confirm against the implementation.
static int retrying_poll(const int sock, short events, const struct timespec* finish);
// DNS-over-TLS query path used by res_nsend(); |*fallback| tells the caller whether it may fall
// back to the regular UDP/TCP path when no answer was obtained.
static int res_tls_send(res_state, const Slice query, const Slice answer, int* rcode,
                        bool* fallback);
161 
162 NsType getQueryType(const uint8_t* msg, size_t msgLen) {
163     ns_msg handle;
164     ns_rr rr;
165     if (ns_initparse((const uint8_t*)msg, msgLen, &handle) < 0 ||
166         ns_parserr(&handle, ns_s_qd, 0, &rr) < 0) {
167         return NS_T_INVALID;
168     }
169     return static_cast<NsType>(ns_rr_type(rr));
170 }
171 
172 IpVersion ipFamilyToIPVersion(const int ipFamily) {
173     switch (ipFamily) {
174         case AF_INET:
175             return IV_IPV4;
176         case AF_INET6:
177             return IV_IPV6;
178         default:
179             return IV_UNKNOWN;
180     }
181 }
182 
183 // BEGIN: Code copied from ISC eventlib
184 // TODO: move away from this code
185 #define BILLION 1000000000
186 
// Builds a timespec from a (seconds, nanoseconds) pair. The values are stored as given; no
// normalization is performed.
static struct timespec evConsTime(time_t sec, long nsec) {
    struct timespec built;
    built.tv_nsec = nsec;
    built.tv_sec = sec;
    return built;
}
194 
195 static struct timespec evAddTime(struct timespec addend1, struct timespec addend2) {
196     struct timespec x;
197 
198     x.tv_sec = addend1.tv_sec + addend2.tv_sec;
199     x.tv_nsec = addend1.tv_nsec + addend2.tv_nsec;
200     if (x.tv_nsec >= BILLION) {
201         x.tv_sec++;
202         x.tv_nsec -= BILLION;
203     }
204     return (x);
205 }
206 
207 static struct timespec evSubTime(struct timespec minuend, struct timespec subtrahend) {
208     struct timespec x;
209 
210     x.tv_sec = minuend.tv_sec - subtrahend.tv_sec;
211     if (minuend.tv_nsec >= subtrahend.tv_nsec)
212         x.tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;
213     else {
214         x.tv_nsec = BILLION - subtrahend.tv_nsec + minuend.tv_nsec;
215         x.tv_sec--;
216     }
217     return (x);
218 }
219 
// Three-way comparison of two timespecs.
// Returns -1 if |a| is earlier than |b|, 1 if |a| is later than |b|, and 0 if they are equal.
// Seconds are compared first; nanoseconds only break ties.
//
// The fields are compared directly rather than by taking the sign of their difference, so the
// result cannot be affected by signed overflow, and no helper macro is left defined for the
// rest of the translation unit.
static int evCmpTime(struct timespec a, struct timespec b) {
    if (a.tv_sec != b.tv_sec) return (a.tv_sec < b.tv_sec) ? -1 : 1;
    if (a.tv_nsec != b.tv_nsec) return (a.tv_nsec < b.tv_nsec) ? -1 : 1;
    return 0;
}
230 
// Returns the current time as reported by CLOCK_REALTIME. The return value of
// clock_gettime() is not checked.
static struct timespec evNowTime(void) {
    struct timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
    return now;
}
236 
237 // END: Code copied from ISC eventlib
238 
239 /* BIONIC-BEGIN: implement source port randomization */
240 static int random_bind(int s, int family) {
241     sockaddr_union u;
242     int j;
243     socklen_t slen;
244 
245     /* clear all, this also sets the IP4/6 address to 'any' */
246     memset(&u, 0, sizeof u);
247 
248     switch (family) {
249         case AF_INET:
250             u.sin.sin_family = family;
251             slen = sizeof u.sin;
252             break;
253         case AF_INET6:
254             u.sin6.sin6_family = family;
255             slen = sizeof u.sin6;
256             break;
257         default:
258             errno = EPROTO;
259             return -1;
260     }
261 
262     /* first try to bind to a random source port a few times */
263     for (j = 0; j < 10; j++) {
264         /* find a random port between 1025 .. 65534 */
265         int port = 1025 + (arc4random_uniform(65535 - 1025));
266         if (family == AF_INET)
267             u.sin.sin_port = htons(port);
268         else
269             u.sin6.sin6_port = htons(port);
270 
271         if (!bind(s, &u.sa, slen)) return 0;
272     }
273 
274     // nothing after 10 attempts, our network table is probably busy
275     // let the system decide which port is best
276     if (family == AF_INET)
277         u.sin.sin_port = 0;
278     else
279         u.sin6.sin6_port = 0;
280 
281     return bind(s, &u.sa, slen);
282 }
283 /* BIONIC-END */
284 
// Leaves only one nameserver enabled in |usable_servers| (an array of |nscount| flags).
// |selectedServer| is a 1-based ordinal counted over the entries that are currently true: the
// entry that is the |selectedServer|-th usable one stays true, every other entry becomes false.
static void res_set_usable_server(int selectedServer, int nscount, bool usable_servers[]) {
    int usableSeen = 0;
    for (int idx = 0; idx < nscount; ++idx) {
        usableSeen += usable_servers[idx] ? 1 : 0;
        const bool isSelectedSlot = (usableSeen == selectedServer);
        if (!isSelectedSlot) {
            usable_servers[idx] = false;
        }
    }
}
293 
294 // Looks up the nameserver address in res.nsaddrs[], returns the ns number if found, otherwise -1.
295 static int res_ourserver_p(res_state statp, const sockaddr* sa) {
296     const sockaddr_in *inp, *srv;
297     const sockaddr_in6 *in6p, *srv6;
298     int ns = 0;
299     switch (sa->sa_family) {
300         case AF_INET:
301             inp = (const struct sockaddr_in*) (const void*) sa;
302 
303             for (const IPSockAddr& ipsa : statp->nsaddrs) {
304                 sockaddr_storage ss = ipsa;
305                 srv = reinterpret_cast<sockaddr_in*>(&ss);
306                 if (srv->sin_family == inp->sin_family && srv->sin_port == inp->sin_port &&
307                     (srv->sin_addr.s_addr == INADDR_ANY ||
308                      srv->sin_addr.s_addr == inp->sin_addr.s_addr))
309                     return ns;
310                 ++ns;
311             }
312             break;
313         case AF_INET6:
314             in6p = (const struct sockaddr_in6*) (const void*) sa;
315             for (const IPSockAddr& ipsa : statp->nsaddrs) {
316                 sockaddr_storage ss = ipsa;
317                 srv6 = reinterpret_cast<sockaddr_in6*>(&ss);
318                 if (srv6->sin6_family == in6p->sin6_family && srv6->sin6_port == in6p->sin6_port &&
319 #ifdef HAVE_SIN6_SCOPE_ID
320                     (srv6->sin6_scope_id == 0 || srv6->sin6_scope_id == in6p->sin6_scope_id) &&
321 #endif
322                     (IN6_IS_ADDR_UNSPECIFIED(&srv6->sin6_addr) ||
323                      IN6_ARE_ADDR_EQUAL(&srv6->sin6_addr, &in6p->sin6_addr)))
324                     return ns;
325                 ++ns;
326             }
327             break;
328         default:
329             break;
330     }
331     return -1;
332 }
333 
334 /* int
335  * res_nameinquery(name, type, cl, buf, eom)
336  *	look for (name, type, cl) in the query section of packet (buf, eom)
337  * requires:
338  *	buf + HFIXEDSZ <= eom
339  * returns:
340  *	-1 : format error
341  *	0  : not found
342  *	>0 : found
343  * author:
344  *	paul vixie, 29may94
345  */
346 int res_nameinquery(const char* name, int type, int cl, const uint8_t* buf, const uint8_t* eom) {
347     const uint8_t* cp = buf + HFIXEDSZ;
348     int qdcount = ntohs(((const HEADER*) (const void*) buf)->qdcount);
349 
350     while (qdcount-- > 0) {
351         char tname[MAXDNAME + 1];
352         int n = dn_expand(buf, eom, cp, tname, sizeof tname);
353         if (n < 0) return (-1);
354         cp += n;
355         if (cp + 2 * INT16SZ > eom) return (-1);
356         int ttype = ntohs(*reinterpret_cast<const uint16_t*>(cp));
357         cp += INT16SZ;
358         int tclass = ntohs(*reinterpret_cast<const uint16_t*>(cp));
359         cp += INT16SZ;
360         if (ttype == type && tclass == cl && ns_samename(tname, name) == 1) return (1);
361     }
362     return (0);
363 }
364 
365 /* int
366  * res_queriesmatch(buf1, eom1, buf2, eom2)
367  *	is there a 1:1 mapping of (name,type,class)
368  *	in (buf1,eom1) and (buf2,eom2)?
369  * returns:
370  *	-1 : format error
371  *	0  : not a 1:1 mapping
372  *	>0 : is a 1:1 mapping
373  * author:
374  *	paul vixie, 29may94
375  */
376 int res_queriesmatch(const uint8_t* buf1, const uint8_t* eom1, const uint8_t* buf2,
377                      const uint8_t* eom2) {
378     const uint8_t* cp = buf1 + HFIXEDSZ;
379     int qdcount = ntohs(((const HEADER*) (const void*) buf1)->qdcount);
380 
381     if (buf1 + HFIXEDSZ > eom1 || buf2 + HFIXEDSZ > eom2) return (-1);
382 
383     /*
384      * Only header section present in replies to
385      * dynamic update packets.
386      */
387     if ((((const HEADER*) (const void*) buf1)->opcode == ns_o_update) &&
388         (((const HEADER*) (const void*) buf2)->opcode == ns_o_update))
389         return (1);
390 
391     if (qdcount != ntohs(((const HEADER*) (const void*) buf2)->qdcount)) return (0);
392     while (qdcount-- > 0) {
393         char tname[MAXDNAME + 1];
394         int n = dn_expand(buf1, eom1, cp, tname, sizeof tname);
395         if (n < 0) return (-1);
396         cp += n;
397         if (cp + 2 * INT16SZ > eom1) return (-1);
398         int ttype = ntohs(*reinterpret_cast<const uint16_t*>(cp));
399         cp += INT16SZ;
400         int tclass = ntohs(*reinterpret_cast<const uint16_t*>(cp));
401         cp += INT16SZ;
402         if (!res_nameinquery(tname, ttype, tclass, buf2, eom2)) return (0);
403     }
404     return (1);
405 }
406 
407 static DnsQueryEvent* addDnsQueryEvent(NetworkDnsEventReported* event) {
408     return event->mutable_dns_query_events()->add_dns_query_event();
409 }
410 
// Returns true when |terrno| is EPERM.
//
// The system may be in some network restricted mode that blocks sending packets, which shows
// up as EPERM; there is no reason to keep retrying in that case.
// TODO: Check the system status to know if network restricted mode is enabled.
static bool isNetworkRestricted(int terrno) {
    const bool sendWasDenied = (terrno == EPERM);
    return sendWasDenied;
}
419 
/*
 * Resolves the DNS query in |buf| (|buflen| bytes) and writes the reply into |ans|
 * (capacity |anssiz| bytes).
 *
 * Steps, in order:
 *   1. Look the query up in the resolver cache for statp->netid; a hit is returned right away.
 *   2. Fail fast with -ESRCH if no nameservers are configured.
 *   3. Sleep for |sleepTimeMs| if it is non-zero.
 *   4. Unless NET_CONTEXT_FLAG_USE_LOCAL_NAMESERVERS is set, try res_tls_send() (DoT). Without
 *      an answer, continue only if it reports that fallback is allowed; otherwise return
 *      -ETIMEDOUT.
 *   5. Walk the usable nameservers, over UDP (send_dg) or TCP (send_vc), for up to
 *      params.retry_count rounds (a single round when ANDROID_RESOLV_NO_RETRY is set).
 *
 * Each cache hit and each send_vc/send_dg attempt appends a DnsQueryEvent to statp->event.
 *
 * Returns the reply length on success or a negative errno value on failure. |*rcode| is set
 * from the cached reply header on a cache hit; on the network path it is reset to
 * RCODE_INTERNAL_ERROR before every server attempt and then passed to the send function.
 */
int res_nsend(res_state statp, const uint8_t* buf, int buflen, uint8_t* ans, int anssiz, int* rcode,
              uint32_t flags, std::chrono::milliseconds sleepTimeMs) {
    LOG(DEBUG) << __func__;

    // Should not happen
    if (anssiz < HFIXEDSZ) {
        // TODO: Remove errno once callers stop using it
        errno = EINVAL;
        return -EINVAL;
    }
    res_pquery(buf, buflen);

    // Cache lookup; the time it takes is reported as the latency of a cache hit.
    int anslen = 0;
    Stopwatch cacheStopwatch;
    ResolvCacheStatus cache_status =
            resolv_cache_lookup(statp->netid, buf, buflen, ans, anssiz, &anslen, flags);
    const int32_t cacheLatencyUs = saturate_cast<int32_t>(cacheStopwatch.timeTakenUs());
    if (cache_status == RESOLV_CACHE_FOUND) {
        HEADER* hp = (HEADER*)(void*)ans;
        *rcode = hp->rcode;
        DnsQueryEvent* dnsQueryEvent = addDnsQueryEvent(statp->event);
        dnsQueryEvent->set_latency_micros(cacheLatencyUs);
        dnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
        dnsQueryEvent->set_type(getQueryType(buf, buflen));
        return anslen;
    } else if (cache_status != RESOLV_CACHE_UNSUPPORTED) {
        // had a cache miss for a known network, so populate the thread private
        // data so the normal resolve path can do its thing
        resolv_populate_res_for_net(statp);
    }
    if (statp->nameserverCount() == 0) {
        // We have no nameservers configured, so there's no point trying.
        // Tell the cache the query failed, or any retries and anyone else asking the same
        // question will block for PENDING_REQUEST_TIMEOUT seconds instead of failing fast.
        _resolv_cache_query_failed(statp->netid, buf, buflen, flags);

        // TODO: Remove errno once callers stop using it
        errno = ESRCH;
        return -ESRCH;
    }

    // If parallel_lookup is enabled, it might be necessary to wait some time so that gateways
    // do not drop packets because queries are sent too close together.
    if (sleepTimeMs != 0ms) {
        std::this_thread::sleep_for(sleepTimeMs);
    }
    // DoT
    if (!(statp->netcontext_flags & NET_CONTEXT_FLAG_USE_LOCAL_NAMESERVERS)) {
        bool fallback = false;
        int resplen = res_tls_send(statp, Slice(const_cast<uint8_t*>(buf), buflen),
                                   Slice(ans, anssiz), rcode, &fallback);
        if (resplen > 0) {
            LOG(DEBUG) << __func__ << ": got answer from DoT";
            res_pquery(ans, resplen);
            // Only add to the cache when the earlier lookup was a plain miss.
            if (cache_status == RESOLV_CACHE_NOTFOUND) {
                resolv_cache_add(statp->netid, buf, buflen, ans, resplen);
            }
            return resplen;
        }
        // No DoT answer and no permission to fall back to the UDP/TCP path: give up.
        if (!fallback) {
            _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
            return -ETIMEDOUT;
        }
    }

    // Fetch the resolver parameters and per-server stats used for server selection.
    res_stats stats[MAXNS]{};
    res_params params;
    int revision_id = resolv_cache_get_resolver_stats(statp->netid, &params, stats, statp->nsaddrs);
    if (revision_id < 0) {
        // NOTE(review): unlike the other failure paths in this function, this one returns
        // without calling _resolv_cache_query_failed() — confirm that this is intentional.
        // TODO: Remove errno once callers stop using it
        errno = ESRCH;
        return -ESRCH;
    }
    bool usable_servers[MAXNS];
    int usableServersCount = android_net_res_stats_get_usable_servers(
            &params, stats, statp->nameserverCount(), usable_servers);

    if (statp->sort_nameservers) {
        // It's unnecessary to mark a DNS server as unusable since broken servers will be less
        // likely to be chosen.
        for (int i = 0; i < statp->nameserverCount(); i++) {
            usable_servers[i] = true;
        }
    }

    // TODO: Let it always choose the first nameserver when sort_nameservers is enabled.
    if ((flags & ANDROID_RESOLV_NO_RETRY) && usableServersCount > 1) {
        auto hp = reinterpret_cast<const HEADER*>(buf);

        // Select a random server based on the query id
        // (1-based ordinal among the usable servers, as res_set_usable_server() expects).
        int selectedServer = (hp->id % usableServersCount) + 1;
        res_set_usable_server(selectedServer, statp->nameserverCount(), usable_servers);
    }

    // Send request, RETRY times, or until successful.
    int retryTimes = (flags & ANDROID_RESOLV_NO_RETRY) ? 1 : params.retry_count;
    // Queries larger than PACKETSZ start out on TCP.
    int useTcp = buflen > PACKETSZ;
    int gotsomewhere = 0;
    // Use an impossible error code as default value
    int terrno = ETIME;

    for (int attempt = 0; attempt < retryTimes; ++attempt) {
        for (size_t ns = 0; ns < statp->nsaddrs.size(); ++ns) {
            if (!usable_servers[ns]) continue;

            *rcode = RCODE_INTERNAL_ERROR;

            // Get server addr
            const IPSockAddr& serverSockAddr = statp->nsaddrs[ns];
            LOG(DEBUG) << __func__ << ": Querying server (# " << ns + 1
                       << ") address = " << serverSockAddr.toString();

            ::android::net::Protocol query_proto = useTcp ? PROTO_TCP : PROTO_UDP;
            time_t query_time = 0;
            int delay = 0;
            bool fallbackTCP = false;
            // Captured before the TCP branch below overwrites |attempt|.
            const bool shouldRecordStats = (attempt == 0);
            int resplen;
            Stopwatch queryStopwatch;
            int retry_count_for_event = 0;
            // send_dg() may change this to the index of the server that actually answered.
            size_t actualNs = ns;
            // Use an impossible error code as default value
            terrno = ETIME;
            if (useTcp) {
                // TCP; at most one attempt per server.
                // Setting |attempt| to |retryTimes| ends the outer retry loop once this pass
                // over the servers is finished.
                attempt = retryTimes;
                resplen = send_vc(statp, &params, buf, buflen, ans, anssiz, &terrno, ns,
                                  &query_time, rcode, &delay);

                if (buflen <= PACKETSZ && resplen <= 0 &&
                    statp->tc_mode == aidl::android::net::IDnsResolver::TC_MODE_UDP_TCP) {
                    // reset to UDP for next query on next DNS server if resolver is currently doing
                    // TCP fallback retry and current server does not support TCP connection
                    useTcp = false;
                }
                LOG(INFO) << __func__ << ": used send_vc " << resplen << " terrno: " << terrno;
            } else {
                // UDP
                resplen = send_dg(statp, &params, buf, buflen, ans, anssiz, &terrno, &actualNs,
                                  &useTcp, &gotsomewhere, &query_time, rcode, &delay);
                // send_dg() sets |useTcp| when the query has to be retried over TCP.
                fallbackTCP = useTcp ? true : false;
                retry_count_for_event = attempt;
                LOG(INFO) << __func__ << ": used send_dg " << resplen << " terrno: " << terrno;
            }

            // Record this attempt as a DnsQueryEvent, attributed to the server that answered.
            const IPSockAddr& receivedServerAddr = statp->nsaddrs[actualNs];
            DnsQueryEvent* dnsQueryEvent = addDnsQueryEvent(statp->event);
            dnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
            // When |retryTimes| > 1, we cannot actually know the correct latency value if we
            // received the answer from the previous server. So temporarily set the latency as -1 if
            // that condition happened.
            // TODO: make the latency value accurate.
            dnsQueryEvent->set_latency_micros(
                    (actualNs == ns) ? saturate_cast<int32_t>(queryStopwatch.timeTakenUs()) : -1);
            dnsQueryEvent->set_dns_server_index(actualNs);
            dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(receivedServerAddr.family()));
            dnsQueryEvent->set_retry_times(retry_count_for_event);
            dnsQueryEvent->set_rcode(static_cast<NsRcode>(*rcode));
            dnsQueryEvent->set_protocol(query_proto);
            dnsQueryEvent->set_type(getQueryType(buf, buflen));
            dnsQueryEvent->set_linux_errno(static_cast<LinuxErrno>(terrno));

            // Only record stats the first time we try a query. This ensures that
            // queries that deterministically fail (e.g., a name that always returns
            // SERVFAIL or times out) do not unduly affect the stats.
            if (shouldRecordStats) {
                // (b/151166599): This is a workaround that keeps DnsResolver's calculation of
                // DNS server reliability from being broken when network restricted mode is
                // enabled.
                // TODO: Introduce the new server selection instead of skipping stats recording.
                if (!isNetworkRestricted(terrno)) {
                    res_sample sample;
                    res_stats_set_sample(&sample, query_time, *rcode, delay);
                    // KeepListening UDP mechanism is incompatible with usable_servers of legacy
                    // stats, so keep the old logic for now.
                    // TODO: Replace usable_servers of legacy stats with new one.
                    resolv_cache_add_resolver_stats_sample(
                            statp->netid, revision_id, serverSockAddr, sample, params.max_samples);
                    resolv_stats_add(statp->netid, receivedServerAddr, dnsQueryEvent);
                }
            }

            // resplen == 0: no answer from this server; move on to the next one.
            if (resplen == 0) continue;
            if (fallbackTCP) {
                // Retry the same server over TCP: undo the loop's upcoming ++ns. |ns| is
                // unsigned, so for ns == 0 the decrement wraps and the increment wraps back.
                ns--;
                continue;
            }
            // resplen < 0: hard failure; stop and report |terrno|.
            if (resplen < 0) {
                _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
                statp->closeSockets();
                return -terrno;
            };

            LOG(DEBUG) << __func__ << ": got answer:";
            res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);

            if (cache_status == RESOLV_CACHE_NOTFOUND) {
                resolv_cache_add(statp->netid, buf, buflen, ans, resplen);
            }
            statp->closeSockets();
            return (resplen);
        }  // for each ns
    }  // for each retry
    // Every server and retry has been exhausted without an answer.
    statp->closeSockets();
    // On TCP keep the error from the last attempt; on UDP report a timeout if any server was
    // reached (|gotsomewhere|), and connection refused otherwise.
    terrno = useTcp ? terrno : gotsomewhere ? ETIMEDOUT : ECONNREFUSED;
    // TODO: Remove errno once callers stop using it
    errno = useTcp ? terrno
                   : gotsomewhere ? ETIMEDOUT /* no answer obtained */
                                  : ECONNREFUSED /* no nameservers found */;

    _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
    return -terrno;
}
633 
634 static struct timespec get_timeout(res_state statp, const res_params* params, const int ns) {
635     int msec;
636     // Legacy algorithm which scales the timeout by nameserver number.
637     // For instance, with 4 nameservers: 5s, 2.5s, 5s, 10s
638     // This has no effect with 1 or 2 nameservers
639     msec = params->base_timeout_msec << ns;
640     if (ns > 0) {
641         msec /= statp->nameserverCount();
642     }
643     // For safety, don't allow OEMs and experiments to configure a timeout shorter than 1s.
644     if (msec < 1000) {
645         msec = 1000;  // Use at least 1000ms
646     }
647     LOG(INFO) << __func__ << ": using timeout of " << msec << " msec";
648 
649     struct timespec result;
650     result.tv_sec = msec / 1000;
651     result.tv_nsec = (msec % 1000) * 1000000;
652     return result;
653 }
654 
655 static int send_vc(res_state statp, res_params* params, const uint8_t* buf, int buflen,
656                    uint8_t* ans, int anssiz, int* terrno, size_t ns, time_t* at, int* rcode,
657                    int* delay) {
658     *at = time(NULL);
659     *delay = 0;
660     const HEADER* hp = (const HEADER*) (const void*) buf;
661     HEADER* anhp = (HEADER*) (void*) ans;
662     struct sockaddr* nsap;
663     int nsaplen;
664     int truncating, connreset, n;
665     uint8_t* cp;
666 
667     LOG(INFO) << __func__ << ": using send_vc";
668 
669     // It should never happen, but just in case.
670     if (ns >= statp->nsaddrs.size()) {
671         LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
672         *terrno = EINVAL;
673         return -1;
674     }
675 
676     sockaddr_storage ss = statp->nsaddrs[ns];
677     nsap = reinterpret_cast<sockaddr*>(&ss);
678     nsaplen = sockaddrSize(nsap);
679 
680     connreset = 0;
681 same_ns:
682     truncating = 0;
683 
684     struct timespec start_time = evNowTime();
685 
686     /* Are we still talking to whom we want to talk to? */
687     if (statp->tcp_nssock >= 0 && (statp->_flags & RES_F_VC) != 0) {
688         struct sockaddr_storage peer;
689         socklen_t size = sizeof peer;
690         unsigned old_mark;
691         socklen_t mark_size = sizeof(old_mark);
692         if (getpeername(statp->tcp_nssock, (struct sockaddr*)(void*)&peer, &size) < 0 ||
693             !sock_eq((struct sockaddr*)(void*)&peer, nsap) ||
694             getsockopt(statp->tcp_nssock, SOL_SOCKET, SO_MARK, &old_mark, &mark_size) < 0 ||
695             old_mark != statp->_mark) {
696             statp->closeSockets();
697         }
698     }
699 
700     if (statp->tcp_nssock < 0 || (statp->_flags & RES_F_VC) == 0) {
701         if (statp->tcp_nssock >= 0) statp->closeSockets();
702 
703         statp->tcp_nssock.reset(socket(nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0));
704         if (statp->tcp_nssock < 0) {
705             *terrno = errno;
706             PLOG(DEBUG) << __func__ << ": socket(vc): ";
707             switch (errno) {
708                 case EPROTONOSUPPORT:
709                 case EPFNOSUPPORT:
710                 case EAFNOSUPPORT:
711                     return 0;
712                 default:
713                     return -1;
714             }
715         }
716         const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
717         resolv_tag_socket(statp->tcp_nssock, uid, statp->pid);
718         if (statp->_mark != MARK_UNSET) {
719             if (setsockopt(statp->tcp_nssock, SOL_SOCKET, SO_MARK, &statp->_mark,
720                            sizeof(statp->_mark)) < 0) {
721                 *terrno = errno;
722                 PLOG(DEBUG) << __func__ << ": setsockopt: ";
723                 return -1;
724             }
725         }
726         errno = 0;
727         if (random_bind(statp->tcp_nssock, nsap->sa_family) < 0) {
728             *terrno = errno;
729             dump_error("bind/vc", nsap, nsaplen);
730             statp->closeSockets();
731             return (0);
732         }
733         if (connect_with_timeout(statp->tcp_nssock, nsap, (socklen_t)nsaplen,
734                                  get_timeout(statp, params, ns)) < 0) {
735             *terrno = errno;
736             dump_error("connect/vc", nsap, nsaplen);
737             statp->closeSockets();
738             /*
739              * The way connect_with_timeout() is implemented prevents us from reliably
740              * determining whether this was really a timeout or e.g. ECONNREFUSED. Since
741              * currently both cases are handled in the same way, there is no need to
742              * change this (yet). If we ever need to reliably distinguish between these
743              * cases, both connect_with_timeout() and retrying_poll() need to be
744              * modified, though.
745              */
746             *rcode = RCODE_TIMEOUT;
747             return (0);
748         }
749         statp->_flags |= RES_F_VC;
750     }
751 
752     /*
753      * Send length & message
754      */
755     uint16_t len = htons(static_cast<uint16_t>(buflen));
756     const iovec iov[] = {
757             {.iov_base = &len, .iov_len = INT16SZ},
758             {.iov_base = const_cast<uint8_t*>(buf), .iov_len = static_cast<size_t>(buflen)},
759     };
760     if (writev(statp->tcp_nssock, iov, 2) != (INT16SZ + buflen)) {
761         *terrno = errno;
762         PLOG(DEBUG) << __func__ << ": write failed: ";
763         statp->closeSockets();
764         return (0);
765     }
766     /*
767      * Receive length & response
768      */
769 read_len:
770     cp = ans;
771     len = INT16SZ;
772     while ((n = read(statp->tcp_nssock, (char*)cp, (size_t)len)) > 0) {
773         cp += n;
774         if ((len -= n) == 0) break;
775     }
776     if (n <= 0) {
777         *terrno = errno;
778         PLOG(DEBUG) << __func__ << ": read failed: ";
779         statp->closeSockets();
780         /*
781          * A long running process might get its TCP
782          * connection reset if the remote server was
783          * restarted.  Requery the server instead of
784          * trying a new one.  When there is only one
785          * server, this means that a query might work
786          * instead of failing.  We only allow one reset
787          * per query to prevent looping.
788          */
789         if (*terrno == ECONNRESET && !connreset) {
790             connreset = 1;
791             goto same_ns;
792         }
793         return (0);
794     }
795     uint16_t resplen = ntohs(*reinterpret_cast<const uint16_t*>(ans));
796     if (resplen > anssiz) {
797         LOG(DEBUG) << __func__ << ": response truncated";
798         truncating = 1;
799         len = anssiz;
800     } else
801         len = resplen;
802     if (len < HFIXEDSZ) {
803         /*
804          * Undersized message.
805          */
806         LOG(DEBUG) << __func__ << ": undersized: " << len;
807         *terrno = EMSGSIZE;
808         statp->closeSockets();
809         return (0);
810     }
811     cp = ans;
812     while (len != 0 && (n = read(statp->tcp_nssock, (char*)cp, (size_t)len)) > 0) {
813         cp += n;
814         len -= n;
815     }
816     if (n <= 0) {
817         *terrno = errno;
818         PLOG(DEBUG) << __func__ << ": read(vc): ";
819         statp->closeSockets();
820         return (0);
821     }
822 
823     if (truncating) {
824         /*
825          * Flush rest of answer so connection stays in synch.
826          */
827         anhp->tc = 1;
828         len = resplen - anssiz;
829         while (len != 0) {
830             char junk[PACKETSZ];
831 
832             n = read(statp->tcp_nssock, junk, (len > sizeof junk) ? sizeof junk : len);
833             if (n > 0)
834                 len -= n;
835             else
836                 break;
837         }
838         LOG(WARNING) << __func__ << ": resplen " << resplen << " exceeds buf size " << anssiz;
839         // return size should never exceed container size
840         resplen = anssiz;
841     }
842     /*
843      * If the calling application has bailed out of
844      * a previous call and failed to arrange to have
845      * the circuit closed or the server has got
846      * itself confused, then drop the packet and
847      * wait for the correct one.
848      */
849     if (hp->id != anhp->id) {
850         LOG(DEBUG) << __func__ << ": ld answer (unexpected):";
851         res_pquery(ans, resplen);
852         goto read_len;
853     }
854 
855     /*
856      * All is well, or the error is fatal.  Signal that the
857      * next nameserver ought not be tried.
858      */
859     if (resplen > 0) {
860         struct timespec done = evNowTime();
861         *delay = res_stats_calculate_rtt(&done, &start_time);
862         *rcode = anhp->rcode;
863     }
864     *terrno = 0;
865     return (resplen);
866 }
867 
868 /* return -1 on error (errno set), 0 on success */
869 static int connect_with_timeout(int sock, const sockaddr* nsap, socklen_t salen,
870                                 const timespec timeout) {
871     int res, origflags;
872 
873     origflags = fcntl(sock, F_GETFL, 0);
874     fcntl(sock, F_SETFL, origflags | O_NONBLOCK);
875 
876     res = connect(sock, nsap, salen);
877     if (res < 0 && errno != EINPROGRESS) {
878         res = -1;
879         goto done;
880     }
881     if (res != 0) {
882         timespec now = evNowTime();
883         timespec finish = evAddTime(now, timeout);
884         LOG(INFO) << __func__ << ": " << sock << " send_vc";
885         res = retrying_poll(sock, POLLIN | POLLOUT, &finish);
886         if (res <= 0) {
887             res = -1;
888         }
889     }
890 done:
891     fcntl(sock, F_SETFL, origflags);
892     LOG(INFO) << __func__ << ": " << sock << " connect_with_const timeout returning " << res;
893     return res;
894 }
895 
896 static int retrying_poll(const int sock, const short events, const struct timespec* finish) {
897     struct timespec now, timeout;
898 
899 retry:
900     LOG(INFO) << __func__ << ": " << sock << " retrying_poll";
901 
902     now = evNowTime();
903     if (evCmpTime(*finish, now) > 0)
904         timeout = evSubTime(*finish, now);
905     else
906         timeout = evConsTime(0L, 0L);
907     struct pollfd fds = {.fd = sock, .events = events};
908     int n = ppoll(&fds, 1, &timeout, /*__mask=*/NULL);
909     if (n == 0) {
910         LOG(INFO) << __func__ << ": " << sock << " retrying_poll timeout";
911         errno = ETIMEDOUT;
912         return 0;
913     }
914     if (n < 0) {
915         if (errno == EINTR) goto retry;
916         PLOG(INFO) << __func__ << ": " << sock << " retrying_poll failed";
917         return n;
918     }
919     if (fds.revents & (POLLIN | POLLOUT | POLLERR)) {
920         int error;
921         socklen_t len = sizeof(error);
922         if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0 || error) {
923             errno = error;
924             PLOG(INFO) << __func__ << ": " << sock << " retrying_poll getsockopt failed";
925             return -1;
926         }
927     }
928     LOG(INFO) << __func__ << ": " << sock << " retrying_poll returning " << n;
929     return n;
930 }
931 
932 static std::vector<pollfd> extractUdpFdset(res_state statp, const short events = POLLIN) {
933     std::vector<pollfd> fdset(statp->nsaddrs.size());
934     for (size_t i = 0; i < statp->nsaddrs.size(); ++i) {
935         fdset[i] = {.fd = statp->nssocks[i], .events = events};
936     }
937     return fdset;
938 }
939 
940 static Result<std::vector<int>> udpRetryingPoll(res_state statp, const timespec* finish) {
941     for (;;) {
942         LOG(DEBUG) << __func__ << ": poll";
943         timespec start_time = evNowTime();
944         timespec timeout = (evCmpTime(*finish, start_time) > 0) ? evSubTime(*finish, start_time)
945                                                                 : evConsTime(0L, 0L);
946         std::vector<pollfd> fdset = extractUdpFdset(statp);
947         const int n = ppoll(fdset.data(), fdset.size(), &timeout, /*__mask=*/nullptr);
948         if (n <= 0) {
949             if (errno == EINTR && n < 0) continue;
950             if (n == 0) errno = ETIMEDOUT;
951             PLOG(INFO) << __func__ << ": failed";
952             return ErrnoError();
953         }
954         std::vector<int> fdsToRead;
955         for (const auto& pollfd : fdset) {
956             if (pollfd.revents & (POLLIN | POLLERR)) {
957                 fdsToRead.push_back(pollfd.fd);
958             }
959         }
960         LOG(DEBUG) << __func__ << ": "
961                    << " returning fd size: " << fdsToRead.size();
962         return fdsToRead;
963     }
964 }
965 
966 static Result<std::vector<int>> udpRetryingPollWrapper(res_state statp, int ns,
967                                                        const timespec* finish) {
968     const bool keepListeningUdp =
969             android::net::Experiments::getInstance()->getFlag("keep_listening_udp", 0);
970     if (keepListeningUdp) return udpRetryingPoll(statp, finish);
971 
972     if (int n = retrying_poll(statp->nssocks[ns], POLLIN, finish); n <= 0) {
973         return ErrnoError();
974     }
975     return std::vector<int>{statp->nssocks[ns]};
976 }
977 
978 bool ignoreInvalidAnswer(res_state statp, const sockaddr_storage& from, const uint8_t* buf,
979                          int buflen, uint8_t* ans, int anssiz, int* receivedFromNs) {
980     const HEADER* hp = (const HEADER*)(const void*)buf;
981     HEADER* anhp = (HEADER*)(void*)ans;
982     if (hp->id != anhp->id) {
983         // response from old query, ignore it.
984         LOG(DEBUG) << __func__ << ": old answer:";
985         return true;
986     }
987     if (*receivedFromNs = res_ourserver_p(statp, (sockaddr*)(void*)&from); *receivedFromNs < 0) {
988         // response from wrong server? ignore it.
989         LOG(DEBUG) << __func__ << ": not our server:";
990         return true;
991     }
992     if (!res_queriesmatch(buf, buf + buflen, ans, ans + anssiz)) {
993         // response contains wrong query? ignore it.
994         LOG(DEBUG) << __func__ << ": wrong query name:";
995         return true;
996     }
997     return false;
998 }
999 
1000 static int send_dg(res_state statp, res_params* params, const uint8_t* buf, int buflen,
1001                    uint8_t* ans, int anssiz, int* terrno, size_t* ns, int* v_circuit,
1002                    int* gotsomewhere, time_t* at, int* rcode, int* delay) {
1003     // It should never happen, but just in case.
1004     if (*ns >= statp->nsaddrs.size()) {
1005         LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
1006         *terrno = EINVAL;
1007         return -1;
1008     }
1009 
1010     *at = time(nullptr);
1011     *delay = 0;
1012     const sockaddr_storage ss = statp->nsaddrs[*ns];
1013     const sockaddr* nsap = reinterpret_cast<const sockaddr*>(&ss);
1014     const int nsaplen = sockaddrSize(nsap);
1015 
1016     if (statp->nssocks[*ns] == -1) {
1017         statp->nssocks[*ns].reset(socket(nsap->sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0));
1018         if (statp->nssocks[*ns] < 0) {
1019             *terrno = errno;
1020             PLOG(DEBUG) << __func__ << ": socket(dg): ";
1021             switch (errno) {
1022                 case EPROTONOSUPPORT:
1023                 case EPFNOSUPPORT:
1024                 case EAFNOSUPPORT:
1025                     return (0);
1026                 default:
1027                     return (-1);
1028             }
1029         }
1030 
1031         const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
1032         resolv_tag_socket(statp->nssocks[*ns], uid, statp->pid);
1033         if (statp->_mark != MARK_UNSET) {
1034             if (setsockopt(statp->nssocks[*ns], SOL_SOCKET, SO_MARK, &(statp->_mark),
1035                            sizeof(statp->_mark)) < 0) {
1036                 *terrno = errno;
1037                 statp->closeSockets();
1038                 return -1;
1039             }
1040         }
1041         // Use a "connected" datagram socket to receive an ECONNREFUSED error
1042         // on the next socket operation when the server responds with an
1043         // ICMP port-unreachable error. This way we can detect the absence of
1044         // a nameserver without timing out.
1045         if (random_bind(statp->nssocks[*ns], nsap->sa_family) < 0) {
1046             *terrno = errno;
1047             dump_error("bind(dg)", nsap, nsaplen);
1048             statp->closeSockets();
1049             return (0);
1050         }
1051         if (connect(statp->nssocks[*ns], nsap, (socklen_t)nsaplen) < 0) {
1052             *terrno = errno;
1053             dump_error("connect(dg)", nsap, nsaplen);
1054             statp->closeSockets();
1055             return (0);
1056         }
1057         LOG(DEBUG) << __func__ << ": new DG socket";
1058     }
1059     if (send(statp->nssocks[*ns], (const char*)buf, (size_t)buflen, 0) != buflen) {
1060         *terrno = errno;
1061         PLOG(DEBUG) << __func__ << ": send: ";
1062         statp->closeSockets();
1063         return 0;
1064     }
1065 
1066     timespec timeout = get_timeout(statp, params, *ns);
1067     timespec start_time = evNowTime();
1068     timespec finish = evAddTime(start_time, timeout);
1069     for (;;) {
1070         // Wait for reply.
1071         auto result = udpRetryingPollWrapper(statp, *ns, &finish);
1072 
1073         if (!result.has_value()) {
1074             const bool isTimeout = (result.error().code() == ETIMEDOUT);
1075             *rcode = (isTimeout) ? RCODE_TIMEOUT : *rcode;
1076             *terrno = (isTimeout) ? ETIMEDOUT : errno;
1077             *gotsomewhere = (isTimeout) ? 1 : *gotsomewhere;
1078             // Leave the UDP sockets open on timeout so we can keep listening for
1079             // a late response from this server while retrying on the next server.
1080             if (!isTimeout) statp->closeSockets();
1081             LOG(DEBUG) << __func__ << ": " << (isTimeout) ? "timeout" : "poll";
1082             return 0;
1083         }
1084         bool needRetry = false;
1085         for (int fd : result.value()) {
1086             needRetry = false;
1087             sockaddr_storage from;
1088             socklen_t fromlen = sizeof(from);
1089             int resplen =
1090                     recvfrom(fd, (char*)ans, (size_t)anssiz, 0, (sockaddr*)(void*)&from, &fromlen);
1091             if (resplen <= 0) {
1092                 *terrno = errno;
1093                 PLOG(DEBUG) << __func__ << ": recvfrom: ";
1094                 continue;
1095             }
1096             *gotsomewhere = 1;
1097             if (resplen < HFIXEDSZ) {
1098                 // Undersized message.
1099                 LOG(DEBUG) << __func__ << ": undersized: " << resplen;
1100                 *terrno = EMSGSIZE;
1101                 continue;
1102             }
1103 
1104             int receivedFromNs = *ns;
1105             if (needRetry =
1106                         ignoreInvalidAnswer(statp, from, buf, buflen, ans, anssiz, &receivedFromNs);
1107                 needRetry) {
1108                 res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
1109                 continue;
1110             }
1111 
1112             HEADER* anhp = (HEADER*)(void*)ans;
1113             if (anhp->rcode == FORMERR && (statp->netcontext_flags & NET_CONTEXT_FLAG_USE_EDNS)) {
1114                 //  Do not retry if the server do not understand EDNS0.
1115                 //  The case has to be captured here, as FORMERR packet do not
1116                 //  carry query section, hence res_queriesmatch() returns 0.
1117                 LOG(DEBUG) << __func__ << ": server rejected query with EDNS0:";
1118                 res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
1119                 // record the error
1120                 statp->_flags |= RES_F_EDNS0ERR;
1121                 *terrno = EREMOTEIO;
1122                 continue;
1123             }
1124 
1125             timespec done = evNowTime();
1126             *delay = res_stats_calculate_rtt(&done, &start_time);
1127             if (anhp->rcode == SERVFAIL || anhp->rcode == NOTIMP || anhp->rcode == REFUSED) {
1128                 LOG(DEBUG) << __func__ << ": server rejected query:";
1129                 res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
1130                 *rcode = anhp->rcode;
1131                 continue;
1132             }
1133             if (anhp->tc) {
1134                 // To get the rest of answer,
1135                 // use TCP with same server.
1136                 LOG(DEBUG) << __func__ << ": truncated answer";
1137                 *terrno = E2BIG;
1138                 *v_circuit = 1;
1139                 return 1;
1140             }
1141             // All is well, or the error is fatal. Signal that the
1142             // next nameserver ought not be tried.
1143 
1144             *rcode = anhp->rcode;
1145             *ns = receivedFromNs;
1146             *terrno = 0;
1147             return resplen;
1148         }
1149         if (!needRetry) return 0;
1150     }
1151 }
1152 
1153 static void dump_error(const char* str, const struct sockaddr* address, int alen) {
1154     char hbuf[NI_MAXHOST];
1155     char sbuf[NI_MAXSERV];
1156     constexpr int niflags = NI_NUMERICHOST | NI_NUMERICSERV;
1157     const int err = errno;
1158 
1159     if (!WOULD_LOG(DEBUG)) return;
1160 
1161     if (getnameinfo(address, (socklen_t)alen, hbuf, sizeof(hbuf), sbuf, sizeof(sbuf), niflags)) {
1162         strncpy(hbuf, "?", sizeof(hbuf) - 1);
1163         hbuf[sizeof(hbuf) - 1] = '\0';
1164         strncpy(sbuf, "?", sizeof(sbuf) - 1);
1165         sbuf[sizeof(sbuf) - 1] = '\0';
1166     }
1167     errno = err;
1168     PLOG(DEBUG) << __func__ << ": " << str << " ([" << hbuf << "]." << sbuf << "): ";
1169 }
1170 
// Compares two socket addresses. Returns 1 when both have the same family and,
// for AF_INET / AF_INET6, the same port and address (plus the same scope id when
// HAVE_SIN6_SCOPE_ID is defined). Returns 0 otherwise, including for any other
// address family.
static int sock_eq(struct sockaddr* a, struct sockaddr* b) {
    const auto family = a->sa_family;
    if (family != b->sa_family) return 0;

    if (family == AF_INET) {
        const auto* lhs = reinterpret_cast<const struct sockaddr_in*>(a);
        const auto* rhs = reinterpret_cast<const struct sockaddr_in*>(b);
        if (lhs->sin_port != rhs->sin_port) return 0;
        return (lhs->sin_addr.s_addr == rhs->sin_addr.s_addr) ? 1 : 0;
    }

    if (family == AF_INET6) {
        const auto* lhs = reinterpret_cast<const struct sockaddr_in6*>(a);
        const auto* rhs = reinterpret_cast<const struct sockaddr_in6*>(b);
        if (lhs->sin6_port != rhs->sin6_port) return 0;
#ifdef HAVE_SIN6_SCOPE_ID
        if (lhs->sin6_scope_id != rhs->sin6_scope_id) return 0;
#endif
        return IN6_ARE_ADDR_EQUAL(&lhs->sin6_addr, &rhs->sin6_addr) ? 1 : 0;
    }

    return 0;
}
1193 
1194 PrivateDnsModes convertEnumType(PrivateDnsMode privateDnsmode) {
1195     switch (privateDnsmode) {
1196         case PrivateDnsMode::OFF:
1197             return PrivateDnsModes::PDM_OFF;
1198         case PrivateDnsMode::OPPORTUNISTIC:
1199             return PrivateDnsModes::PDM_OPPORTUNISTIC;
1200         case PrivateDnsMode::STRICT:
1201             return PrivateDnsModes::PDM_STRICT;
1202         default:
1203             return PrivateDnsModes::PDM_UNKNOWN;
1204     }
1205 }
1206 
1207 static int res_tls_send(res_state statp, const Slice query, const Slice answer, int* rcode,
1208                         bool* fallback) {
1209     int resplen = 0;
1210     const unsigned netId = statp->netid;
1211 
1212     auto& privateDnsConfiguration = PrivateDnsConfiguration::getInstance();
1213     PrivateDnsStatus privateDnsStatus = privateDnsConfiguration.getStatus(netId);
1214     statp->event->set_private_dns_modes(convertEnumType(privateDnsStatus.mode));
1215 
1216     if (privateDnsStatus.mode == PrivateDnsMode::OFF) {
1217         *fallback = true;
1218         return -1;
1219     }
1220 
1221     if (privateDnsStatus.validatedServers().empty()) {
1222         if (privateDnsStatus.mode == PrivateDnsMode::OPPORTUNISTIC) {
1223             *fallback = true;
1224             return -1;
1225         } else {
1226             // Sleep and iterate some small number of times checking for the
1227             // arrival of resolved and validated server IP addresses, instead
1228             // of returning an immediate error.
1229             // This is needed because as soon as a network becomes the default network, apps will
1230             // send DNS queries on that network. If no servers have yet validated, and we do not
1231             // block those queries, they would immediately fail, causing application-visible errors.
1232             // Note that this can happen even before the network validates, since an unvalidated
1233             // network can become the default network if no validated networks are available.
1234             //
1235             // TODO: see if there is a better way to address this problem, such as buffering the
1236             // queries in a queue or only blocking queries for the first few seconds after a default
1237             // network change.
1238             for (int i = 0; i < 42; i++) {
1239                 std::this_thread::sleep_for(std::chrono::milliseconds(100));
1240                 // Calling getStatus() to merely check if there's any validated server seems
1241                 // wasteful. Consider adding a new method in PrivateDnsConfiguration for speed ups.
1242                 if (!privateDnsConfiguration.getStatus(netId).validatedServers().empty()) {
1243                     privateDnsStatus = privateDnsConfiguration.getStatus(netId);
1244                     break;
1245                 }
1246             }
1247             if (privateDnsStatus.validatedServers().empty()) {
1248                 return -1;
1249             }
1250         }
1251     }
1252 
1253     LOG(INFO) << __func__ << ": performing query over TLS";
1254 
1255     const auto response = DnsTlsDispatcher::getInstance().query(privateDnsStatus.validatedServers(),
1256                                                                 statp, query, answer, &resplen);
1257 
1258     LOG(INFO) << __func__ << ": TLS query result: " << static_cast<int>(response);
1259 
1260     if (privateDnsStatus.mode == PrivateDnsMode::OPPORTUNISTIC) {
1261         // In opportunistic mode, handle falling back to cleartext in some
1262         // cases (DNS shouldn't fail if a validated opportunistic mode server
1263         // becomes unreachable for some reason).
1264         switch (response) {
1265             case DnsTlsTransport::Response::success:
1266                 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1267                 return resplen;
1268             case DnsTlsTransport::Response::network_error:
1269                 // No need to set the error timeout here since it will fallback to UDP.
1270             case DnsTlsTransport::Response::internal_error:
1271                 // Note: this will cause cleartext queries to be emitted, with
1272                 // all of the EDNS0 goodness enabled. Fingers crossed.  :-/
1273                 *fallback = true;
1274                 [[fallthrough]];
1275             default:
1276                 return -1;
1277         }
1278     } else {
1279         // Strict mode
1280         switch (response) {
1281             case DnsTlsTransport::Response::success:
1282                 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1283                 return resplen;
1284             case DnsTlsTransport::Response::network_error:
1285                 // This case happens when the query stored in DnsTlsTransport is expired since
1286                 // either 1) the query has been tried for 3 times but no response or 2) fail to
1287                 // establish the connection with the server.
1288                 *rcode = RCODE_TIMEOUT;
1289                 [[fallthrough]];
1290             default:
1291                 return -1;
1292         }
1293     }
1294 }
1295 
1296 int resolv_res_nsend(const android_net_context* netContext, const uint8_t* msg, int msgLen,
1297                      uint8_t* ans, int ansLen, int* rcode, uint32_t flags,
1298                      NetworkDnsEventReported* event) {
1299     assert(event != nullptr);
1300     ResState res(netContext, event);
1301     resolv_populate_res_for_net(&res);
1302     *rcode = NOERROR;
1303     return res_nsend(&res, msg, msgLen, ans, ansLen, rcode, flags);
1304 }
1305