1 /*	$NetBSD: res_send.c,v 1.9 2006/01/24 17:41:25 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1985, 1989, 1993
5  *    The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  * 	This product includes software developed by the University of
18  * 	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * Portions Copyright (c) 1993 by Digital Equipment Corporation.
38  *
39  * Permission to use, copy, modify, and distribute this software for any
40  * purpose with or without fee is hereby granted, provided that the above
41  * copyright notice and this permission notice appear in all copies, and that
42  * the name of Digital Equipment Corporation not be used in advertising or
43  * publicity pertaining to distribution of the document or software without
44  * specific, written prior permission.
45  *
46  * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
47  * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
48  * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
49  * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
50  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
51  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
52  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
53  * SOFTWARE.
54  */
55 
56 /*
57  * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
58  * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
59  *
60  * Permission to use, copy, modify, and distribute this software for any
61  * purpose with or without fee is hereby granted, provided that the above
62  * copyright notice and this permission notice appear in all copies.
63  *
64  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
65  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
66  * MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR
67  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
68  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
69  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
70  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
71  */
72 
73 /*
74  * Send query to name server and wait for reply.
75  */
76 
77 #define LOG_TAG "resolv"
78 
79 #include <chrono>
80 
81 #include <sys/param.h>
82 #include <sys/socket.h>
83 #include <sys/time.h>
84 #include <sys/uio.h>
85 
86 #include <arpa/inet.h>
87 #include <arpa/nameser.h>
88 
89 #include <errno.h>
90 #include <fcntl.h>
91 #include <netdb.h>
92 #include <poll.h>
93 #include <signal.h>
94 #include <stdlib.h>
95 #include <string.h>
96 #include <time.h>
97 #include <unistd.h>
98 
99 #include <android-base/logging.h>
100 #include <android-base/result.h>
101 #include <android/multinetwork.h>  // ResNsendFlags
102 
103 #include <netdutils/Slice.h>
104 #include <netdutils/Stopwatch.h>
105 #include "DnsTlsDispatcher.h"
106 #include "DnsTlsTransport.h"
107 #include "Experiments.h"
108 #include "PrivateDnsConfiguration.h"
109 #include "netd_resolv/resolv.h"
110 #include "private/android_filesystem_config.h"
111 
112 #include "res_comp.h"
113 #include "res_debug.h"
114 #include "res_init.h"
115 #include "resolv_cache.h"
116 #include "stats.h"
117 #include "stats.pb.h"
118 #include "util.h"
119 
120 using namespace std::chrono_literals;
121 // TODO: use the namespace something like android::netd_resolv for libnetd_resolv
122 using android::base::ErrnoError;
123 using android::base::Result;
124 using android::net::CacheStatus;
125 using android::net::DnsQueryEvent;
126 using android::net::DnsTlsDispatcher;
127 using android::net::DnsTlsTransport;
128 using android::net::gPrivateDnsConfiguration;
129 using android::net::IpVersion;
130 using android::net::IV_IPV4;
131 using android::net::IV_IPV6;
132 using android::net::IV_UNKNOWN;
133 using android::net::LinuxErrno;
134 using android::net::NetworkDnsEventReported;
135 using android::net::NS_T_INVALID;
136 using android::net::NsRcode;
137 using android::net::NsType;
138 using android::net::PrivateDnsMode;
139 using android::net::PrivateDnsModes;
140 using android::net::PrivateDnsStatus;
141 using android::net::PROTO_TCP;
142 using android::net::PROTO_UDP;
143 using android::netdutils::IPSockAddr;
144 using android::netdutils::Slice;
145 using android::netdutils::Stopwatch;
146 
147 static DnsTlsDispatcher sDnsTlsDispatcher;
148 
149 static int send_vc(res_state statp, res_params* params, const uint8_t* buf, int buflen,
150                    uint8_t* ans, int anssiz, int* terrno, size_t ns, time_t* at, int* rcode,
151                    int* delay);
152 static int send_dg(res_state statp, res_params* params, const uint8_t* buf, int buflen,
153                    uint8_t* ans, int anssiz, int* terrno, size_t* ns, int* v_circuit,
154                    int* gotsomewhere, time_t* at, int* rcode, int* delay);
155 
156 static void dump_error(const char*, const struct sockaddr*, int);
157 
158 static int sock_eq(struct sockaddr*, struct sockaddr*);
159 static int connect_with_timeout(int sock, const struct sockaddr* nsap, socklen_t salen,
160                                 const struct timespec timeout);
161 static int retrying_poll(const int sock, short events, const struct timespec* finish);
162 static int res_tls_send(res_state, const Slice query, const Slice answer, int* rcode,
163                         bool* fallback);
164 
getQueryType(const uint8_t * msg,size_t msgLen)165 NsType getQueryType(const uint8_t* msg, size_t msgLen) {
166     ns_msg handle;
167     ns_rr rr;
168     if (ns_initparse((const uint8_t*)msg, msgLen, &handle) < 0 ||
169         ns_parserr(&handle, ns_s_qd, 0, &rr) < 0) {
170         return NS_T_INVALID;
171     }
172     return static_cast<NsType>(ns_rr_type(rr));
173 }
174 
ipFamilyToIPVersion(const int ipFamily)175 IpVersion ipFamilyToIPVersion(const int ipFamily) {
176     switch (ipFamily) {
177         case AF_INET:
178             return IV_IPV4;
179         case AF_INET6:
180             return IV_IPV6;
181         default:
182             return IV_UNKNOWN;
183     }
184 }
185 
186 // BEGIN: Code copied from ISC eventlib
187 // TODO: move away from this code
188 #define BILLION 1000000000
189 
// Builds a timespec from a seconds / nanoseconds pair. No normalization is performed.
static struct timespec evConsTime(time_t sec, long nsec) {
    struct timespec built;
    built.tv_nsec = nsec;
    built.tv_sec = sec;
    return built;
}
197 
evAddTime(struct timespec addend1,struct timespec addend2)198 static struct timespec evAddTime(struct timespec addend1, struct timespec addend2) {
199     struct timespec x;
200 
201     x.tv_sec = addend1.tv_sec + addend2.tv_sec;
202     x.tv_nsec = addend1.tv_nsec + addend2.tv_nsec;
203     if (x.tv_nsec >= BILLION) {
204         x.tv_sec++;
205         x.tv_nsec -= BILLION;
206     }
207     return (x);
208 }
209 
evSubTime(struct timespec minuend,struct timespec subtrahend)210 static struct timespec evSubTime(struct timespec minuend, struct timespec subtrahend) {
211     struct timespec x;
212 
213     x.tv_sec = minuend.tv_sec - subtrahend.tv_sec;
214     if (minuend.tv_nsec >= subtrahend.tv_nsec)
215         x.tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;
216     else {
217         x.tv_nsec = BILLION - subtrahend.tv_nsec + minuend.tv_nsec;
218         x.tv_sec--;
219     }
220     return (x);
221 }
222 
// Three-way comparison of two timespecs.
// Returns -1 if |a| is earlier than |b|, 1 if it is later, and 0 if both are equal.
// Seconds are compared first; nanoseconds only break a tie.
static int evCmpTime(struct timespec a, struct timespec b) {
// Sign of x as -1 / 0 / 1. The macro deliberately has no trailing semicolon so
// that it expands to a plain expression.
#define SGN(x) ((x) < 0 ? (-1) : (x) > 0 ? (1) : (0))
    const time_t secDiff = a.tv_sec - b.tv_sec;
    if (secDiff != 0) return SGN(secDiff);

    const long nsecDiff = a.tv_nsec - b.tv_nsec;
    return SGN(nsecDiff);
}
233 
// Returns the current CLOCK_REALTIME time.
// The result is value-initialized first so that, should clock_gettime() fail, the
// caller receives a zeroed timespec rather than an indeterminate one.
static struct timespec evNowTime(void) {
    struct timespec tsnow = {};
    clock_gettime(CLOCK_REALTIME, &tsnow);
    return tsnow;
}
239 
240 // END: Code copied from ISC eventlib
241 
242 /* BIONIC-BEGIN: implement source port randomization */
random_bind(int s,int family)243 static int random_bind(int s, int family) {
244     sockaddr_union u;
245     int j;
246     socklen_t slen;
247 
248     /* clear all, this also sets the IP4/6 address to 'any' */
249     memset(&u, 0, sizeof u);
250 
251     switch (family) {
252         case AF_INET:
253             u.sin.sin_family = family;
254             slen = sizeof u.sin;
255             break;
256         case AF_INET6:
257             u.sin6.sin6_family = family;
258             slen = sizeof u.sin6;
259             break;
260         default:
261             errno = EPROTO;
262             return -1;
263     }
264 
265     /* first try to bind to a random source port a few times */
266     for (j = 0; j < 10; j++) {
267         /* find a random port between 1025 .. 65534 */
268         int port = 1025 + (arc4random_uniform(65535 - 1025));
269         if (family == AF_INET)
270             u.sin.sin_port = htons(port);
271         else
272             u.sin6.sin6_port = htons(port);
273 
274         if (!bind(s, &u.sa, slen)) return 0;
275     }
276 
277     // nothing after 10 attempts, our network table is probably busy
278     // let the system decide which port is best
279     if (family == AF_INET)
280         u.sin.sin_port = 0;
281     else
282         u.sin6.sin6_port = 0;
283 
284     return bind(s, &u.sa, slen);
285 }
286 /* BIONIC-END */
287 
// Keeps only the |selectedServer|-th usable nameserver (1-based, counted among the
// entries that are currently true) and marks every other entry of |usable_servers|
// as unusable.
static void res_set_usable_server(int selectedServer, int nscount, bool usable_servers[]) {
    int usableRank = 0;
    for (int i = 0; i < nscount; ++i) {
        // Entries that are already disabled stay disabled and do not count.
        if (!usable_servers[i]) continue;
        ++usableRank;
        usable_servers[i] = (usableRank == selectedServer);
    }
}
296 
297 // Looks up the nameserver address in res.nsaddrs[], returns the ns number if found, otherwise -1.
res_ourserver_p(res_state statp,const sockaddr * sa)298 static int res_ourserver_p(res_state statp, const sockaddr* sa) {
299     const sockaddr_in *inp, *srv;
300     const sockaddr_in6 *in6p, *srv6;
301     int ns = 0;
302     switch (sa->sa_family) {
303         case AF_INET:
304             inp = (const struct sockaddr_in*) (const void*) sa;
305 
306             for (const IPSockAddr& ipsa : statp->nsaddrs) {
307                 sockaddr_storage ss = ipsa;
308                 srv = reinterpret_cast<sockaddr_in*>(&ss);
309                 if (srv->sin_family == inp->sin_family && srv->sin_port == inp->sin_port &&
310                     (srv->sin_addr.s_addr == INADDR_ANY ||
311                      srv->sin_addr.s_addr == inp->sin_addr.s_addr))
312                     return ns;
313                 ++ns;
314             }
315             break;
316         case AF_INET6:
317             in6p = (const struct sockaddr_in6*) (const void*) sa;
318             for (const IPSockAddr& ipsa : statp->nsaddrs) {
319                 sockaddr_storage ss = ipsa;
320                 srv6 = reinterpret_cast<sockaddr_in6*>(&ss);
321                 if (srv6->sin6_family == in6p->sin6_family && srv6->sin6_port == in6p->sin6_port &&
322 #ifdef HAVE_SIN6_SCOPE_ID
323                     (srv6->sin6_scope_id == 0 || srv6->sin6_scope_id == in6p->sin6_scope_id) &&
324 #endif
325                     (IN6_IS_ADDR_UNSPECIFIED(&srv6->sin6_addr) ||
326                      IN6_ARE_ADDR_EQUAL(&srv6->sin6_addr, &in6p->sin6_addr)))
327                     return ns;
328                 ++ns;
329             }
330             break;
331         default:
332             break;
333     }
334     return -1;
335 }
336 
337 /* int
338  * res_nameinquery(name, type, cl, buf, eom)
339  *	look for (name, type, cl) in the query section of packet (buf, eom)
340  * requires:
341  *	buf + HFIXEDSZ <= eom
342  * returns:
343  *	-1 : format error
344  *	0  : not found
345  *	>0 : found
346  * author:
347  *	paul vixie, 29may94
348  */
res_nameinquery(const char * name,int type,int cl,const uint8_t * buf,const uint8_t * eom)349 int res_nameinquery(const char* name, int type, int cl, const uint8_t* buf, const uint8_t* eom) {
350     const uint8_t* cp = buf + HFIXEDSZ;
351     int qdcount = ntohs(((const HEADER*) (const void*) buf)->qdcount);
352 
353     while (qdcount-- > 0) {
354         char tname[MAXDNAME + 1];
355         int n = dn_expand(buf, eom, cp, tname, sizeof tname);
356         if (n < 0) return (-1);
357         cp += n;
358         if (cp + 2 * INT16SZ > eom) return (-1);
359         int ttype = ntohs(*reinterpret_cast<const uint16_t*>(cp));
360         cp += INT16SZ;
361         int tclass = ntohs(*reinterpret_cast<const uint16_t*>(cp));
362         cp += INT16SZ;
363         if (ttype == type && tclass == cl && ns_samename(tname, name) == 1) return (1);
364     }
365     return (0);
366 }
367 
/* int
 * res_queriesmatch(buf1, eom1, buf2, eom2)
 *	is there a 1:1 mapping of (name,type,class)
 *	in (buf1,eom1) and (buf2,eom2)?
 * returns:
 *	-1 : format error (in either packet)
 *	0  : not a 1:1 mapping
 *	>0 : is a 1:1 mapping
 * author:
 *	paul vixie, 29may94
 */
int res_queriesmatch(const uint8_t* buf1, const uint8_t* eom1, const uint8_t* buf2,
                     const uint8_t* eom2) {
    // Validate both headers before reading anything out of them.
    if (eom1 - buf1 < HFIXEDSZ || eom2 - buf2 < HFIXEDSZ) return (-1);

    const uint8_t* cp = buf1 + HFIXEDSZ;
    int qdcount = ntohs(((const HEADER*) (const void*) buf1)->qdcount);

    /*
     * Only header section present in replies to
     * dynamic update packets.
     */
    if ((((const HEADER*) (const void*) buf1)->opcode == ns_o_update) &&
        (((const HEADER*) (const void*) buf2)->opcode == ns_o_update))
        return (1);

    if (qdcount != ntohs(((const HEADER*) (const void*) buf2)->qdcount)) return (0);
    while (qdcount-- > 0) {
        char tname[MAXDNAME + 1];
        int n = dn_expand(buf1, eom1, cp, tname, sizeof tname);
        if (n < 0) return (-1);
        cp += n;
        if (eom1 - cp < 2 * INT16SZ) return (-1);

        // |cp| follows a variable-length name, so it may sit at an odd address.
        // Copy the 16-bit fields out instead of dereferencing a uint16_t pointer.
        uint16_t field;
        memcpy(&field, cp, sizeof field);
        int ttype = ntohs(field);
        cp += INT16SZ;
        memcpy(&field, cp, sizeof field);
        int tclass = ntohs(field);
        cp += INT16SZ;

        // res_nameinquery() reports -1 for a malformed packet and 0 for "not found".
        // Hand both back unchanged instead of mistaking a format error for a match.
        int found = res_nameinquery(tname, ttype, tclass, buf2, eom2);
        if (found <= 0) return (found);
    }
    return (1);
}
409 
addDnsQueryEvent(NetworkDnsEventReported * event)410 static DnsQueryEvent* addDnsQueryEvent(NetworkDnsEventReported* event) {
411     return event->mutable_dns_query_events()->add_dns_query_event();
412 }
413 
// Tells whether |terrno| indicates that sending was blocked by a network restriction.
static bool isNetworkRestricted(int terrno) {
    // The system may be in some network restricted mode that blocks sending packets,
    // which surfaces as EPERM. Retrying in that situation is pointless.
    // TODO: Check the system status to know if network restricted mode is
    // enabled.
    switch (terrno) {
        case EPERM:
            return true;
        default:
            return false;
    }
}
422 
res_nsend(res_state statp,const uint8_t * buf,int buflen,uint8_t * ans,int anssiz,int * rcode,uint32_t flags,std::chrono::milliseconds sleepTimeMs)423 int res_nsend(res_state statp, const uint8_t* buf, int buflen, uint8_t* ans, int anssiz, int* rcode,
424               uint32_t flags, std::chrono::milliseconds sleepTimeMs) {
425     LOG(DEBUG) << __func__;
426 
427     // Should not happen
428     if (anssiz < HFIXEDSZ) {
429         // TODO: Remove errno once callers stop using it
430         errno = EINVAL;
431         return -EINVAL;
432     }
433     res_pquery(buf, buflen);
434 
435     int anslen = 0;
436     Stopwatch cacheStopwatch;
437     ResolvCacheStatus cache_status =
438             resolv_cache_lookup(statp->netid, buf, buflen, ans, anssiz, &anslen, flags);
439     const int32_t cacheLatencyUs = saturate_cast<int32_t>(cacheStopwatch.timeTakenUs());
440     if (cache_status == RESOLV_CACHE_FOUND) {
441         HEADER* hp = (HEADER*)(void*)ans;
442         *rcode = hp->rcode;
443         DnsQueryEvent* dnsQueryEvent = addDnsQueryEvent(statp->event);
444         dnsQueryEvent->set_latency_micros(cacheLatencyUs);
445         dnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
446         dnsQueryEvent->set_type(getQueryType(buf, buflen));
447         return anslen;
448     } else if (cache_status != RESOLV_CACHE_UNSUPPORTED) {
449         // had a cache miss for a known network, so populate the thread private
450         // data so the normal resolve path can do its thing
451         resolv_populate_res_for_net(statp);
452     }
453     if (statp->nameserverCount() == 0) {
454         // We have no nameservers configured, so there's no point trying.
455         // Tell the cache the query failed, or any retries and anyone else asking the same
456         // question will block for PENDING_REQUEST_TIMEOUT seconds instead of failing fast.
457         _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
458 
459         // TODO: Remove errno once callers stop using it
460         errno = ESRCH;
461         return -ESRCH;
462     }
463 
464     // If parallel_lookup is enabled, it might be required to wait some time to avoid
465     // gateways drop packets if queries are sent too close together
466     if (sleepTimeMs != 0ms) {
467         std::this_thread::sleep_for(sleepTimeMs);
468     }
469     // DoT
470     if (!(statp->netcontext_flags & NET_CONTEXT_FLAG_USE_LOCAL_NAMESERVERS)) {
471         bool fallback = false;
472         int resplen = res_tls_send(statp, Slice(const_cast<uint8_t*>(buf), buflen),
473                                    Slice(ans, anssiz), rcode, &fallback);
474         if (resplen > 0) {
475             LOG(DEBUG) << __func__ << ": got answer from DoT";
476             res_pquery(ans, resplen);
477             if (cache_status == RESOLV_CACHE_NOTFOUND) {
478                 resolv_cache_add(statp->netid, buf, buflen, ans, resplen);
479             }
480             return resplen;
481         }
482         if (!fallback) {
483             _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
484             return -ETIMEDOUT;
485         }
486     }
487 
488     res_stats stats[MAXNS]{};
489     res_params params;
490     int revision_id = resolv_cache_get_resolver_stats(statp->netid, &params, stats, statp->nsaddrs);
491     if (revision_id < 0) {
492         // TODO: Remove errno once callers stop using it
493         errno = ESRCH;
494         return -ESRCH;
495     }
496     bool usable_servers[MAXNS];
497     int usableServersCount = android_net_res_stats_get_usable_servers(
498             &params, stats, statp->nameserverCount(), usable_servers);
499 
500     if ((flags & ANDROID_RESOLV_NO_RETRY) && usableServersCount > 1) {
501         auto hp = reinterpret_cast<const HEADER*>(buf);
502 
503         // Select a random server based on the query id
504         int selectedServer = (hp->id % usableServersCount) + 1;
505         res_set_usable_server(selectedServer, statp->nameserverCount(), usable_servers);
506     }
507 
508     // Send request, RETRY times, or until successful.
509     int retryTimes = (flags & ANDROID_RESOLV_NO_RETRY) ? 1 : params.retry_count;
510     int useTcp = buflen > PACKETSZ;
511     int gotsomewhere = 0;
512     // Use an impossible error code as default value
513     int terrno = ETIME;
514 
515     for (int attempt = 0; attempt < retryTimes; ++attempt) {
516         for (size_t ns = 0; ns < statp->nsaddrs.size(); ++ns) {
517             if (!usable_servers[ns]) continue;
518 
519             *rcode = RCODE_INTERNAL_ERROR;
520 
521             // Get server addr
522             const IPSockAddr& serverSockAddr = statp->nsaddrs[ns];
523             LOG(DEBUG) << __func__ << ": Querying server (# " << ns + 1
524                        << ") address = " << serverSockAddr.toString();
525 
526             ::android::net::Protocol query_proto = useTcp ? PROTO_TCP : PROTO_UDP;
527             time_t query_time = 0;
528             int delay = 0;
529             bool fallbackTCP = false;
530             const bool shouldRecordStats = (attempt == 0);
531             int resplen;
532             Stopwatch queryStopwatch;
533             int retry_count_for_event = 0;
534             size_t actualNs = ns;
535             // Use an impossible error code as default value
536             terrno = ETIME;
537             if (useTcp) {
538                 // TCP; at most one attempt per server.
539                 attempt = retryTimes;
540                 resplen = send_vc(statp, &params, buf, buflen, ans, anssiz, &terrno, ns,
541                                   &query_time, rcode, &delay);
542 
543                 if (buflen <= PACKETSZ && resplen <= 0 &&
544                     statp->tc_mode == aidl::android::net::IDnsResolver::TC_MODE_UDP_TCP) {
545                     // reset to UDP for next query on next DNS server if resolver is currently doing
546                     // TCP fallback retry and current server does not support TCP connectin
547                     useTcp = false;
548                 }
549                 LOG(INFO) << __func__ << ": used send_vc " << resplen << " terrno: " << terrno;
550             } else {
551                 // UDP
552                 resplen = send_dg(statp, &params, buf, buflen, ans, anssiz, &terrno, &actualNs,
553                                   &useTcp, &gotsomewhere, &query_time, rcode, &delay);
554                 fallbackTCP = useTcp ? true : false;
555                 retry_count_for_event = attempt;
556                 LOG(INFO) << __func__ << ": used send_dg " << resplen << " terrno: " << terrno;
557             }
558 
559             const IPSockAddr& receivedServerAddr = statp->nsaddrs[actualNs];
560             DnsQueryEvent* dnsQueryEvent = addDnsQueryEvent(statp->event);
561             dnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
562             // When |retryTimes| > 1, we cannot actually know the correct latency value if we
563             // received the answer from the previous server. So temporarily set the latency as -1 if
564             // that condition happened.
565             // TODO: make the latency value accurate.
566             dnsQueryEvent->set_latency_micros(
567                     (actualNs == ns) ? saturate_cast<int32_t>(queryStopwatch.timeTakenUs()) : -1);
568             dnsQueryEvent->set_dns_server_index(actualNs);
569             dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(receivedServerAddr.family()));
570             dnsQueryEvent->set_retry_times(retry_count_for_event);
571             dnsQueryEvent->set_rcode(static_cast<NsRcode>(*rcode));
572             dnsQueryEvent->set_protocol(query_proto);
573             dnsQueryEvent->set_type(getQueryType(buf, buflen));
574             dnsQueryEvent->set_linux_errno(static_cast<LinuxErrno>(terrno));
575 
576             // Only record stats the first time we try a query. This ensures that
577             // queries that deterministically fail (e.g., a name that always returns
578             // SERVFAIL or times out) do not unduly affect the stats.
579             if (shouldRecordStats) {
580                 // (b/151166599): This is a workaround to prevent that DnsResolver calculates the
581                 // reliability of DNS servers from being broken when network restricted mode is
582                 // enabled.
583                 // TODO: Introduce the new server selection instead of skipping stats recording.
584                 if (!isNetworkRestricted(terrno)) {
585                     res_sample sample;
586                     res_stats_set_sample(&sample, query_time, *rcode, delay);
587                     // KeepListening UDP mechanism is incompatible with usable_servers of legacy
588                     // stats, so keep the old logic for now.
589                     // TODO: Replace usable_servers of legacy stats with new one.
590                     resolv_cache_add_resolver_stats_sample(
591                             statp->netid, revision_id, serverSockAddr, sample, params.max_samples);
592                 }
593                 resolv_stats_add(statp->netid, receivedServerAddr, dnsQueryEvent);
594             }
595 
596             if (resplen == 0) continue;
597             if (fallbackTCP) {
598                 ns--;
599                 continue;
600             }
601             if (resplen < 0) {
602                 _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
603                 statp->closeSockets();
604                 return -terrno;
605             };
606 
607             LOG(DEBUG) << __func__ << ": got answer:";
608             res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
609 
610             if (cache_status == RESOLV_CACHE_NOTFOUND) {
611                 resolv_cache_add(statp->netid, buf, buflen, ans, resplen);
612             }
613             statp->closeSockets();
614             return (resplen);
615         }  // for each ns
616     }  // for each retry
617     statp->closeSockets();
618     terrno = useTcp ? terrno : gotsomewhere ? ETIMEDOUT : ECONNREFUSED;
619     // TODO: Remove errno once callers stop using it
620     errno = useTcp ? terrno
621                    : gotsomewhere ? ETIMEDOUT /* no answer obtained */
622                                   : ECONNREFUSED /* no nameservers found */;
623 
624     _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
625     return -terrno;
626 }
627 
get_timeout(res_state statp,const res_params * params,const int ns)628 static struct timespec get_timeout(res_state statp, const res_params* params, const int ns) {
629     int msec;
630     // Legacy algorithm which scales the timeout by nameserver number.
631     // For instance, with 4 nameservers: 5s, 2.5s, 5s, 10s
632     // This has no effect with 1 or 2 nameservers
633     msec = params->base_timeout_msec << ns;
634     if (ns > 0) {
635         msec /= statp->nameserverCount();
636     }
637     // For safety, don't allow OEMs and experiments to configure a timeout shorter than 1s.
638     if (msec < 1000) {
639         msec = 1000;  // Use at least 1000ms
640     }
641     LOG(INFO) << __func__ << ": using timeout of " << msec << " msec";
642 
643     struct timespec result;
644     result.tv_sec = msec / 1000;
645     result.tv_nsec = (msec % 1000) * 1000000;
646     return result;
647 }
648 
send_vc(res_state statp,res_params * params,const uint8_t * buf,int buflen,uint8_t * ans,int anssiz,int * terrno,size_t ns,time_t * at,int * rcode,int * delay)649 static int send_vc(res_state statp, res_params* params, const uint8_t* buf, int buflen,
650                    uint8_t* ans, int anssiz, int* terrno, size_t ns, time_t* at, int* rcode,
651                    int* delay) {
652     *at = time(NULL);
653     *delay = 0;
654     const HEADER* hp = (const HEADER*) (const void*) buf;
655     HEADER* anhp = (HEADER*) (void*) ans;
656     struct sockaddr* nsap;
657     int nsaplen;
658     int truncating, connreset, n;
659     uint8_t* cp;
660 
661     LOG(INFO) << __func__ << ": using send_vc";
662 
663     // It should never happen, but just in case.
664     if (ns >= statp->nsaddrs.size()) {
665         LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
666         *terrno = EINVAL;
667         return -1;
668     }
669 
670     sockaddr_storage ss = statp->nsaddrs[ns];
671     nsap = reinterpret_cast<sockaddr*>(&ss);
672     nsaplen = sockaddrSize(nsap);
673 
674     connreset = 0;
675 same_ns:
676     truncating = 0;
677 
678     struct timespec start_time = evNowTime();
679 
680     /* Are we still talking to whom we want to talk to? */
681     if (statp->tcp_nssock >= 0 && (statp->_flags & RES_F_VC) != 0) {
682         struct sockaddr_storage peer;
683         socklen_t size = sizeof peer;
684         unsigned old_mark;
685         socklen_t mark_size = sizeof(old_mark);
686         if (getpeername(statp->tcp_nssock, (struct sockaddr*)(void*)&peer, &size) < 0 ||
687             !sock_eq((struct sockaddr*)(void*)&peer, nsap) ||
688             getsockopt(statp->tcp_nssock, SOL_SOCKET, SO_MARK, &old_mark, &mark_size) < 0 ||
689             old_mark != statp->_mark) {
690             statp->closeSockets();
691         }
692     }
693 
694     if (statp->tcp_nssock < 0 || (statp->_flags & RES_F_VC) == 0) {
695         if (statp->tcp_nssock >= 0) statp->closeSockets();
696 
697         statp->tcp_nssock.reset(socket(nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0));
698         if (statp->tcp_nssock < 0) {
699             *terrno = errno;
700             PLOG(DEBUG) << __func__ << ": socket(vc): ";
701             switch (errno) {
702                 case EPROTONOSUPPORT:
703                 case EPFNOSUPPORT:
704                 case EAFNOSUPPORT:
705                     return 0;
706                 default:
707                     return -1;
708             }
709         }
710         const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
711         resolv_tag_socket(statp->tcp_nssock, uid, statp->pid);
712         if (statp->_mark != MARK_UNSET) {
713             if (setsockopt(statp->tcp_nssock, SOL_SOCKET, SO_MARK, &statp->_mark,
714                            sizeof(statp->_mark)) < 0) {
715                 *terrno = errno;
716                 PLOG(DEBUG) << __func__ << ": setsockopt: ";
717                 return -1;
718             }
719         }
720         errno = 0;
721         if (random_bind(statp->tcp_nssock, nsap->sa_family) < 0) {
722             *terrno = errno;
723             dump_error("bind/vc", nsap, nsaplen);
724             statp->closeSockets();
725             return (0);
726         }
727         if (connect_with_timeout(statp->tcp_nssock, nsap, (socklen_t)nsaplen,
728                                  get_timeout(statp, params, ns)) < 0) {
729             *terrno = errno;
730             dump_error("connect/vc", nsap, nsaplen);
731             statp->closeSockets();
732             /*
733              * The way connect_with_timeout() is implemented prevents us from reliably
734              * determining whether this was really a timeout or e.g. ECONNREFUSED. Since
735              * currently both cases are handled in the same way, there is no need to
736              * change this (yet). If we ever need to reliably distinguish between these
737              * cases, both connect_with_timeout() and retrying_poll() need to be
738              * modified, though.
739              */
740             *rcode = RCODE_TIMEOUT;
741             return (0);
742         }
743         statp->_flags |= RES_F_VC;
744     }
745 
746     /*
747      * Send length & message
748      */
749     uint16_t len = htons(static_cast<uint16_t>(buflen));
750     const iovec iov[] = {
751             {.iov_base = &len, .iov_len = INT16SZ},
752             {.iov_base = const_cast<uint8_t*>(buf), .iov_len = static_cast<size_t>(buflen)},
753     };
754     if (writev(statp->tcp_nssock, iov, 2) != (INT16SZ + buflen)) {
755         *terrno = errno;
756         PLOG(DEBUG) << __func__ << ": write failed: ";
757         statp->closeSockets();
758         return (0);
759     }
760     /*
761      * Receive length & response
762      */
763 read_len:
764     cp = ans;
765     len = INT16SZ;
766     while ((n = read(statp->tcp_nssock, (char*)cp, (size_t)len)) > 0) {
767         cp += n;
768         if ((len -= n) == 0) break;
769     }
770     if (n <= 0) {
771         *terrno = errno;
772         PLOG(DEBUG) << __func__ << ": read failed: ";
773         statp->closeSockets();
774         /*
775          * A long running process might get its TCP
776          * connection reset if the remote server was
777          * restarted.  Requery the server instead of
778          * trying a new one.  When there is only one
779          * server, this means that a query might work
780          * instead of failing.  We only allow one reset
781          * per query to prevent looping.
782          */
783         if (*terrno == ECONNRESET && !connreset) {
784             connreset = 1;
785             goto same_ns;
786         }
787         return (0);
788     }
789     uint16_t resplen = ntohs(*reinterpret_cast<const uint16_t*>(ans));
790     if (resplen > anssiz) {
791         LOG(DEBUG) << __func__ << ": response truncated";
792         truncating = 1;
793         len = anssiz;
794     } else
795         len = resplen;
796     if (len < HFIXEDSZ) {
797         /*
798          * Undersized message.
799          */
800         LOG(DEBUG) << __func__ << ": undersized: " << len;
801         *terrno = EMSGSIZE;
802         statp->closeSockets();
803         return (0);
804     }
805     cp = ans;
806     while (len != 0 && (n = read(statp->tcp_nssock, (char*)cp, (size_t)len)) > 0) {
807         cp += n;
808         len -= n;
809     }
810     if (n <= 0) {
811         *terrno = errno;
812         PLOG(DEBUG) << __func__ << ": read(vc): ";
813         statp->closeSockets();
814         return (0);
815     }
816 
817     if (truncating) {
818         /*
819          * Flush rest of answer so connection stays in synch.
820          */
821         anhp->tc = 1;
822         len = resplen - anssiz;
823         while (len != 0) {
824             char junk[PACKETSZ];
825 
826             n = read(statp->tcp_nssock, junk, (len > sizeof junk) ? sizeof junk : len);
827             if (n > 0)
828                 len -= n;
829             else
830                 break;
831         }
832     }
833     /*
834      * If the calling application has bailed out of
835      * a previous call and failed to arrange to have
836      * the circuit closed or the server has got
837      * itself confused, then drop the packet and
838      * wait for the correct one.
839      */
840     if (hp->id != anhp->id) {
841         LOG(DEBUG) << __func__ << ": ld answer (unexpected):";
842         res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
843         goto read_len;
844     }
845 
846     /*
847      * All is well, or the error is fatal.  Signal that the
848      * next nameserver ought not be tried.
849      */
850     if (resplen > 0) {
851         struct timespec done = evNowTime();
852         *delay = res_stats_calculate_rtt(&done, &start_time);
853         *rcode = anhp->rcode;
854     }
855     *terrno = 0;
856     return (resplen);
857 }
858 
859 /* return -1 on error (errno set), 0 on success */
connect_with_timeout(int sock,const sockaddr * nsap,socklen_t salen,const timespec timeout)860 static int connect_with_timeout(int sock, const sockaddr* nsap, socklen_t salen,
861                                 const timespec timeout) {
862     int res, origflags;
863 
864     origflags = fcntl(sock, F_GETFL, 0);
865     fcntl(sock, F_SETFL, origflags | O_NONBLOCK);
866 
867     res = connect(sock, nsap, salen);
868     if (res < 0 && errno != EINPROGRESS) {
869         res = -1;
870         goto done;
871     }
872     if (res != 0) {
873         timespec now = evNowTime();
874         timespec finish = evAddTime(now, timeout);
875         LOG(INFO) << __func__ << ": " << sock << " send_vc";
876         res = retrying_poll(sock, POLLIN | POLLOUT, &finish);
877         if (res <= 0) {
878             res = -1;
879         }
880     }
881 done:
882     fcntl(sock, F_SETFL, origflags);
883     LOG(INFO) << __func__ << ": " << sock << " connect_with_const timeout returning " << res;
884     return res;
885 }
886 
retrying_poll(const int sock,const short events,const struct timespec * finish)887 static int retrying_poll(const int sock, const short events, const struct timespec* finish) {
888     struct timespec now, timeout;
889 
890 retry:
891     LOG(INFO) << __func__ << ": " << sock << " retrying_poll";
892 
893     now = evNowTime();
894     if (evCmpTime(*finish, now) > 0)
895         timeout = evSubTime(*finish, now);
896     else
897         timeout = evConsTime(0L, 0L);
898     struct pollfd fds = {.fd = sock, .events = events};
899     int n = ppoll(&fds, 1, &timeout, /*__mask=*/NULL);
900     if (n == 0) {
901         LOG(INFO) << __func__ << ": " << sock << " retrying_poll timeout";
902         errno = ETIMEDOUT;
903         return 0;
904     }
905     if (n < 0) {
906         if (errno == EINTR) goto retry;
907         PLOG(INFO) << __func__ << ": " << sock << " retrying_poll failed";
908         return n;
909     }
910     if (fds.revents & (POLLIN | POLLOUT | POLLERR)) {
911         int error;
912         socklen_t len = sizeof(error);
913         if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0 || error) {
914             errno = error;
915             PLOG(INFO) << __func__ << ": " << sock << " retrying_poll getsockopt failed";
916             return -1;
917         }
918     }
919     LOG(INFO) << __func__ << ": " << sock << " retrying_poll returning " << n;
920     return n;
921 }
922 
extractUdpFdset(res_state statp,const short events=POLLIN)923 static std::vector<pollfd> extractUdpFdset(res_state statp, const short events = POLLIN) {
924     std::vector<pollfd> fdset(statp->nsaddrs.size());
925     for (size_t i = 0; i < statp->nsaddrs.size(); ++i) {
926         fdset[i] = {.fd = statp->nssocks[i], .events = events};
927     }
928     return fdset;
929 }
930 
udpRetryingPoll(res_state statp,const timespec * finish)931 static Result<std::vector<int>> udpRetryingPoll(res_state statp, const timespec* finish) {
932     for (;;) {
933         LOG(DEBUG) << __func__ << ": poll";
934         timespec start_time = evNowTime();
935         timespec timeout = (evCmpTime(*finish, start_time) > 0) ? evSubTime(*finish, start_time)
936                                                                 : evConsTime(0L, 0L);
937         std::vector<pollfd> fdset = extractUdpFdset(statp);
938         const int n = ppoll(fdset.data(), fdset.size(), &timeout, /*__mask=*/nullptr);
939         if (n <= 0) {
940             if (errno == EINTR && n < 0) continue;
941             if (n == 0) errno = ETIMEDOUT;
942             PLOG(INFO) << __func__ << ": failed";
943             return ErrnoError();
944         }
945         std::vector<int> fdsToRead;
946         for (const auto& pollfd : fdset) {
947             if (pollfd.revents & (POLLIN | POLLERR)) {
948                 fdsToRead.push_back(pollfd.fd);
949             }
950         }
951         LOG(DEBUG) << __func__ << ": "
952                    << " returning fd size: " << fdsToRead.size();
953         return fdsToRead;
954     }
955 }
956 
udpRetryingPollWrapper(res_state statp,int ns,const timespec * finish)957 static Result<std::vector<int>> udpRetryingPollWrapper(res_state statp, int ns,
958                                                        const timespec* finish) {
959     const bool keepListeningUdp =
960             android::net::Experiments::getInstance()->getFlag("keep_listening_udp", 0);
961     if (keepListeningUdp) return udpRetryingPoll(statp, finish);
962 
963     if (int n = retrying_poll(statp->nssocks[ns], POLLIN, finish); n <= 0) {
964         return ErrnoError();
965     }
966     return std::vector<int>{statp->nssocks[ns]};
967 }
968 
ignoreInvalidAnswer(res_state statp,const sockaddr_storage & from,const uint8_t * buf,int buflen,uint8_t * ans,int anssiz,int * receivedFromNs)969 bool ignoreInvalidAnswer(res_state statp, const sockaddr_storage& from, const uint8_t* buf,
970                          int buflen, uint8_t* ans, int anssiz, int* receivedFromNs) {
971     const HEADER* hp = (const HEADER*)(const void*)buf;
972     HEADER* anhp = (HEADER*)(void*)ans;
973     if (hp->id != anhp->id) {
974         // response from old query, ignore it.
975         LOG(DEBUG) << __func__ << ": old answer:";
976         return true;
977     }
978     if (*receivedFromNs = res_ourserver_p(statp, (sockaddr*)(void*)&from); *receivedFromNs < 0) {
979         // response from wrong server? ignore it.
980         LOG(DEBUG) << __func__ << ": not our server:";
981         return true;
982     }
983     if (!res_queriesmatch(buf, buf + buflen, ans, ans + anssiz)) {
984         // response contains wrong query? ignore it.
985         LOG(DEBUG) << __func__ << ": wrong query name:";
986         return true;
987     }
988     return false;
989 }
990 
send_dg(res_state statp,res_params * params,const uint8_t * buf,int buflen,uint8_t * ans,int anssiz,int * terrno,size_t * ns,int * v_circuit,int * gotsomewhere,time_t * at,int * rcode,int * delay)991 static int send_dg(res_state statp, res_params* params, const uint8_t* buf, int buflen,
992                    uint8_t* ans, int anssiz, int* terrno, size_t* ns, int* v_circuit,
993                    int* gotsomewhere, time_t* at, int* rcode, int* delay) {
994     // It should never happen, but just in case.
995     if (*ns >= statp->nsaddrs.size()) {
996         LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
997         *terrno = EINVAL;
998         return -1;
999     }
1000 
1001     *at = time(nullptr);
1002     *delay = 0;
1003     const sockaddr_storage ss = statp->nsaddrs[*ns];
1004     const sockaddr* nsap = reinterpret_cast<const sockaddr*>(&ss);
1005     const int nsaplen = sockaddrSize(nsap);
1006 
1007     if (statp->nssocks[*ns] == -1) {
1008         statp->nssocks[*ns].reset(socket(nsap->sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0));
1009         if (statp->nssocks[*ns] < 0) {
1010             *terrno = errno;
1011             PLOG(DEBUG) << __func__ << ": socket(dg): ";
1012             switch (errno) {
1013                 case EPROTONOSUPPORT:
1014                 case EPFNOSUPPORT:
1015                 case EAFNOSUPPORT:
1016                     return (0);
1017                 default:
1018                     return (-1);
1019             }
1020         }
1021 
1022         const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
1023         resolv_tag_socket(statp->nssocks[*ns], uid, statp->pid);
1024         if (statp->_mark != MARK_UNSET) {
1025             if (setsockopt(statp->nssocks[*ns], SOL_SOCKET, SO_MARK, &(statp->_mark),
1026                            sizeof(statp->_mark)) < 0) {
1027                 *terrno = errno;
1028                 statp->closeSockets();
1029                 return -1;
1030             }
1031         }
1032         // Use a "connected" datagram socket to receive an ECONNREFUSED error
1033         // on the next socket operation when the server responds with an
1034         // ICMP port-unreachable error. This way we can detect the absence of
1035         // a nameserver without timing out.
1036         if (random_bind(statp->nssocks[*ns], nsap->sa_family) < 0) {
1037             *terrno = errno;
1038             dump_error("bind(dg)", nsap, nsaplen);
1039             statp->closeSockets();
1040             return (0);
1041         }
1042         if (connect(statp->nssocks[*ns], nsap, (socklen_t)nsaplen) < 0) {
1043             *terrno = errno;
1044             dump_error("connect(dg)", nsap, nsaplen);
1045             statp->closeSockets();
1046             return (0);
1047         }
1048         LOG(DEBUG) << __func__ << ": new DG socket";
1049     }
1050     if (send(statp->nssocks[*ns], (const char*)buf, (size_t)buflen, 0) != buflen) {
1051         *terrno = errno;
1052         PLOG(DEBUG) << __func__ << ": send: ";
1053         statp->closeSockets();
1054         return 0;
1055     }
1056 
1057     timespec timeout = get_timeout(statp, params, *ns);
1058     timespec start_time = evNowTime();
1059     timespec finish = evAddTime(start_time, timeout);
1060     for (;;) {
1061         // Wait for reply.
1062         auto result = udpRetryingPollWrapper(statp, *ns, &finish);
1063 
1064         if (!result.has_value()) {
1065             const bool isTimeout = (result.error().code() == ETIMEDOUT);
1066             *rcode = (isTimeout) ? RCODE_TIMEOUT : *rcode;
1067             *terrno = (isTimeout) ? ETIMEDOUT : errno;
1068             *gotsomewhere = (isTimeout) ? 1 : *gotsomewhere;
1069             // Leave the UDP sockets open on timeout so we can keep listening for
1070             // a late response from this server while retrying on the next server.
1071             if (!isTimeout) statp->closeSockets();
1072             LOG(DEBUG) << __func__ << ": " << (isTimeout) ? "timeout" : "poll";
1073             return 0;
1074         }
1075         bool needRetry = false;
1076         for (int fd : result.value()) {
1077             needRetry = false;
1078             sockaddr_storage from;
1079             socklen_t fromlen = sizeof(from);
1080             int resplen =
1081                     recvfrom(fd, (char*)ans, (size_t)anssiz, 0, (sockaddr*)(void*)&from, &fromlen);
1082             if (resplen <= 0) {
1083                 *terrno = errno;
1084                 PLOG(DEBUG) << __func__ << ": recvfrom: ";
1085                 continue;
1086             }
1087             *gotsomewhere = 1;
1088             if (resplen < HFIXEDSZ) {
1089                 // Undersized message.
1090                 LOG(DEBUG) << __func__ << ": undersized: " << resplen;
1091                 *terrno = EMSGSIZE;
1092                 continue;
1093             }
1094 
1095             int receivedFromNs = *ns;
1096             if (needRetry =
1097                         ignoreInvalidAnswer(statp, from, buf, buflen, ans, anssiz, &receivedFromNs);
1098                 needRetry) {
1099                 res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
1100                 continue;
1101             }
1102 
1103             HEADER* anhp = (HEADER*)(void*)ans;
1104             if (anhp->rcode == FORMERR && (statp->netcontext_flags & NET_CONTEXT_FLAG_USE_EDNS)) {
1105                 //  Do not retry if the server do not understand EDNS0.
1106                 //  The case has to be captured here, as FORMERR packet do not
1107                 //  carry query section, hence res_queriesmatch() returns 0.
1108                 LOG(DEBUG) << __func__ << ": server rejected query with EDNS0:";
1109                 res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
1110                 // record the error
1111                 statp->_flags |= RES_F_EDNS0ERR;
1112                 *terrno = EREMOTEIO;
1113                 continue;
1114             }
1115 
1116             timespec done = evNowTime();
1117             *delay = res_stats_calculate_rtt(&done, &start_time);
1118             if (anhp->rcode == SERVFAIL || anhp->rcode == NOTIMP || anhp->rcode == REFUSED) {
1119                 LOG(DEBUG) << __func__ << ": server rejected query:";
1120                 res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
1121                 *rcode = anhp->rcode;
1122                 continue;
1123             }
1124             if (anhp->tc) {
1125                 // To get the rest of answer,
1126                 // use TCP with same server.
1127                 LOG(DEBUG) << __func__ << ": truncated answer";
1128                 *terrno = E2BIG;
1129                 *v_circuit = 1;
1130                 return 1;
1131             }
1132             // All is well, or the error is fatal. Signal that the
1133             // next nameserver ought not be tried.
1134 
1135             *rcode = anhp->rcode;
1136             *ns = receivedFromNs;
1137             *terrno = 0;
1138             return resplen;
1139         }
1140         if (!needRetry) return 0;
1141     }
1142 }
1143 
dump_error(const char * str,const struct sockaddr * address,int alen)1144 static void dump_error(const char* str, const struct sockaddr* address, int alen) {
1145     char hbuf[NI_MAXHOST];
1146     char sbuf[NI_MAXSERV];
1147     constexpr int niflags = NI_NUMERICHOST | NI_NUMERICSERV;
1148     const int err = errno;
1149 
1150     if (!WOULD_LOG(DEBUG)) return;
1151 
1152     if (getnameinfo(address, (socklen_t)alen, hbuf, sizeof(hbuf), sbuf, sizeof(sbuf), niflags)) {
1153         strncpy(hbuf, "?", sizeof(hbuf) - 1);
1154         hbuf[sizeof(hbuf) - 1] = '\0';
1155         strncpy(sbuf, "?", sizeof(sbuf) - 1);
1156         sbuf[sizeof(sbuf) - 1] = '\0';
1157     }
1158     errno = err;
1159     PLOG(DEBUG) << __func__ << ": " << str << " ([" << hbuf << "]." << sbuf << "): ";
1160 }
1161 
// Compares two socket addresses.
//
// Returns 1 when both have the same family and, for AF_INET, the same port and
// IPv4 address, or, for AF_INET6, the same port and IPv6 address (plus the same
// scope id when HAVE_SIN6_SCOPE_ID is defined). Returns 0 otherwise, including
// for any other address family.
static int sock_eq(struct sockaddr* a, struct sockaddr* b) {
    if (a->sa_family != b->sa_family) return 0;

    if (a->sa_family == AF_INET) {
        const auto* lhs = reinterpret_cast<const struct sockaddr_in*>(a);
        const auto* rhs = reinterpret_cast<const struct sockaddr_in*>(b);
        if (lhs->sin_port != rhs->sin_port) return 0;
        return (lhs->sin_addr.s_addr == rhs->sin_addr.s_addr) ? 1 : 0;
    }

    if (a->sa_family == AF_INET6) {
        const auto* lhs = reinterpret_cast<const struct sockaddr_in6*>(a);
        const auto* rhs = reinterpret_cast<const struct sockaddr_in6*>(b);
        if (lhs->sin6_port != rhs->sin6_port) return 0;
#ifdef HAVE_SIN6_SCOPE_ID
        if (lhs->sin6_scope_id != rhs->sin6_scope_id) return 0;
#endif
        return IN6_ARE_ADDR_EQUAL(&lhs->sin6_addr, &rhs->sin6_addr) ? 1 : 0;
    }

    // Unknown families never compare equal.
    return 0;
}
1184 
convertEnumType(PrivateDnsMode privateDnsmode)1185 PrivateDnsModes convertEnumType(PrivateDnsMode privateDnsmode) {
1186     switch (privateDnsmode) {
1187         case PrivateDnsMode::OFF:
1188             return PrivateDnsModes::PDM_OFF;
1189         case PrivateDnsMode::OPPORTUNISTIC:
1190             return PrivateDnsModes::PDM_OPPORTUNISTIC;
1191         case PrivateDnsMode::STRICT:
1192             return PrivateDnsModes::PDM_STRICT;
1193         default:
1194             return PrivateDnsModes::PDM_UNKNOWN;
1195     }
1196 }
1197 
res_tls_send(res_state statp,const Slice query,const Slice answer,int * rcode,bool * fallback)1198 static int res_tls_send(res_state statp, const Slice query, const Slice answer, int* rcode,
1199                         bool* fallback) {
1200     int resplen = 0;
1201     const unsigned netId = statp->netid;
1202 
1203     PrivateDnsStatus privateDnsStatus = gPrivateDnsConfiguration.getStatus(netId);
1204     statp->event->set_private_dns_modes(convertEnumType(privateDnsStatus.mode));
1205 
1206     if (privateDnsStatus.mode == PrivateDnsMode::OFF) {
1207         *fallback = true;
1208         return -1;
1209     }
1210 
1211     if (privateDnsStatus.validatedServers().empty()) {
1212         if (privateDnsStatus.mode == PrivateDnsMode::OPPORTUNISTIC) {
1213             *fallback = true;
1214             return -1;
1215         } else {
1216             // Sleep and iterate some small number of times checking for the
1217             // arrival of resolved and validated server IP addresses, instead
1218             // of returning an immediate error.
1219             // This is needed because as soon as a network becomes the default network, apps will
1220             // send DNS queries on that network. If no servers have yet validated, and we do not
1221             // block those queries, they would immediately fail, causing application-visible errors.
1222             // Note that this can happen even before the network validates, since an unvalidated
1223             // network can become the default network if no validated networks are available.
1224             //
1225             // TODO: see if there is a better way to address this problem, such as buffering the
1226             // queries in a queue or only blocking queries for the first few seconds after a default
1227             // network change.
1228             for (int i = 0; i < 42; i++) {
1229                 std::this_thread::sleep_for(std::chrono::milliseconds(100));
1230                 // Calling getStatus() to merely check if there's any validated server seems
1231                 // wasteful. Consider adding a new method in PrivateDnsConfiguration for speed ups.
1232                 if (!gPrivateDnsConfiguration.getStatus(netId).validatedServers().empty()) {
1233                     privateDnsStatus = gPrivateDnsConfiguration.getStatus(netId);
1234                     break;
1235                 }
1236             }
1237             if (privateDnsStatus.validatedServers().empty()) {
1238                 return -1;
1239             }
1240         }
1241     }
1242 
1243     LOG(INFO) << __func__ << ": performing query over TLS";
1244 
1245     const auto response = sDnsTlsDispatcher.query(privateDnsStatus.validatedServers(), statp, query,
1246                                                   answer, &resplen);
1247 
1248     LOG(INFO) << __func__ << ": TLS query result: " << static_cast<int>(response);
1249 
1250     if (privateDnsStatus.mode == PrivateDnsMode::OPPORTUNISTIC) {
1251         // In opportunistic mode, handle falling back to cleartext in some
1252         // cases (DNS shouldn't fail if a validated opportunistic mode server
1253         // becomes unreachable for some reason).
1254         switch (response) {
1255             case DnsTlsTransport::Response::success:
1256                 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1257                 return resplen;
1258             case DnsTlsTransport::Response::network_error:
1259                 // No need to set the error timeout here since it will fallback to UDP.
1260             case DnsTlsTransport::Response::internal_error:
1261                 // Note: this will cause cleartext queries to be emitted, with
1262                 // all of the EDNS0 goodness enabled. Fingers crossed.  :-/
1263                 *fallback = true;
1264                 [[fallthrough]];
1265             default:
1266                 return -1;
1267         }
1268     } else {
1269         // Strict mode
1270         switch (response) {
1271             case DnsTlsTransport::Response::success:
1272                 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1273                 return resplen;
1274             case DnsTlsTransport::Response::network_error:
1275                 // This case happens when the query stored in DnsTlsTransport is expired since
1276                 // either 1) the query has been tried for 3 times but no response or 2) fail to
1277                 // establish the connection with the server.
1278                 *rcode = RCODE_TIMEOUT;
1279                 [[fallthrough]];
1280             default:
1281                 return -1;
1282         }
1283     }
1284 }
1285 
resolv_res_nsend(const android_net_context * netContext,const uint8_t * msg,int msgLen,uint8_t * ans,int ansLen,int * rcode,uint32_t flags,NetworkDnsEventReported * event)1286 int resolv_res_nsend(const android_net_context* netContext, const uint8_t* msg, int msgLen,
1287                      uint8_t* ans, int ansLen, int* rcode, uint32_t flags,
1288                      NetworkDnsEventReported* event) {
1289     assert(event != nullptr);
1290     ResState res;
1291     res_init(&res, netContext, event);
1292     resolv_populate_res_for_net(&res);
1293     *rcode = NOERROR;
1294     return res_nsend(&res, msg, msgLen, ans, ansLen, rcode, flags);
1295 }
1296