1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <map>
18 #include <string>
19 
20 #include <arpa/inet.h>
21 #include <errno.h>
22 #include <linux/if_tun.h>
23 #include <linux/ioctl.h>
24 #include <net/if.h>
25 #include <netinet/in.h>
26 #include <spawn.h>
27 #include <sys/types.h>
28 #include <sys/wait.h>
29 #include <unistd.h>
30 
31 #define LOG_TAG "ClatdController"
32 #include <log/log.h>
33 
34 #include "ClatdController.h"
35 #include "InterfaceController.h"
36 
37 #include "android-base/properties.h"
38 #include "android-base/scopeguard.h"
39 #include "android-base/stringprintf.h"
40 #include "android-base/unique_fd.h"
41 #include "bpf/BpfMap.h"
42 #include "netdbpf/bpf_shared.h"
43 #include "netdutils/DumpWriter.h"
44 
45 extern "C" {
46 #include "netutils/checksum.h"
47 }
48 
49 #include "Fwmark.h"
50 #include "NetdConstants.h"
51 #include "NetworkController.h"
52 #include "OffloadUtils.h"
53 #include "netid_client.h"
54 
// Absolute path of the clatd executable spawned by startClatd().
static const char* kClatdPath{"/system/bin/clatd"};

// The CLAT IPv4 address is taken from 192.0.0.0/29 (RFC 7335). For historical reasons the search
// for a free address begins at 192.0.0.4 and then continues through the rest of that prefix.
static const char* kV4AddrString{"192.0.0.4"};
static const in_addr kV4Addr{inet_addr(kV4AddrString)};
static const int kV4AddrLen{29};
62 
63 using android::base::Result;
64 using android::base::StringPrintf;
65 using android::base::unique_fd;
66 using android::bpf::BpfMap;
67 using android::netdutils::DumpWriter;
68 using android::netdutils::ScopedIndent;
69 
70 namespace android {
71 namespace net {
72 
init(void)73 void ClatdController::init(void) {
74     std::lock_guard guard(mutex);
75 
76     int rv = getClatEgress4MapFd();
77     if (rv < 0) {
78         ALOGE("getClatEgress4MapFd() failure: %s", strerror(-rv));
79         return;
80     }
81     mClatEgress4Map.reset(rv);
82 
83     rv = getClatIngress6MapFd();
84     if (rv < 0) {
85         ALOGE("getClatIngress6MapFd() failure: %s", strerror(-rv));
86         mClatEgress4Map.reset(-1);
87         return;
88     }
89     mClatIngress6Map.reset(rv);
90 
91     mClatEgress4Map.clear();
92     mClatIngress6Map.clear();
93 }
94 
isIpv4AddressFree(in_addr_t addr)95 bool ClatdController::isIpv4AddressFree(in_addr_t addr) {
96     int s = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
97     if (s == -1) {
98         return 0;
99     }
100 
101     // Attempt to connect to the address. If the connection succeeds and getsockname returns the
102     // same then the address is already assigned to the system and we can't use it.
103     struct sockaddr_in sin = {
104             .sin_family = AF_INET,
105             .sin_port = htons(53),
106             .sin_addr = {addr},
107     };
108     socklen_t len = sizeof(sin);
109     bool inuse = connect(s, (struct sockaddr*)&sin, sizeof(sin)) == 0 &&
110                  getsockname(s, (struct sockaddr*)&sin, &len) == 0 && (size_t)len >= sizeof(sin) &&
111                  sin.sin_addr.s_addr == addr;
112 
113     close(s);
114     return !inuse;
115 }
116 
117 // Picks a free IPv4 address, starting from ip and trying all addresses in the prefix in order.
118 //   ip        - the IP address from the configuration file
119 //   prefixlen - the length of the prefix from which addresses may be selected.
120 //   returns: the IPv4 address, or INADDR_NONE if no addresses were available
selectIpv4Address(const in_addr ip,int16_t prefixlen)121 in_addr_t ClatdController::selectIpv4Address(const in_addr ip, int16_t prefixlen) {
122     // Don't accept prefixes that are too large because we scan addresses one by one.
123     if (prefixlen < 16 || prefixlen > 32) {
124         return INADDR_NONE;
125     }
126 
127     // All these are in host byte order.
128     in_addr_t mask = 0xffffffff >> (32 - prefixlen) << (32 - prefixlen);
129     in_addr_t ipv4 = ntohl(ip.s_addr);
130     in_addr_t first_ipv4 = ipv4;
131     in_addr_t prefix = ipv4 & mask;
132 
133     // Pick the first IPv4 address in the pool, wrapping around if necessary.
134     // So, for example, 192.0.0.4 -> 192.0.0.5 -> 192.0.0.6 -> 192.0.0.7 -> 192.0.0.0.
135     do {
136         if (isIpv4AddressFreeFunc(htonl(ipv4))) {
137             return htonl(ipv4);
138         }
139         ipv4 = prefix | ((ipv4 + 1) & ~mask);
140     } while (ipv4 != first_ipv4);
141 
142     return INADDR_NONE;
143 }
144 
145 // Alters the bits in the IPv6 address to make them checksum neutral with v4 and nat64Prefix.
makeChecksumNeutral(in6_addr * v6,const in_addr v4,const in6_addr & nat64Prefix)146 void ClatdController::makeChecksumNeutral(in6_addr* v6, const in_addr v4,
147                                           const in6_addr& nat64Prefix) {
148     // Fill last 8 bytes of IPv6 address with random bits.
149     arc4random_buf(&v6->s6_addr[8], 8);
150 
151     // Make the IID checksum-neutral. That is, make it so that:
152     //   checksum(Local IPv4 | Remote IPv4) = checksum(Local IPv6 | Remote IPv6)
153     // in other words (because remote IPv6 = NAT64 prefix | Remote IPv4):
154     //   checksum(Local IPv4) = checksum(Local IPv6 | NAT64 prefix)
155     // Do this by adjusting the two bytes in the middle of the IID.
156 
157     uint16_t middlebytes = (v6->s6_addr[11] << 8) + v6->s6_addr[12];
158 
159     uint32_t c1 = ip_checksum_add(0, &v4, sizeof(v4));
160     uint32_t c2 = ip_checksum_add(0, &nat64Prefix, sizeof(nat64Prefix)) +
161                   ip_checksum_add(0, v6, sizeof(*v6));
162 
163     uint16_t delta = ip_checksum_adjust(middlebytes, c1, c2);
164     v6->s6_addr[11] = delta >> 8;
165     v6->s6_addr[12] = delta & 0xff;
166 }
167 
168 // Picks a random interface ID that is checksum neutral with the IPv4 address and the NAT64 prefix.
generateIpv6Address(const char * iface,const in_addr v4,const in6_addr & nat64Prefix,in6_addr * v6)169 int ClatdController::generateIpv6Address(const char* iface, const in_addr v4,
170                                          const in6_addr& nat64Prefix, in6_addr* v6) {
171     unique_fd s(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
172     if (s == -1) return -errno;
173 
174     if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE, iface, strlen(iface) + 1) == -1) {
175         return -errno;
176     }
177 
178     sockaddr_in6 sin6 = {.sin6_family = AF_INET6, .sin6_addr = nat64Prefix};
179     if (connect(s, reinterpret_cast<struct sockaddr*>(&sin6), sizeof(sin6)) == -1) {
180         return -errno;
181     }
182 
183     socklen_t len = sizeof(sin6);
184     if (getsockname(s, reinterpret_cast<struct sockaddr*>(&sin6), &len) == -1) {
185         return -errno;
186     }
187 
188     *v6 = sin6.sin6_addr;
189 
190     if (IN6_IS_ADDR_UNSPECIFIED(v6) || IN6_IS_ADDR_LOOPBACK(v6) || IN6_IS_ADDR_LINKLOCAL(v6) ||
191         IN6_IS_ADDR_SITELOCAL(v6) || IN6_IS_ADDR_ULA(v6)) {
192         return -ENETUNREACH;
193     }
194 
195     makeChecksumNeutral(v6, v4, nat64Prefix);
196 
197     return 0;
198 }
199 
maybeStartBpf(const ClatdTracker & tracker)200 void ClatdController::maybeStartBpf(const ClatdTracker& tracker) {
201     auto isEthernet = android::net::isEthernet(tracker.iface);
202     if (!isEthernet.ok()) {
203         ALOGE("isEthernet(%s[%d]) failure: %s", tracker.iface, tracker.ifIndex,
204               isEthernet.error().message().c_str());
205         return;
206     }
207 
208     // This program will be attached to the v4-* interface which is a TUN and thus always rawip.
209     int rv = getClatEgress4ProgFd(RAWIP);
210     if (rv < 0) {
211         ALOGE("getClatEgress4ProgFd(RAWIP) failure: %s", strerror(-rv));
212         return;
213     }
214     unique_fd txRawIpProgFd(rv);
215 
216     rv = getClatIngress6ProgFd(isEthernet.value());
217     if (rv < 0) {
218         ALOGE("getClatIngress6ProgFd(%d) failure: %s", isEthernet.value(), strerror(-rv));
219         return;
220     }
221     unique_fd rxProgFd(rv);
222 
223     ClatEgress4Key txKey = {
224             .iif = tracker.v4ifIndex,
225             .local4 = tracker.v4,
226     };
227     ClatEgress4Value txValue = {
228             .oif = tracker.ifIndex,
229             .local6 = tracker.v6,
230             .pfx96 = tracker.pfx96,
231             .oifIsEthernet = isEthernet.value(),
232     };
233 
234     auto ret = mClatEgress4Map.writeValue(txKey, txValue, BPF_ANY);
235     if (!ret.ok()) {
236         ALOGE("mClatEgress4Map.writeValue failure: %s", strerror(ret.error().code()));
237         return;
238     }
239 
240     ClatIngress6Key rxKey = {
241             .iif = tracker.ifIndex,
242             .pfx96 = tracker.pfx96,
243             .local6 = tracker.v6,
244     };
245     ClatIngress6Value rxValue = {
246             // TODO: move all the clat code to eBPF and remove the tun interface entirely.
247             .oif = tracker.v4ifIndex,
248             .local4 = tracker.v4,
249     };
250 
251     ret = mClatIngress6Map.writeValue(rxKey, rxValue, BPF_ANY);
252     if (!ret.ok()) {
253         ALOGE("mClatIngress6Map.writeValue failure: %s", strerror(ret.error().code()));
254         ret = mClatEgress4Map.deleteValue(txKey);
255         if (!ret.ok())
256             ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
257         return;
258     }
259 
260     // We do tc setup *after* populating the maps, so scanning through them
261     // can always be used to tell us what needs cleanup.
262 
263     // Usually the clsact will be added in RouteController::addInterfaceToPhysicalNetwork.
264     // But clat is started before the v4- interface is added to the network. The clat startup have
265     // to add clsact of v4- tun interface first for adding bpf filter in maybeStartBpf.
266     // TODO: move "qdisc add clsact" of v4- tun interface out from ClatdController.
267     rv = tcQdiscAddDevClsact(tracker.v4ifIndex);
268     if (rv) {
269         ALOGE("tcQdiscAddDevClsact(%d[%s]) failure: %s", tracker.v4ifIndex, tracker.v4iface,
270               strerror(-rv));
271         ret = mClatEgress4Map.deleteValue(txKey);
272         if (!ret.ok())
273             ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
274         ret = mClatIngress6Map.deleteValue(rxKey);
275         if (!ret.ok())
276             ALOGE("mClatIngress6Map.deleteValue failure: %s", strerror(ret.error().code()));
277         return;
278     }
279 
280     rv = tcFilterAddDevEgressClatIpv4(tracker.v4ifIndex, txRawIpProgFd, RAWIP);
281     if (rv) {
282         ALOGE("tcFilterAddDevEgressClatIpv4(%d[%s], RAWIP) failure: %s", tracker.v4ifIndex,
283               tracker.v4iface, strerror(-rv));
284 
285         // The v4- interface clsact is not deleted for unwinding error because once it is created
286         // with interface addition, the lifetime is till interface deletion. Moreover, the clsact
287         // has no clat filter now. It should not break anything.
288 
289         ret = mClatEgress4Map.deleteValue(txKey);
290         if (!ret.ok())
291             ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
292         ret = mClatIngress6Map.deleteValue(rxKey);
293         if (!ret.ok())
294             ALOGE("mClatIngress6Map.deleteValue failure: %s", strerror(ret.error().code()));
295         return;
296     }
297 
298     rv = tcFilterAddDevIngressClatIpv6(tracker.ifIndex, rxProgFd, isEthernet.value());
299     if (rv) {
300         ALOGE("tcFilterAddDevIngressClatIpv6(%d[%s], %d) failure: %s", tracker.ifIndex,
301               tracker.iface, isEthernet.value(), strerror(-rv));
302         rv = tcFilterDelDevEgressClatIpv4(tracker.v4ifIndex);
303         if (rv) {
304             ALOGE("tcFilterDelDevEgressClatIpv4(%d[%s]) failure: %s", tracker.v4ifIndex,
305                   tracker.v4iface, strerror(-rv));
306         }
307 
308         // The v4- interface clsact is not deleted. See the reason in the error unwinding code of
309         // the egress filter attaching of v4- tun interface.
310 
311         ret = mClatEgress4Map.deleteValue(txKey);
312         if (!ret.ok())
313             ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
314         ret = mClatIngress6Map.deleteValue(rxKey);
315         if (!ret.ok())
316             ALOGE("mClatIngress6Map.deleteValue failure: %s", strerror(ret.error().code()));
317         return;
318     }
319 
320     // success
321 }
322 
setIptablesDropRule(bool add,const char * iface,const char * pfx96Str,const char * v6Str)323 void ClatdController::setIptablesDropRule(bool add, const char* iface, const char* pfx96Str,
324                                           const char* v6Str) {
325     std::string cmd = StringPrintf(
326             "*raw\n"
327             "%s %s -i %s -s %s/96 -d %s -j DROP\n"
328             "COMMIT\n",
329             (add ? "-A" : "-D"), LOCAL_RAW_PREROUTING, iface, pfx96Str, v6Str);
330 
331     iptablesRestoreFunction(V6, cmd);
332 }
333 
maybeStopBpf(const ClatdTracker & tracker)334 void ClatdController::maybeStopBpf(const ClatdTracker& tracker) {
335     int rv = tcFilterDelDevIngressClatIpv6(tracker.ifIndex);
336     if (rv < 0) {
337         ALOGE("tcFilterDelDevIngressClatIpv6(%d[%s]) failure: %s", tracker.ifIndex, tracker.iface,
338               strerror(-rv));
339     }
340 
341     rv = tcFilterDelDevEgressClatIpv4(tracker.v4ifIndex);
342     if (rv < 0) {
343         ALOGE("tcFilterDelDevEgressClatIpv4(%d[%s]) failure: %s", tracker.v4ifIndex,
344               tracker.v4iface, strerror(-rv));
345     }
346 
347     // We cleanup the maps last, so scanning through them can be used to
348     // determine what still needs cleanup.
349 
350     ClatEgress4Key txKey = {
351             .iif = tracker.v4ifIndex,
352             .local4 = tracker.v4,
353     };
354 
355     auto ret = mClatEgress4Map.deleteValue(txKey);
356     if (!ret.ok()) ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
357 
358     ClatIngress6Key rxKey = {
359             .iif = tracker.ifIndex,
360             .pfx96 = tracker.pfx96,
361             .local6 = tracker.v6,
362     };
363 
364     ret = mClatIngress6Map.deleteValue(rxKey);
365     if (!ret.ok()) ALOGE("mClatIngress6Map.deleteValue failure: %s", strerror(ret.error().code()));
366 }
367 
368 // Finds the tracker of the clatd running on interface |interface|, or nullptr if clatd has not been
369 // started  on |interface|.
getClatdTracker(const std::string & interface)370 ClatdController::ClatdTracker* ClatdController::getClatdTracker(const std::string& interface) {
371     auto it = mClatdTrackers.find(interface);
372     return (it == mClatdTrackers.end() ? nullptr : &it->second);
373 }
374 
375 // Initializes a ClatdTracker for the specified interface.
init(unsigned networkId,const std::string & interface,const std::string & v4interface,const std::string & nat64Prefix)376 int ClatdController::ClatdTracker::init(unsigned networkId, const std::string& interface,
377                                         const std::string& v4interface,
378                                         const std::string& nat64Prefix) {
379     fwmark.netId = networkId;
380     fwmark.explicitlySelected = true;
381     fwmark.protectedFromVpn = true;
382     fwmark.permission = PERMISSION_SYSTEM;
383 
384     snprintf(fwmarkString, sizeof(fwmarkString), "0x%x", fwmark.intValue);
385     strlcpy(iface, interface.c_str(), sizeof(iface));
386     ifIndex = if_nametoindex(iface);
387     strlcpy(v4iface, v4interface.c_str(), sizeof(v4iface));
388     v4ifIndex = if_nametoindex(v4iface);
389 
390     // Pass in everything that clatd needs: interface, a fwmark for outgoing packets, the NAT64
391     // prefix, and the IPv4 and IPv6 addresses.
392     // Validate the prefix and strip off the prefix length.
393     uint8_t family;
394     uint8_t prefixLen;
395     int res = parsePrefix(nat64Prefix.c_str(), &family, &pfx96, sizeof(pfx96), &prefixLen);
396     // clatd only supports /96 prefixes.
397     if (res != sizeof(pfx96)) return res;
398     if (family != AF_INET6) return -EAFNOSUPPORT;
399     if (prefixLen != 96) return -EINVAL;
400     if (!inet_ntop(AF_INET6, &pfx96, pfx96String, sizeof(pfx96String))) return -errno;
401 
402     // Pick an IPv4 address.
403     // TODO: this picks the address based on other addresses that are assigned to interfaces, but
404     // the address is only actually assigned to an interface once clatd starts up. So we could end
405     // up with two clatd instances with the same IPv4 address.
406     // Stop doing this and instead pick a free one from the kV4Addr pool.
407     v4 = {selectIpv4Address(kV4Addr, kV4AddrLen)};
408     if (v4.s_addr == INADDR_NONE) {
409         ALOGE("No free IPv4 address in %s/%d", kV4AddrString, kV4AddrLen);
410         return -EADDRNOTAVAIL;
411     }
412     if (!inet_ntop(AF_INET, &v4, v4Str, sizeof(v4Str))) return -errno;
413 
414     // Generate a checksum-neutral IID.
415     if (generateIpv6Address(iface, v4, pfx96, &v6)) {
416         ALOGE("Unable to find global source address on %s for %s", iface, pfx96String);
417         return -EADDRNOTAVAIL;
418     }
419     if (!inet_ntop(AF_INET6, &v6, v6Str, sizeof(v6Str))) return -errno;
420 
421     ALOGD("starting clatd on %s v4=%s v6=%s pfx96=%s", iface, v4Str, v6Str, pfx96String);
422     return 0;
423 }
424 
startClatd(const std::string & interface,const std::string & nat64Prefix,std::string * v6Str)425 int ClatdController::startClatd(const std::string& interface, const std::string& nat64Prefix,
426                                 std::string* v6Str) {
427     std::lock_guard guard(mutex);
428 
429     // 1. fail if pre-existing tracker already exists
430     ClatdTracker* existing = getClatdTracker(interface);
431     if (existing != nullptr) {
432         ALOGE("clatd pid=%d already started on %s", existing->pid, interface.c_str());
433         return -EBUSY;
434     }
435 
436     // 2. get network id associated with this external interface
437     unsigned networkId = mNetCtrl->getNetworkForInterface(interface.c_str());
438     if (networkId == NETID_UNSET) {
439         ALOGE("Interface %s not assigned to any netId", interface.c_str());
440         return -ENODEV;
441     }
442 
443     // 3. open the tun device in non blocking mode as required by clatd
444     int res = open("/dev/net/tun", O_RDWR | O_NONBLOCK | O_CLOEXEC);
445     if (res == -1) {
446         res = errno;
447         ALOGE("open of tun device failed (%s)", strerror(res));
448         return -res;
449     }
450     unique_fd tmpTunFd(res);
451 
452     // 4. create the v4-... tun interface
453     std::string v4interface("v4-");
454     v4interface += interface;
455 
456     struct ifreq ifr = {
457             .ifr_flags = IFF_TUN,
458     };
459     strlcpy(ifr.ifr_name, v4interface.c_str(), sizeof(ifr.ifr_name));
460 
461     res = ioctl(tmpTunFd, TUNSETIFF, &ifr, sizeof(ifr));
462     if (res == -1) {
463         res = errno;
464         ALOGE("ioctl(TUNSETIFF) failed (%s)", strerror(res));
465         return -res;
466     }
467 
468     // disable IPv6 on it - failing to do so is not a critical error
469     res = InterfaceController::setEnableIPv6(v4interface.c_str(), 0);
470     if (res) ALOGE("setEnableIPv6 %s failed (%s)", v4interface.c_str(), strerror(res));
471 
472     // 5. initialize tracker object
473     ClatdTracker tracker;
474     int ret = tracker.init(networkId, interface, v4interface, nat64Prefix);
475     if (ret) return ret;
476 
477     // 6. create a throwaway socket to reserve a file descriptor number
478     res = socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
479     if (res == -1) {
480         res = errno;
481         ALOGE("socket(ipv6/udp) failed (%s)", strerror(res));
482         return -res;
483     }
484     unique_fd passedTunFd(res);
485 
486     // 7. this is the FD we'll pass to clatd on the cli, so need it as a string
487     char passedTunFdStr[INT32_STRLEN];
488     snprintf(passedTunFdStr, sizeof(passedTunFdStr), "%d", passedTunFd.get());
489 
490     // 8. we're going to use this as argv[0] to clatd to make ps output more useful
491     std::string progname("clatd-");
492     progname += tracker.iface;
493 
494     // clang-format off
495     const char* args[] = {progname.c_str(),
496                           "-i", tracker.iface,
497                           "-m", tracker.fwmarkString,
498                           "-p", tracker.pfx96String,
499                           "-4", tracker.v4Str,
500                           "-6", tracker.v6Str,
501                           "-t", passedTunFdStr,
502                           nullptr};
503     // clang-format on
504 
505     // 9. register vfork requirement
506     posix_spawnattr_t attr;
507     res = posix_spawnattr_init(&attr);
508     if (res) {
509         ALOGE("posix_spawnattr_init failed (%s)", strerror(res));
510         return -res;
511     }
512     const android::base::ScopeGuard attrGuard = [&] { posix_spawnattr_destroy(&attr); };
513     res = posix_spawnattr_setflags(&attr, POSIX_SPAWN_USEVFORK);
514     if (res) {
515         ALOGE("posix_spawnattr_setflags failed (%s)", strerror(res));
516         return -res;
517     }
518 
519     // 10. register dup2() action: this is what 'clears' the CLOEXEC flag
520     // on the tun fd that we want the child clatd process to inherit
521     // (this will happen after the vfork, and before the execve)
522     posix_spawn_file_actions_t fa;
523     res = posix_spawn_file_actions_init(&fa);
524     if (res) {
525         ALOGE("posix_spawn_file_actions_init failed (%s)", strerror(res));
526         return -res;
527     }
528     const android::base::ScopeGuard faGuard = [&] { posix_spawn_file_actions_destroy(&fa); };
529     res = posix_spawn_file_actions_adddup2(&fa, tmpTunFd, passedTunFd);
530     if (res) {
531         ALOGE("posix_spawn_file_actions_adddup2 failed (%s)", strerror(res));
532         return -res;
533     }
534 
535     // 11. add the drop rule for iptables.
536     setIptablesDropRule(true, tracker.iface, tracker.pfx96String, tracker.v6Str);
537 
538     // 12. actually perform vfork/dup2/execve
539     res = posix_spawn(&tracker.pid, kClatdPath, &fa, &attr, (char* const*)args, nullptr);
540     if (res) {
541         ALOGE("posix_spawn failed (%s)", strerror(res));
542         return -res;
543     }
544 
545     // 13. configure eBPF offload - if possible
546     maybeStartBpf(tracker);
547 
548     mClatdTrackers[interface] = tracker;
549     ALOGD("clatd started on %s", interface.c_str());
550 
551     *v6Str = tracker.v6Str;
552     return 0;
553 }
554 
stopClatd(const std::string & interface)555 int ClatdController::stopClatd(const std::string& interface) {
556     std::lock_guard guard(mutex);
557     ClatdTracker* tracker = getClatdTracker(interface);
558 
559     if (tracker == nullptr) {
560         ALOGE("clatd already stopped");
561         return -ENODEV;
562     }
563 
564     ALOGD("Stopping clatd pid=%d on %s", tracker->pid, interface.c_str());
565 
566     maybeStopBpf(*tracker);
567 
568     kill(tracker->pid, SIGTERM);
569     waitpid(tracker->pid, nullptr, 0);
570 
571     setIptablesDropRule(false, tracker->iface, tracker->pfx96String, tracker->v6Str);
572     mClatdTrackers.erase(interface);
573 
574     ALOGD("clatd on %s stopped", interface.c_str());
575 
576     return 0;
577 }
578 
dumpEgress(DumpWriter & dw)579 void ClatdController::dumpEgress(DumpWriter& dw) {
580     if (!mClatEgress4Map.isValid()) return;  // if unsupported just don't dump anything
581 
582     ScopedIndent bpfIndent(dw);
583     dw.println("BPF egress map: iif(iface) v4Addr -> v6Addr nat64Prefix oif(iface)");
584 
585     ScopedIndent bpfDetailIndent(dw);
586     const auto printClatMap = [&dw](const ClatEgress4Key& key, const ClatEgress4Value& value,
587                                     const BpfMap<ClatEgress4Key, ClatEgress4Value>&) {
588         char iifStr[IFNAMSIZ] = "?";
589         char local4Str[INET_ADDRSTRLEN] = "?";
590         char local6Str[INET6_ADDRSTRLEN] = "?";
591         char pfx96Str[INET6_ADDRSTRLEN] = "?";
592         char oifStr[IFNAMSIZ] = "?";
593 
594         if_indextoname(key.iif, iifStr);
595         inet_ntop(AF_INET, &key.local4, local4Str, sizeof(local4Str));
596         inet_ntop(AF_INET6, &value.local6, local6Str, sizeof(local6Str));
597         inet_ntop(AF_INET6, &value.pfx96, pfx96Str, sizeof(pfx96Str));
598         if_indextoname(value.oif, oifStr);
599 
600         dw.println("%u(%s) %s -> %s %s/96 %u(%s) %s", key.iif, iifStr, local4Str, local6Str,
601                    pfx96Str, value.oif, oifStr, value.oifIsEthernet ? "ether" : "rawip");
602         return Result<void>();
603     };
604     auto res = mClatEgress4Map.iterateWithValue(printClatMap);
605     if (!res.ok()) {
606         dw.println("Error printing BPF map: %s", res.error().message().c_str());
607     }
608 }
609 
dumpIngress(DumpWriter & dw)610 void ClatdController::dumpIngress(DumpWriter& dw) {
611     if (!mClatIngress6Map.isValid()) return;  // if unsupported just don't dump anything
612 
613     ScopedIndent bpfIndent(dw);
614     dw.println("BPF ingress map: iif(iface) nat64Prefix v6Addr -> v4Addr oif(iface)");
615 
616     ScopedIndent bpfDetailIndent(dw);
617     const auto printClatMap = [&dw](const ClatIngress6Key& key, const ClatIngress6Value& value,
618                                     const BpfMap<ClatIngress6Key, ClatIngress6Value>&) {
619         char iifStr[IFNAMSIZ] = "?";
620         char pfx96Str[INET6_ADDRSTRLEN] = "?";
621         char local6Str[INET6_ADDRSTRLEN] = "?";
622         char local4Str[INET_ADDRSTRLEN] = "?";
623         char oifStr[IFNAMSIZ] = "?";
624 
625         if_indextoname(key.iif, iifStr);
626         inet_ntop(AF_INET6, &key.pfx96, pfx96Str, sizeof(pfx96Str));
627         inet_ntop(AF_INET6, &key.local6, local6Str, sizeof(local6Str));
628         inet_ntop(AF_INET, &value.local4, local4Str, sizeof(local4Str));
629         if_indextoname(value.oif, oifStr);
630 
631         dw.println("%u(%s) %s/96 %s -> %s %u(%s)", key.iif, iifStr, pfx96Str, local6Str, local4Str,
632                    value.oif, oifStr);
633         return Result<void>();
634     };
635     auto res = mClatIngress6Map.iterateWithValue(printClatMap);
636     if (!res.ok()) {
637         dw.println("Error printing BPF map: %s", res.error().message().c_str());
638     }
639 }
640 
dumpTrackers(DumpWriter & dw)641 void ClatdController::dumpTrackers(DumpWriter& dw) {
642     ScopedIndent trackerIndent(dw);
643     dw.println("Trackers: iif[iface] nat64Prefix v6Addr -> v4Addr v4iif[v4iface] [fwmark]");
644 
645     ScopedIndent trackerDetailIndent(dw);
646     for (const auto& pair : mClatdTrackers) {
647         const ClatdTracker& tracker = pair.second;
648         dw.println("%u[%s] %s/96 %s -> %s %u[%s] [%s]", tracker.ifIndex, tracker.iface,
649                    tracker.pfx96String, tracker.v6Str, tracker.v4Str, tracker.v4ifIndex,
650                    tracker.v4iface, tracker.fwmarkString);
651     }
652 }
653 
dump(DumpWriter & dw)654 void ClatdController::dump(DumpWriter& dw) {
655     std::lock_guard guard(mutex);
656 
657     ScopedIndent clatdIndent(dw);
658     dw.println("ClatdController");
659 
660     dumpTrackers(dw);
661     dumpIngress(dw);
662     dumpEgress(dw);
663 }
664 
665 auto ClatdController::isIpv4AddressFreeFunc = isIpv4AddressFree;
666 auto ClatdController::iptablesRestoreFunction = execIptablesRestore;
667 
668 }  // namespace net
669 }  // namespace android
670