1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "OffloadUtils.h"
18 
19 #include <arpa/inet.h>
20 #include <linux/if.h>
21 #include <linux/if_arp.h>
22 #include <linux/netlink.h>
23 #include <linux/pkt_cls.h>
24 #include <linux/pkt_sched.h>
25 #include <sys/ioctl.h>
26 #include <sys/socket.h>
27 #include <sys/types.h>
28 #include <unistd.h>
29 
30 #define LOG_TAG "OffloadUtils"
31 #include <log/log.h>
32 
33 #include "NetlinkCommands.h"
34 #include "android-base/unique_fd.h"
35 
36 namespace android {
37 namespace net {
38 
39 using std::max;
40 
doSIOCGIF(const std::string & interface,int opt)41 static int doSIOCGIF(const std::string& interface, int opt) {
42     base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
43 
44     if (ufd < 0) {
45         const int err = errno;
46         ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
47         return -err;
48     };
49 
50     struct ifreq ifr = {};
51     // We use strncpy() instead of strlcpy() since kernel has to be able
52     // to handle non-zero terminated junk passed in by userspace anyway,
53     // and this way too long interface names (more than IFNAMSIZ-1 = 15
54     // characters plus terminating NULL) will not get truncated to 15
55     // characters and zero-terminated and thus potentially erroneously
56     // match a truncated interface if one were to exist.
57     strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
58 
59     if (ioctl(ufd, opt, &ifr, sizeof(ifr))) return -errno;
60 
61     if (opt == SIOCGIFHWADDR) return ifr.ifr_hwaddr.sa_family;
62     if (opt == SIOCGIFMTU) return ifr.ifr_mtu;
63     return -EINVAL;
64 }
65 
hardwareAddressType(const std::string & interface)66 int hardwareAddressType(const std::string& interface) {
67     return doSIOCGIF(interface, SIOCGIFHWADDR);
68 }
69 
deviceMTU(const std::string & interface)70 int deviceMTU(const std::string& interface) {
71     return doSIOCGIF(interface, SIOCGIFMTU);
72 }
73 
isEthernet(const std::string & interface)74 base::Result<bool> isEthernet(const std::string& interface) {
75     int rv = hardwareAddressType(interface);
76     if (rv < 0) {
77         errno = -rv;
78         return ErrnoErrorf("Get hardware address type of interface {} failed", interface);
79     }
80 
81     switch (rv) {
82         case ARPHRD_ETHER:
83             return true;
84         case ARPHRD_NONE:
85         case ARPHRD_RAWIP:  // in Linux 4.14+ rmnet support was upstreamed and this is 519
86         case 530:           // this is ARPHRD_RAWIP on some Android 4.9 kernels with rmnet
87             return false;
88         default:
89             errno = EAFNOSUPPORT;  // Address family not supported
90             return ErrnoErrorf("Unknown hardware address type {} on interface {}", rv, interface);
91     }
92 }
93 
94 // TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
95 // and //system/netd/server/SockDiag.cpp:checkError(fd)
sendAndProcessNetlinkResponse(const void * req,int len)96 static int sendAndProcessNetlinkResponse(const void* req, int len) {
97     base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
98     if (fd == -1) {
99         const int err = errno;
100         ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
101         return -err;
102     }
103 
104     static constexpr int on = 1;
105     int rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
106     if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
107 
108     // this is needed to get sane strace netlink parsing, it allocates the pid
109     rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
110     if (rv) {
111         const int err = errno;
112         ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
113         return -err;
114     }
115 
116     // we do not want to receive messages from anyone besides the kernel
117     rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
118     if (rv) {
119         const int err = errno;
120         ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
121         return -err;
122     }
123 
124     rv = send(fd, req, len, 0);
125     if (rv == -1) return -errno;
126     if (rv != len) return -EMSGSIZE;
127 
128     struct {
129         nlmsghdr h;
130         nlmsgerr e;
131         char buf[256];
132     } resp = {};
133 
134     rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
135 
136     if (rv == -1) {
137         const int err = errno;
138         ALOGE("recv() failed");
139         return -err;
140     }
141 
142     if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
143         ALOGE("recv() returned short packet: %d", rv);
144         return -EMSGSIZE;
145     }
146 
147     if (resp.h.nlmsg_len != (unsigned)rv) {
148         ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
149         return -EBADMSG;
150     }
151 
152     if (resp.h.nlmsg_type != NLMSG_ERROR) {
153         ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
154         return -EBADMSG;
155     }
156 
157     return resp.e.error;  // returns 0 on success
158 }
159 
160 // ADD:     nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
161 // REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
162 // DEL:     nlMsgType=RTM_DELQDISC nlMsgFlags=0
doTcQdiscClsact(int ifIndex,uint16_t nlMsgType,uint16_t nlMsgFlags)163 int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
164     // This is the name of the qdisc we are attaching.
165     // Some hoop jumping to make this compile time constant with known size,
166     // so that the structure declaration is well defined at compile time.
167 #define CLSACT "clsact"
168     // sizeof() includes the terminating NULL
169     static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
170 
171     const struct {
172         nlmsghdr n;
173         tcmsg t;
174         struct {
175             nlattr attr;
176             char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
177         } kind;
178     } req = {
179             .n =
180                     {
181                             .nlmsg_len = sizeof(req),
182                             .nlmsg_type = nlMsgType,
183                             .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
184                     },
185             .t =
186                     {
187                             .tcm_family = AF_UNSPEC,
188                             .tcm_ifindex = ifIndex,
189                             .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
190                             .tcm_parent = TC_H_CLSACT,
191                     },
192             .kind =
193                     {
194                             .attr =
195                                     {
196                                             .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
197                                             .nla_type = TCA_KIND,
198                                     },
199                             .str = CLSACT,
200                     },
201     };
202 #undef CLSACT
203 
204     return sendAndProcessNetlinkResponse(&req, sizeof(req));
205 }
206 
// Priority used for the clat tc filter hook - must be after tethering.
// It is placed in the upper 16 bits of tcm_info when filters are added/deleted.
constexpr uint16_t PRIO_CLAT{4};
209 
210 // tc filter add dev .. in/egress prio 4 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
211 // direct-action
tcFilterAddDevBpf(int ifIndex,bool ingress,uint16_t proto,int bpfFd,bool ethernet)212 int tcFilterAddDevBpf(int ifIndex, bool ingress, uint16_t proto, int bpfFd, bool ethernet) {
213     // This is the name of the filter we're attaching (ie. this is the 'bpf'
214     // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
215     //
216     // We go through some hoops in order to make this compile time constants
217     // so that we can define the struct further down the function with the
218     // field for this sized correctly already during the build.
219 #define BPF "bpf"
220     // sizeof() includes the terminating NULL
221     static constexpr size_t ASCIIZ_LEN_BPF = sizeof(BPF);
222 
223     // This is to replicate program name suffix used by 'tc' Linux cli
224     // when it attaches programs.
225 #define FSOBJ_SUFFIX ":[*fsobj]"
226 
227     // This macro expands (from header files) to:
228     //   prog_clatd_schedcls_ingress6_clat_rawip:[*fsobj]
229     // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
230     // (also compatible with anything that has 0 size L2 header)
231     static constexpr char name_clat_rx_rawip[] = CLAT_INGRESS6_PROG_RAWIP_NAME FSOBJ_SUFFIX;
232 
233     // This macro expands (from header files) to:
234     //   prog_clatd_schedcls_ingress6_clat_ether:[*fsobj]
235     // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
236     // (also compatible with anything that has standard ethernet header)
237     static constexpr char name_clat_rx_ether[] = CLAT_INGRESS6_PROG_ETHER_NAME FSOBJ_SUFFIX;
238 
239     // This macro expands (from header files) to:
240     //   prog_clatd_schedcls_egress4_clat_rawip:[*fsobj]
241     // and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
242     // (also compatible with anything that has 0 size L2 header)
243     static constexpr char name_clat_tx_rawip[] = CLAT_EGRESS4_PROG_RAWIP_NAME FSOBJ_SUFFIX;
244 
245     // This macro expands (from header files) to:
246     //   prog_clatd_schedcls_egress4_clat_ether:[*fsobj]
247     // and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
248     // (also compatible with anything that has standard ethernet header)
249     static constexpr char name_clat_tx_ether[] = CLAT_EGRESS4_PROG_ETHER_NAME FSOBJ_SUFFIX;
250 
251 #undef FSOBJ_SUFFIX
252 
253     // The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
254     // booleans.  We need to compile time allocate enough space in the struct
255     // hence this macro magic to make sure we have enough space for either
256     // possibility.  In practice some of these are actually the same size.
257     static constexpr size_t ASCIIZ_MAXLEN_NAME = max({
258             sizeof(name_clat_rx_rawip),
259             sizeof(name_clat_rx_ether),
260             sizeof(name_clat_tx_rawip),
261             sizeof(name_clat_tx_ether),
262     });
263 
264     // These are not compile time constants: 'name' is used in strncpy below
265     const char* const name_clat_rx = ethernet ? name_clat_rx_ether : name_clat_rx_rawip;
266     const char* const name_clat_tx = ethernet ? name_clat_tx_ether : name_clat_tx_rawip;
267     const char* const name = ingress ? name_clat_rx : name_clat_tx;
268 
269     struct {
270         nlmsghdr n;
271         tcmsg t;
272         struct {
273             nlattr attr;
274             char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
275         } kind;
276         struct {
277             nlattr attr;
278             struct {
279                 nlattr attr;
280                 __u32 u32;
281             } fd;
282             struct {
283                 nlattr attr;
284                 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
285             } name;
286             struct {
287                 nlattr attr;
288                 __u32 u32;
289             } flags;
290         } options;
291     } req = {
292             .n =
293                     {
294                             .nlmsg_len = sizeof(req),
295                             .nlmsg_type = RTM_NEWTFILTER,
296                             .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
297                     },
298             .t =
299                     {
300                             .tcm_family = AF_UNSPEC,
301                             .tcm_ifindex = ifIndex,
302                             .tcm_handle = TC_H_UNSPEC,
303                             .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
304                                                     ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
305                             .tcm_info = static_cast<__u32>((PRIO_CLAT << 16) | htons(proto)),
306                     },
307             .kind =
308                     {
309                             .attr =
310                                     {
311                                             .nla_len = sizeof(req.kind),
312                                             .nla_type = TCA_KIND,
313                                     },
314                             .str = BPF,
315                     },
316             .options =
317                     {
318                             .attr =
319                                     {
320                                             .nla_len = sizeof(req.options),
321                                             .nla_type = NLA_F_NESTED | TCA_OPTIONS,
322                                     },
323                             .fd =
324                                     {
325                                             .attr =
326                                                     {
327                                                             .nla_len = sizeof(req.options.fd),
328                                                             .nla_type = TCA_BPF_FD,
329                                                     },
330                                             .u32 = static_cast<__u32>(bpfFd),
331                                     },
332                             .name =
333                                     {
334                                             .attr =
335                                                     {
336                                                             .nla_len = sizeof(req.options.name),
337                                                             .nla_type = TCA_BPF_NAME,
338                                                     },
339                                             // Visible via 'tc filter show', but
340                                             // is overwritten by strncpy below
341                                             .str = "placeholder",
342                                     },
343                             .flags =
344                                     {
345                                             .attr =
346                                                     {
347                                                             .nla_len = sizeof(req.options.flags),
348                                                             .nla_type = TCA_BPF_FLAGS,
349                                                     },
350                                             .u32 = TCA_BPF_FLAG_ACT_DIRECT,
351                                     },
352                     },
353     };
354 #undef BPF
355 
356     strncpy(req.options.name.str, name, sizeof(req.options.name.str));
357 
358     return sendAndProcessNetlinkResponse(&req, sizeof(req));
359 }
360 
361 // tc filter del dev .. in/egress prio 4 protocol ..
tcFilterDelDev(int ifIndex,bool ingress,uint16_t proto)362 int tcFilterDelDev(int ifIndex, bool ingress, uint16_t proto) {
363     const struct {
364         nlmsghdr n;
365         tcmsg t;
366     } req = {
367             .n =
368                     {
369                             .nlmsg_len = sizeof(req),
370                             .nlmsg_type = RTM_DELTFILTER,
371                             .nlmsg_flags = NETLINK_REQUEST_FLAGS,
372                     },
373             .t =
374                     {
375                             .tcm_family = AF_UNSPEC,
376                             .tcm_ifindex = ifIndex,
377                             .tcm_handle = TC_H_UNSPEC,
378                             .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
379                                                     ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
380                             .tcm_info = static_cast<__u32>((PRIO_CLAT << 16) | htons(proto)),
381                     },
382     };
383 
384     return sendAndProcessNetlinkResponse(&req, sizeof(req));
385 }
386 
387 }  // namespace net
388 }  // namespace android
389