1 // Copyright (c) PLUMgrid, Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License")
3 #include <bcc/proto.h>
4 
// Shorthand for the compiler's builtin memcpy.
// NOTE(review): no use of _memcpy is visible in this file - confirm before removing.
#define _memcpy __builtin_memcpy
6 
// meta data passed between bpf programs
// pem() fills this from the skb control block: prog_id comes from skb->cb[0]
// and rx_port_id from skb->cb[1]. A prog_id of 0 is treated by pem() as
// "packet arrived from an external interface".
typedef struct bpf_metadata {
    u32 prog_id;     // value the previous program stored in skb->cb[0]
    u32 rx_port_id;  // value the previous program stored in skb->cb[1]
} bpf_metadata_t;
12 
// Forwarding destination: value type of the pem_dest, br1_dest and br2_dest
// tables. Before tail-calling, the forwarding code copies prog_id into
// skb->cb[0] and port_id into skb->cb[1].
typedef struct bpf_dest {
    u32 prog_id;  // index into the `jump` program array used for the tail call
    u32 port_id;  // port id handed to the next program through skb->cb[1]
} bpf_dest_t;
17 
// use u64 to represent eth_addr.
// maintain the structure though to indicate the semantics
// Used as the key type of the br*_mac and br*_mac_ifindex tables; filled from
// ethernet->dst / ethernet->src in br_common(). The broadcast address is
// compared as 0xffffffffffffULL, i.e. the address occupies the low 48 bits.
typedef struct eth_addr {
    u64 addr;
} eth_addr_t;
23 
// Program table definitions for tail calls
// 16 slots, indexed by bpf_dest_t.prog_id in the jump.call() sites below.
// NOTE(review): the slots are not populated anywhere in this file; presumably
// the user-space loader installs the program fds - confirm.
BPF_PROG_ARRAY(jump, 16);
26 
// physical endpoint manager (pem) tables; pem connects to both bridge 1 and bridge 2
// <port_id, bpf_dest>
// Looked up by pem() with the port id obtained from pem_ifindex to find the
// program to tail-call for packets arriving from outside.
BPF_ARRAY(pem_dest, bpf_dest_t, 256);
// <port_id, ifindex>
// Looked up by pem() with the rx port id from skb->cb[1]; the result is the
// interface the packet is clone-redirected to.
BPF_ARRAY(pem_port, u32, 256);
// <ifindex, port_id>
// Keyed by skb->ingress_ifindex in pem().
BPF_HASH(pem_ifindex, u32, u32, 256);
// <0, tx2vm_pkts>
// Slot 0 is atomically incremented by pem() just before each
// bpf_clone_redirect() on the "from internal" path.
BPF_ARRAY(pem_stats, u32, 1);
36 
// bridge 1 (br1) tables
// <port_id, bpf_dest>
// Next program/port for a given br1 output port; consulted by br_common().
BPF_ARRAY(br1_dest, bpf_dest_t, 256);
// <eth_addr, port_id>
// Destination MAC -> br1 output port; consulted by br_common().
BPF_HASH(br1_mac, eth_addr_t, u32, 256);
// <0, rtr_ifindex>
// Slot 0 holds the ifindex that br_common() redirects broadcast frames to and
// records as the value when it learns a MAC from an ARP packet.
BPF_ARRAY(br1_rtr, u32, 1);
// <mac, ifindex>
// Capacity is a single entry. Written by br_common() when it sees an ARP
// packet with oper == 2 (key: source MAC, value: br1_rtr[0]); read for
// non-broadcast frames coming from pem.
BPF_HASH(br1_mac_ifindex, eth_addr_t, u32, 1);
46 
// bridge 2 (br2) tables
// <port_id, bpf_dest>
// Next program/port for a given br2 output port; consulted by br_common().
BPF_ARRAY(br2_dest, bpf_dest_t, 256);
// <eth_addr, port_id>
// Destination MAC -> br2 output port; consulted by br_common().
BPF_HASH(br2_mac, eth_addr_t, u32, 256);
// <0, rtr_ifindex>
// Slot 0 holds the ifindex that br_common() redirects broadcast frames to and
// records as the value when it learns a MAC from an ARP packet.
BPF_ARRAY(br2_rtr, u32, 1);
// <mac, ifindex>
// Capacity is a single entry. Written by br_common() when it sees an ARP
// packet with oper == 2 (key: source MAC, value: br2_rtr[0]); read for
// non-broadcast frames coming from pem.
BPF_HASH(br2_mac_ifindex, eth_addr_t, u32, 1);
56 
pem(struct __sk_buff * skb)57 int pem(struct __sk_buff *skb) {
58     bpf_metadata_t meta = {};
59     u32 ifindex;
60     u32 *tx_port_id_p;
61     u32 tx_port_id;
62     u32 rx_port;
63     u32 *ifindex_p;
64     bpf_dest_t *dest_p;
65 
66     // pem does not look at packet data
67     if (skb->tc_index == 0) {
68         skb->tc_index = 1;
69         skb->cb[0] = skb->cb[1] = 0;
70         meta.prog_id = meta.rx_port_id = 0;
71     } else {
72         meta.prog_id = skb->cb[0];
73         asm volatile("" ::: "memory");
74         meta.rx_port_id = skb->cb[1];
75     }
76     if (!meta.prog_id) {
77         /* from external */
78         ifindex = skb->ingress_ifindex;
79         tx_port_id_p = pem_ifindex.lookup(&ifindex);
80         if (tx_port_id_p) {
81             tx_port_id = *tx_port_id_p;
82             dest_p = pem_dest.lookup(&tx_port_id);
83             if (dest_p) {
84                 skb->cb[0] = dest_p->prog_id;
85                 skb->cb[1] = dest_p->port_id;
86                 jump.call(skb, dest_p->prog_id);
87             }
88         }
89     } else {
90         /* from internal */
91         rx_port = meta.rx_port_id;
92         ifindex_p = pem_port.lookup(&rx_port);
93         if (ifindex_p) {
94 #if 1
95             /* accumulate stats, may hurt performance slightly */
96             u32 index = 0;
97             u32 *value = pem_stats.lookup(&index);
98             if (value)
99                 lock_xadd(value, 1);
100 #endif
101             bpf_clone_redirect(skb, *ifindex_p, 0);
102         }
103     }
104 
105     return 1;
106 }
107 
br_common(struct __sk_buff * skb,int which_br)108 static int br_common(struct __sk_buff *skb, int which_br) {
109     u8 *cursor = 0;
110     u16 proto;
111     u16 arpop;
112     eth_addr_t dmac;
113     u8 *mac_p;
114     u32 dip;
115     u32 *tx_port_id_p;
116     u32 tx_port_id;
117     bpf_dest_t *dest_p;
118     u32 index, *rtrif_p;
119 
120     struct ethernet_t *ethernet = cursor_advance(cursor, sizeof(*ethernet));
121     /* handle ethernet packet header */
122     {
123         dmac.addr = ethernet->dst;
124         /* skb->tc_index may be preserved across router namespace if router simply rewrite packet
125          * and send it back.
126          */
127         if (skb->tc_index == 1) {
128             /* packet from pem, send to the router, set tc_index to 2 */
129             skb->tc_index = 2;
130             if (dmac.addr == 0xffffffffffffULL) {
131                  index = 0;
132                  if (which_br == 1)
133                      rtrif_p = br1_rtr.lookup(&index);
134                  else
135                      rtrif_p = br2_rtr.lookup(&index);
136                  if (rtrif_p)
137                      bpf_clone_redirect(skb, *rtrif_p, 0);
138              } else {
139                  /* the dmac address should match the router's */
140                  if (which_br == 1)
141                      rtrif_p = br1_mac_ifindex.lookup(&dmac);
142                  else
143                      rtrif_p = br2_mac_ifindex.lookup(&dmac);
144                  if (rtrif_p)
145                      bpf_clone_redirect(skb, *rtrif_p, 0);
146              }
147              return 1;
148         }
149 
150         /* set the tc_index to 1 so pem knows it is from internal */
151         skb->tc_index = 1;
152         switch (ethernet->type) {
153             case ETH_P_IP: goto ip;
154             case ETH_P_ARP: goto arp;
155             case ETH_P_8021Q: goto dot1q;
156             default: goto EOP;
157         }
158     }
159 
160     dot1q: {
161         struct dot1q_t *dot1q = cursor_advance(cursor, sizeof(*dot1q));
162         switch(dot1q->type) {
163             case ETH_P_IP: goto ip;
164             case ETH_P_ARP: goto arp;
165             default: goto EOP;
166         }
167     }
168 
169     arp: {
170         struct arp_t *arp = cursor_advance(cursor, sizeof(*arp));
171         /* mac learning */
172         arpop = arp->oper;
173         if (arpop == 2) {
174             index = 0;
175             if (which_br == 1)
176                 rtrif_p = br1_rtr.lookup(&index);
177             else
178                 rtrif_p = br2_rtr.lookup(&index);
179             if (rtrif_p) {
180                 __u32 ifindex = *rtrif_p;
181                 eth_addr_t smac;
182 
183                 smac.addr = ethernet->src;
184                 if (which_br == 1)
185                     br1_mac_ifindex.update(&smac, &ifindex);
186                 else
187                     br2_mac_ifindex.update(&smac, &ifindex);
188             }
189         }
190         goto xmit;
191     }
192 
193     ip: {
194         struct ip_t *ip = cursor_advance(cursor, sizeof(*ip));
195         goto xmit;
196     }
197 
198 xmit:
199     if (which_br == 1)
200         tx_port_id_p = br1_mac.lookup(&dmac);
201     else
202         tx_port_id_p = br2_mac.lookup(&dmac);
203     if (tx_port_id_p) {
204         tx_port_id = *tx_port_id_p;
205         if (which_br == 1)
206             dest_p = br1_dest.lookup(&tx_port_id);
207         else
208             dest_p = br2_dest.lookup(&tx_port_id);
209         if (dest_p) {
210             skb->cb[0] = dest_p->prog_id;
211             skb->cb[1] = dest_p->port_id;
212             jump.call(skb, dest_p->prog_id);
213         }
214     }
215 
216 EOP:
217     return 1;
218 }
219 
/* Entry point for bridge 1: runs the shared bridge logic on the br1_* tables. */
int br1(struct __sk_buff *skb) {
    const int bridge_id = 1;

    return br_common(skb, bridge_id);
}
223 
/* Entry point for bridge 2: runs the shared bridge logic on the br2_* tables. */
int br2(struct __sk_buff *skb) {
    const int bridge_id = 2;

    return br_common(skb, bridge_id);
}
227