1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "NetworkTrace"
18 
19 #include "netdbpf/NetworkTraceHandler.h"
20 
21 #include <android-base/macros.h>
22 #include <arpa/inet.h>
23 #include <bpf/BpfUtils.h>
24 #include <log/log.h>
25 #include <perfetto/config/android/network_trace_config.pbzero.h>
26 #include <perfetto/trace/android/network_trace.pbzero.h>
27 #include <perfetto/trace/profiling/profile_packet.pbzero.h>
28 #include <perfetto/tracing/platform.h>
29 #include <perfetto/tracing/tracing.h>
30 
31 // Note: this is initializing state for a templated Perfetto type that resides
32 // in the `perfetto` namespace. This must be defined in the global scope.
33 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(android::bpf::NetworkTraceHandler);
34 
35 namespace android {
36 namespace bpf {
37 using ::android::bpf::internal::NetworkTracePoller;
38 using ::perfetto::protos::pbzero::NetworkPacketBundle;
39 using ::perfetto::protos::pbzero::NetworkPacketEvent;
40 using ::perfetto::protos::pbzero::NetworkPacketTraceConfig;
41 using ::perfetto::protos::pbzero::TracePacket;
42 using ::perfetto::protos::pbzero::TrafficDirection;
43 
44 // Bundling takes groups of packets with similar contextual fields (generally,
45 // all fields except timestamp and length) and summarises them in a single trace
46 // packet. For example, rather than
47 //
48 //   {.timestampNs = 1, .uid = 1000, .tag = 123, .len = 72}
49 //   {.timestampNs = 2, .uid = 1000, .tag = 123, .len = 100}
50 //   {.timestampNs = 5, .uid = 1000, .tag = 123, .len = 456}
51 //
// The output will be something like
//   {
//     .timestamp = 1
//     .ctx = {.uid = 1000, .tag = 123}
//     .packet_timestamps = [0, 1, 4], // offsets from .timestamp
//     .packet_lengths = [72, 100, 456], // zipped with packet_timestamps
//   }
59 //
60 // Most workloads have many packets from few contexts. Bundling greatly reduces
61 // the amount of redundant information written, thus reducing the overall trace
62 // size. Interning ids are similarly based on unique bundle contexts.
63 
// Folds the hash of `val`, then of every value in `rest` (left to right), into
// `seed`. The mixing step follows boost::hash_combine.
template <typename T, typename... Rest>
void HashCombine(std::size_t& seed, const T& val, const Rest&... rest) {
  const std::size_t hashed = std::hash<T>()(val);
  seed ^= hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  // Peel off one value per instantiation until the pack is exhausted.
  if constexpr (sizeof...(rest) > 0) {
    HashCombine(seed, rest...);
  }
}
70 
// Per-bundle accumulator: the individual packet (timestamp, length) pairs plus
// running summary values for the bundle.
struct BundleDetails {
  // One (timestampNs, length) entry per packet added to the bundle.
  std::vector<std::pair<uint64_t, uint32_t>> time_and_len{};
  // Starts at the largest value so the first std::min() replaces it.
  uint64_t minTs{std::numeric_limits<uint64_t>::max()};
  // Starts at the smallest value so the first std::max() replaces it.
  uint64_t maxTs{0};
  // Sum of the lengths of all packets in the bundle.
  uint32_t bytes{0};
};
78 
BundleKey(const PacketTrace & pkt)79 BundleKey::BundleKey(const PacketTrace& pkt)
80     : ifindex(pkt.ifindex),
81       uid(pkt.uid),
82       tag(pkt.tag),
83       egress(pkt.egress),
84       ipProto(pkt.ipProto),
85       ipVersion(pkt.ipVersion) {
86   switch (ipProto) {
87     case IPPROTO_TCP:
88       tcpFlags = pkt.tcpFlags;
89       FALLTHROUGH_INTENDED;
90     case IPPROTO_DCCP:
91     case IPPROTO_UDP:
92     case IPPROTO_UDPLITE:
93     case IPPROTO_SCTP:
94       localPort = ntohs(pkt.egress ? pkt.sport : pkt.dport);
95       remotePort = ntohs(pkt.egress ? pkt.dport : pkt.sport);
96       break;
97     case IPPROTO_ICMP:
98     case IPPROTO_ICMPV6:
99       icmpType = ntohs(pkt.sport);
100       icmpCode = ntohs(pkt.dport);
101       break;
102   }
103 }
104 
// Expands to a comma-separated list of the BundleKey members of `x` that
// identify a bundle. BundleHash and BundleEq both expand this macro so that
// hashing and equality are always computed over the same set of fields.
#define AGG_FIELDS(x)                                                    \
  (x).ifindex, (x).uid, (x).tag, (x).egress, (x).ipProto, (x).ipVersion, \
      (x).tcpFlags, (x).localPort, (x).remotePort, (x).icmpType, (x).icmpCode
108 
operator ()(const BundleKey & a) const109 std::size_t BundleHash::operator()(const BundleKey& a) const {
110   std::size_t seed = 0;
111   HashCombine(seed, AGG_FIELDS(a));
112   return seed;
113 }
114 
operator ()(const BundleKey & a,const BundleKey & b) const115 bool BundleEq::operator()(const BundleKey& a, const BundleKey& b) const {
116   return std::tie(AGG_FIELDS(a)) == std::tie(AGG_FIELDS(b));
117 }
118 
119 // static
RegisterDataSource()120 void NetworkTraceHandler::RegisterDataSource() {
121   ALOGD("Registering Perfetto data source");
122   perfetto::DataSourceDescriptor dsd;
123   dsd.set_name("android.network_packets");
124   NetworkTraceHandler::Register(dsd);
125 }
126 
127 // static
InitPerfettoTracing()128 void NetworkTraceHandler::InitPerfettoTracing() {
129   perfetto::TracingInitArgs args = {};
130   args.backends |= perfetto::kSystemBackend;
131   // The following line disables the Perfetto system consumer. Perfetto inlines
132   // the call to `Initialize` which allows the compiler to see that the branch
133   // with the SystemConsumerTracingBackend is not used. With LTO enabled, this
134   // strips the Perfetto consumer code and reduces the size of this binary by
135   // around 270KB total. Be careful when changing this value.
136   args.enable_system_consumer = false;
137   perfetto::Tracing::Initialize(args);
138   NetworkTraceHandler::RegisterDataSource();
139 }
140 
141 // static
142 NetworkTracePoller NetworkTraceHandler::sPoller(
__anonbecfadfa0102(const std::vector<PacketTrace>& packets) 143     [](const std::vector<PacketTrace>& packets) {
144       // Trace calls the provided callback for each active session. The context
145       // gets a reference to the NetworkTraceHandler instance associated with
146       // the session and delegates writing. The corresponding handler will write
147       // with the setting specified in the trace config.
148       NetworkTraceHandler::Trace([&](NetworkTraceHandler::TraceContext ctx) {
149         perfetto::LockedHandle<NetworkTraceHandler> handle =
150             ctx.GetDataSourceLocked();
151         // The underlying handle can be invalidated between when Trace starts
152         // and GetDataSourceLocked is called, but not while the LockedHandle
153         // exists and holds the lock. Check validity prior to use.
154         if (handle.valid()) {
155           handle->Write(packets, ctx);
156         }
157       });
158     });
159 
OnSetup(const SetupArgs & args)160 void NetworkTraceHandler::OnSetup(const SetupArgs& args) {
161   const std::string& raw = args.config->network_packet_trace_config_raw();
162   NetworkPacketTraceConfig::Decoder config(raw);
163 
164   mPollMs = config.poll_ms();
165   if (mPollMs < 100) {
166     ALOGI("poll_ms is missing or below the 100ms minimum. Increasing to 100ms");
167     mPollMs = 100;
168   }
169 
170   mInternLimit = config.intern_limit();
171   mAggregationThreshold = config.aggregation_threshold();
172   mDropLocalPort = config.drop_local_port();
173   mDropRemotePort = config.drop_remote_port();
174   mDropTcpFlags = config.drop_tcp_flags();
175 }
176 
OnStart(const StartArgs &)177 void NetworkTraceHandler::OnStart(const StartArgs&) {
178   if (mIsTest) return;  // Don't touch non-hermetic bpf in test.
179   mStarted = sPoller.Start(mPollMs);
180 }
181 
OnStop(const StopArgs &)182 void NetworkTraceHandler::OnStop(const StopArgs&) {
183   if (mIsTest) return;  // Don't touch non-hermetic bpf in test.
184   if (mStarted) sPoller.Stop();
185   mStarted = false;
186 
187   // Although this shouldn't be required, there seems to be some cases when we
188   // don't fill enough of a Perfetto Chunk for Perfetto to automatically commit
189   // the traced data. This manually flushes OnStop so we commit at least once.
190   NetworkTraceHandler::Trace([&](NetworkTraceHandler::TraceContext ctx) {
191     perfetto::LockedHandle<NetworkTraceHandler> handle =
192         ctx.GetDataSourceLocked();
193     // Trace is called for all active handlers, only flush our context. Since
194     // handle doesn't have a `.get()`, use `*` and `&` to get what it points to.
195     if (&(*handle) != this) return;
196     ctx.Flush();
197   });
198 }
199 
Write(const std::vector<PacketTrace> & packets,NetworkTraceHandler::TraceContext & ctx)200 void NetworkTraceHandler::Write(const std::vector<PacketTrace>& packets,
201                                 NetworkTraceHandler::TraceContext& ctx) {
202   // TODO: remove this fallback once Perfetto stable has support for bundles.
203   if (!mInternLimit && !mAggregationThreshold) {
204     for (const PacketTrace& pkt : packets) {
205       auto dst = ctx.NewTracePacket();
206       dst->set_timestamp(pkt.timestampNs);
207       auto* event = dst->set_network_packet();
208       event->set_length(pkt.length);
209       Fill(BundleKey(pkt), event);
210     }
211     return;
212   }
213 
214   uint64_t minTs = std::numeric_limits<uint64_t>::max();
215   std::unordered_map<BundleKey, BundleDetails, BundleHash, BundleEq> bundles;
216   for (const PacketTrace& pkt : packets) {
217     BundleKey key(pkt);
218 
219     // Dropping fields should remove them from the output and remove them from
220     // the aggregation key. Reset the optionals to indicate omission.
221     if (mDropTcpFlags) key.tcpFlags.reset();
222     if (mDropLocalPort) key.localPort.reset();
223     if (mDropRemotePort) key.remotePort.reset();
224 
225     minTs = std::min(minTs, pkt.timestampNs);
226 
227     BundleDetails& bundle = bundles[key];
228     bundle.time_and_len.emplace_back(pkt.timestampNs, pkt.length);
229     bundle.minTs = std::min(bundle.minTs, pkt.timestampNs);
230     bundle.maxTs = std::max(bundle.maxTs, pkt.timestampNs);
231     bundle.bytes += pkt.length;
232   }
233 
234   NetworkTraceState* incr_state = ctx.GetIncrementalState();
235   for (const auto& kv : bundles) {
236     const BundleKey& key = kv.first;
237     const BundleDetails& details = kv.second;
238 
239     auto dst = ctx.NewTracePacket();
240     dst->set_timestamp(details.minTs);
241 
242     // Incremental state is only used when interning. Set the flag based on
243     // whether state was cleared. Leave the flag empty in non-intern configs.
244     if (mInternLimit > 0) {
245       if (incr_state->cleared) {
246         dst->set_sequence_flags(TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
247         incr_state->cleared = false;
248       } else {
249         dst->set_sequence_flags(TracePacket::SEQ_NEEDS_INCREMENTAL_STATE);
250       }
251     }
252 
253     auto* event = FillWithInterning(incr_state, key, dst.get());
254 
255     int count = details.time_and_len.size();
256     if (!mAggregationThreshold || count < mAggregationThreshold) {
257       protozero::PackedVarInt offsets;
258       protozero::PackedVarInt lengths;
259       for (const auto& kv : details.time_and_len) {
260         offsets.Append(kv.first - details.minTs);
261         lengths.Append(kv.second);
262       }
263 
264       event->set_packet_timestamps(offsets);
265       event->set_packet_lengths(lengths);
266     } else {
267       event->set_total_duration(details.maxTs - details.minTs);
268       event->set_total_length(details.bytes);
269       event->set_total_packets(count);
270     }
271   }
272 }
273 
Fill(const BundleKey & src,NetworkPacketEvent * event)274 void NetworkTraceHandler::Fill(const BundleKey& src,
275                                NetworkPacketEvent* event) {
276   event->set_direction(src.egress ? TrafficDirection::DIR_EGRESS
277                                   : TrafficDirection::DIR_INGRESS);
278   event->set_uid(src.uid);
279   event->set_tag(src.tag);
280 
281   if (src.tcpFlags.has_value()) event->set_tcp_flags(*src.tcpFlags);
282   if (src.localPort.has_value()) event->set_local_port(*src.localPort);
283   if (src.remotePort.has_value()) event->set_remote_port(*src.remotePort);
284   if (src.icmpType.has_value()) event->set_icmp_type(*src.icmpType);
285   if (src.icmpCode.has_value()) event->set_icmp_code(*src.icmpCode);
286 
287   event->set_ip_proto(src.ipProto);
288 
289   char ifname[IF_NAMESIZE] = {};
290   if (if_indextoname(src.ifindex, ifname) == ifname) {
291     event->set_interface(std::string(ifname));
292   } else {
293     event->set_interface("error");
294   }
295 }
296 
FillWithInterning(NetworkTraceState * state,const BundleKey & key,TracePacket * dst)297 NetworkPacketBundle* NetworkTraceHandler::FillWithInterning(
298     NetworkTraceState* state, const BundleKey& key, TracePacket* dst) {
299   uint64_t iid = 0;
300   bool found = false;
301 
302   if (state->iids.size() < mInternLimit) {
303     auto [iter, success] = state->iids.try_emplace(key, state->iids.size() + 1);
304     iid = iter->second;
305     found = true;
306 
307     if (success) {
308       // If we successfully empaced, record the newly interned data.
309       auto* packet_context = dst->set_interned_data()->add_packet_context();
310       Fill(key, packet_context->set_ctx());
311       packet_context->set_iid(iid);
312     }
313   } else {
314     auto iter = state->iids.find(key);
315     if (iter != state->iids.end()) {
316       iid = iter->second;
317       found = true;
318     }
319   }
320 
321   auto* event = dst->set_network_packet_bundle();
322   if (found) {
323     event->set_iid(iid);
324   } else {
325     Fill(key, event->set_ctx());
326   }
327 
328   return event;
329 }
330 
331 }  // namespace bpf
332 }  // namespace android
333