1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "NetworkTrace"
18
19 #include "netdbpf/NetworkTraceHandler.h"
20
21 #include <android-base/macros.h>
22 #include <arpa/inet.h>
23 #include <bpf/BpfUtils.h>
24 #include <log/log.h>
25 #include <perfetto/config/android/network_trace_config.pbzero.h>
26 #include <perfetto/trace/android/network_trace.pbzero.h>
27 #include <perfetto/trace/profiling/profile_packet.pbzero.h>
28 #include <perfetto/tracing/platform.h>
29 #include <perfetto/tracing/tracing.h>
30
31 // Note: this is initializing state for a templated Perfetto type that resides
32 // in the `perfetto` namespace. This must be defined in the global scope.
33 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(android::bpf::NetworkTraceHandler);
34
35 namespace android {
36 namespace bpf {
37 using ::android::bpf::internal::NetworkTracePoller;
38 using ::perfetto::protos::pbzero::NetworkPacketBundle;
39 using ::perfetto::protos::pbzero::NetworkPacketEvent;
40 using ::perfetto::protos::pbzero::NetworkPacketTraceConfig;
41 using ::perfetto::protos::pbzero::TracePacket;
42 using ::perfetto::protos::pbzero::TrafficDirection;
43
44 // Bundling takes groups of packets with similar contextual fields (generally,
45 // all fields except timestamp and length) and summarises them in a single trace
46 // packet. For example, rather than
47 //
48 // {.timestampNs = 1, .uid = 1000, .tag = 123, .len = 72}
49 // {.timestampNs = 2, .uid = 1000, .tag = 123, .len = 100}
50 // {.timestampNs = 5, .uid = 1000, .tag = 123, .len = 456}
51 //
52 // The output will be something like
// {
//   .timestamp = 1
//   .ctx = {.uid = 1000, .tag = 123}
//   .packet_timestamps = [0, 1, 4],  // offsets from .timestamp
//   .packet_lengths = [72, 100, 456],  // should be zipped with timestamps
// }
59 //
60 // Most workloads have many packets from few contexts. Bundling greatly reduces
61 // the amount of redundant information written, thus reducing the overall trace
62 // size. Interning ids are similarly based on unique bundle contexts.
63
64 // Based on boost::hash_combine
// Folds the std::hash of `val`, and then of each element of `rest` in order,
// into `seed`. The mixing step follows boost::hash_combine.
template <typename T, typename... Rest>
void HashCombine(std::size_t& seed, const T& val, const Rest&... rest) {
  const std::size_t hashed = std::hash<T>{}(val);
  seed ^= hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);

  // Peel off one argument per call until the pack is exhausted.
  if constexpr (sizeof...(rest) > 0) {
    HashCombine(seed, rest...);
  }
}
70
71 // Details summarises the timestamp and lengths of packets in a bundle.
// Accumulated per-bundle data: the individual packet timestamps and lengths
// plus running totals over them.
struct BundleDetails {
  // One (timestamp, length) entry per packet, in the order they were appended.
  std::vector<std::pair<uint64_t, uint32_t>> time_and_len{};
  // Smallest timestamp seen so far; starts at the largest value so that the
  // first std::min against it always wins.
  uint64_t minTs{std::numeric_limits<uint64_t>::max()};
  // Largest timestamp seen so far; starts at the smallest value.
  uint64_t maxTs{std::numeric_limits<uint64_t>::min()};
  // Sum of the lengths of all packets in the bundle.
  uint32_t bytes{0};
};
78
BundleKey(const PacketTrace & pkt)79 BundleKey::BundleKey(const PacketTrace& pkt)
80 : ifindex(pkt.ifindex),
81 uid(pkt.uid),
82 tag(pkt.tag),
83 egress(pkt.egress),
84 ipProto(pkt.ipProto),
85 ipVersion(pkt.ipVersion) {
86 switch (ipProto) {
87 case IPPROTO_TCP:
88 tcpFlags = pkt.tcpFlags;
89 FALLTHROUGH_INTENDED;
90 case IPPROTO_DCCP:
91 case IPPROTO_UDP:
92 case IPPROTO_UDPLITE:
93 case IPPROTO_SCTP:
94 localPort = ntohs(pkt.egress ? pkt.sport : pkt.dport);
95 remotePort = ntohs(pkt.egress ? pkt.dport : pkt.sport);
96 break;
97 case IPPROTO_ICMP:
98 case IPPROTO_ICMPV6:
99 icmpType = ntohs(pkt.sport);
100 icmpCode = ntohs(pkt.dport);
101 break;
102 }
103 }
104
// Expands to a comma-separated list of every member of `key` that takes part
// in hashing and equality, so both operations always cover the same fields.
#define AGG_FIELDS(key)                                             \
  (key).ifindex, (key).uid, (key).tag, (key).egress, (key).ipProto, \
      (key).ipVersion, (key).tcpFlags, (key).localPort,             \
      (key).remotePort, (key).icmpType, (key).icmpCode
108
operator ()(const BundleKey & a) const109 std::size_t BundleHash::operator()(const BundleKey& a) const {
110 std::size_t seed = 0;
111 HashCombine(seed, AGG_FIELDS(a));
112 return seed;
113 }
114
operator ()(const BundleKey & a,const BundleKey & b) const115 bool BundleEq::operator()(const BundleKey& a, const BundleKey& b) const {
116 return std::tie(AGG_FIELDS(a)) == std::tie(AGG_FIELDS(b));
117 }
118
119 // static
RegisterDataSource()120 void NetworkTraceHandler::RegisterDataSource() {
121 ALOGD("Registering Perfetto data source");
122 perfetto::DataSourceDescriptor dsd;
123 dsd.set_name("android.network_packets");
124 NetworkTraceHandler::Register(dsd);
125 }
126
127 // static
InitPerfettoTracing()128 void NetworkTraceHandler::InitPerfettoTracing() {
129 perfetto::TracingInitArgs args = {};
130 args.backends |= perfetto::kSystemBackend;
131 // The following line disables the Perfetto system consumer. Perfetto inlines
132 // the call to `Initialize` which allows the compiler to see that the branch
133 // with the SystemConsumerTracingBackend is not used. With LTO enabled, this
134 // strips the Perfetto consumer code and reduces the size of this binary by
135 // around 270KB total. Be careful when changing this value.
136 args.enable_system_consumer = false;
137 perfetto::Tracing::Initialize(args);
138 NetworkTraceHandler::RegisterDataSource();
139 }
140
141 // static
142 NetworkTracePoller NetworkTraceHandler::sPoller(
__anonbecfadfa0102(const std::vector<PacketTrace>& packets) 143 [](const std::vector<PacketTrace>& packets) {
144 // Trace calls the provided callback for each active session. The context
145 // gets a reference to the NetworkTraceHandler instance associated with
146 // the session and delegates writing. The corresponding handler will write
147 // with the setting specified in the trace config.
148 NetworkTraceHandler::Trace([&](NetworkTraceHandler::TraceContext ctx) {
149 perfetto::LockedHandle<NetworkTraceHandler> handle =
150 ctx.GetDataSourceLocked();
151 // The underlying handle can be invalidated between when Trace starts
152 // and GetDataSourceLocked is called, but not while the LockedHandle
153 // exists and holds the lock. Check validity prior to use.
154 if (handle.valid()) {
155 handle->Write(packets, ctx);
156 }
157 });
158 });
159
OnSetup(const SetupArgs & args)160 void NetworkTraceHandler::OnSetup(const SetupArgs& args) {
161 const std::string& raw = args.config->network_packet_trace_config_raw();
162 NetworkPacketTraceConfig::Decoder config(raw);
163
164 mPollMs = config.poll_ms();
165 if (mPollMs < 100) {
166 ALOGI("poll_ms is missing or below the 100ms minimum. Increasing to 100ms");
167 mPollMs = 100;
168 }
169
170 mInternLimit = config.intern_limit();
171 mAggregationThreshold = config.aggregation_threshold();
172 mDropLocalPort = config.drop_local_port();
173 mDropRemotePort = config.drop_remote_port();
174 mDropTcpFlags = config.drop_tcp_flags();
175 }
176
OnStart(const StartArgs &)177 void NetworkTraceHandler::OnStart(const StartArgs&) {
178 if (mIsTest) return; // Don't touch non-hermetic bpf in test.
179 mStarted = sPoller.Start(mPollMs);
180 }
181
OnStop(const StopArgs &)182 void NetworkTraceHandler::OnStop(const StopArgs&) {
183 if (mIsTest) return; // Don't touch non-hermetic bpf in test.
184 if (mStarted) sPoller.Stop();
185 mStarted = false;
186
187 // Although this shouldn't be required, there seems to be some cases when we
188 // don't fill enough of a Perfetto Chunk for Perfetto to automatically commit
189 // the traced data. This manually flushes OnStop so we commit at least once.
190 NetworkTraceHandler::Trace([&](NetworkTraceHandler::TraceContext ctx) {
191 perfetto::LockedHandle<NetworkTraceHandler> handle =
192 ctx.GetDataSourceLocked();
193 // Trace is called for all active handlers, only flush our context. Since
194 // handle doesn't have a `.get()`, use `*` and `&` to get what it points to.
195 if (&(*handle) != this) return;
196 ctx.Flush();
197 });
198 }
199
Write(const std::vector<PacketTrace> & packets,NetworkTraceHandler::TraceContext & ctx)200 void NetworkTraceHandler::Write(const std::vector<PacketTrace>& packets,
201 NetworkTraceHandler::TraceContext& ctx) {
202 // TODO: remove this fallback once Perfetto stable has support for bundles.
203 if (!mInternLimit && !mAggregationThreshold) {
204 for (const PacketTrace& pkt : packets) {
205 auto dst = ctx.NewTracePacket();
206 dst->set_timestamp(pkt.timestampNs);
207 auto* event = dst->set_network_packet();
208 event->set_length(pkt.length);
209 Fill(BundleKey(pkt), event);
210 }
211 return;
212 }
213
214 uint64_t minTs = std::numeric_limits<uint64_t>::max();
215 std::unordered_map<BundleKey, BundleDetails, BundleHash, BundleEq> bundles;
216 for (const PacketTrace& pkt : packets) {
217 BundleKey key(pkt);
218
219 // Dropping fields should remove them from the output and remove them from
220 // the aggregation key. Reset the optionals to indicate omission.
221 if (mDropTcpFlags) key.tcpFlags.reset();
222 if (mDropLocalPort) key.localPort.reset();
223 if (mDropRemotePort) key.remotePort.reset();
224
225 minTs = std::min(minTs, pkt.timestampNs);
226
227 BundleDetails& bundle = bundles[key];
228 bundle.time_and_len.emplace_back(pkt.timestampNs, pkt.length);
229 bundle.minTs = std::min(bundle.minTs, pkt.timestampNs);
230 bundle.maxTs = std::max(bundle.maxTs, pkt.timestampNs);
231 bundle.bytes += pkt.length;
232 }
233
234 NetworkTraceState* incr_state = ctx.GetIncrementalState();
235 for (const auto& kv : bundles) {
236 const BundleKey& key = kv.first;
237 const BundleDetails& details = kv.second;
238
239 auto dst = ctx.NewTracePacket();
240 dst->set_timestamp(details.minTs);
241
242 // Incremental state is only used when interning. Set the flag based on
243 // whether state was cleared. Leave the flag empty in non-intern configs.
244 if (mInternLimit > 0) {
245 if (incr_state->cleared) {
246 dst->set_sequence_flags(TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
247 incr_state->cleared = false;
248 } else {
249 dst->set_sequence_flags(TracePacket::SEQ_NEEDS_INCREMENTAL_STATE);
250 }
251 }
252
253 auto* event = FillWithInterning(incr_state, key, dst.get());
254
255 int count = details.time_and_len.size();
256 if (!mAggregationThreshold || count < mAggregationThreshold) {
257 protozero::PackedVarInt offsets;
258 protozero::PackedVarInt lengths;
259 for (const auto& kv : details.time_and_len) {
260 offsets.Append(kv.first - details.minTs);
261 lengths.Append(kv.second);
262 }
263
264 event->set_packet_timestamps(offsets);
265 event->set_packet_lengths(lengths);
266 } else {
267 event->set_total_duration(details.maxTs - details.minTs);
268 event->set_total_length(details.bytes);
269 event->set_total_packets(count);
270 }
271 }
272 }
273
Fill(const BundleKey & src,NetworkPacketEvent * event)274 void NetworkTraceHandler::Fill(const BundleKey& src,
275 NetworkPacketEvent* event) {
276 event->set_direction(src.egress ? TrafficDirection::DIR_EGRESS
277 : TrafficDirection::DIR_INGRESS);
278 event->set_uid(src.uid);
279 event->set_tag(src.tag);
280
281 if (src.tcpFlags.has_value()) event->set_tcp_flags(*src.tcpFlags);
282 if (src.localPort.has_value()) event->set_local_port(*src.localPort);
283 if (src.remotePort.has_value()) event->set_remote_port(*src.remotePort);
284 if (src.icmpType.has_value()) event->set_icmp_type(*src.icmpType);
285 if (src.icmpCode.has_value()) event->set_icmp_code(*src.icmpCode);
286
287 event->set_ip_proto(src.ipProto);
288
289 char ifname[IF_NAMESIZE] = {};
290 if (if_indextoname(src.ifindex, ifname) == ifname) {
291 event->set_interface(std::string(ifname));
292 } else {
293 event->set_interface("error");
294 }
295 }
296
FillWithInterning(NetworkTraceState * state,const BundleKey & key,TracePacket * dst)297 NetworkPacketBundle* NetworkTraceHandler::FillWithInterning(
298 NetworkTraceState* state, const BundleKey& key, TracePacket* dst) {
299 uint64_t iid = 0;
300 bool found = false;
301
302 if (state->iids.size() < mInternLimit) {
303 auto [iter, success] = state->iids.try_emplace(key, state->iids.size() + 1);
304 iid = iter->second;
305 found = true;
306
307 if (success) {
308 // If we successfully empaced, record the newly interned data.
309 auto* packet_context = dst->set_interned_data()->add_packet_context();
310 Fill(key, packet_context->set_ctx());
311 packet_context->set_iid(iid);
312 }
313 } else {
314 auto iter = state->iids.find(key);
315 if (iter != state->iids.end()) {
316 iid = iter->second;
317 found = true;
318 }
319 }
320
321 auto* event = dst->set_network_packet_bundle();
322 if (found) {
323 event->set_iid(iid);
324 } else {
325 Fill(key, event->set_ctx());
326 }
327
328 return event;
329 }
330
331 } // namespace bpf
332 } // namespace android
333