1 /*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "rtc_tools/rtc_event_log_visualizer/alerts.h"
12
13 #include <stdio.h>
14
15 #include <algorithm>
16 #include <limits>
17 #include <map>
18 #include <string>
19
20 #include "logging/rtc_event_log/rtc_event_processor.h"
21 #include "rtc_base/checks.h"
22 #include "rtc_base/format_macros.h"
23 #include "rtc_base/logging.h"
24 #include "rtc_base/numerics/sequence_number_util.h"
25 #include "rtc_base/strings/string_builder.h"
26
27 namespace webrtc {
28
Print(FILE * file)29 void TriageHelper::Print(FILE* file) {
30 fprintf(file, "========== TRIAGE NOTIFICATIONS ==========\n");
31 for (const auto& alert : triage_alerts_) {
32 fprintf(file, "%d %s. First occurrence at %3.3lf\n", alert.second.count,
33 alert.second.explanation.c_str(), alert.second.first_occurrence);
34 }
35 fprintf(file, "========== END TRIAGE NOTIFICATIONS ==========\n");
36 }
37
AnalyzeStreamGaps(const ParsedRtcEventLog & parsed_log,PacketDirection direction)38 void TriageHelper::AnalyzeStreamGaps(const ParsedRtcEventLog& parsed_log,
39 PacketDirection direction) {
40 // With 100 packets/s (~800kbps), false positives would require 10 s without
41 // data.
42 constexpr int64_t kMaxSeqNumJump = 1000;
43 // With a 90 kHz clock, false positives would require 10 s without data.
44 constexpr int64_t kTicksPerMillisec = 90;
45 constexpr int64_t kCaptureTimeGraceMs = 10000;
46
47 std::string seq_num_explanation =
48 direction == kIncomingPacket
49 ? "Incoming RTP sequence number jumps more than 1000. Counter may "
50 "have been reset or rewritten incorrectly in a group call."
51 : "Outgoing RTP sequence number jumps more than 1000. Counter may "
52 "have been reset.";
53 std::string capture_time_explanation =
54 direction == kIncomingPacket ? "Incoming capture time jumps more than "
55 "10s. Clock might have been reset."
56 : "Outgoing capture time jumps more than "
57 "10s. Clock might have been reset.";
58 TriageAlertType seq_num_alert = direction == kIncomingPacket
59 ? TriageAlertType::kIncomingSeqNumJump
60 : TriageAlertType::kOutgoingSeqNumJump;
61 TriageAlertType capture_time_alert =
62 direction == kIncomingPacket ? TriageAlertType::kIncomingCaptureTimeJump
63 : TriageAlertType::kOutgoingCaptureTimeJump;
64
65 const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
66
67 // Check for gaps in sequence numbers and capture timestamps.
68 for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) {
69 if (IsRtxSsrc(parsed_log, direction, stream.ssrc)) {
70 continue;
71 }
72 auto packets = stream.packet_view;
73 if (packets.empty()) {
74 continue;
75 }
76 SeqNumUnwrapper<uint16_t> seq_num_unwrapper;
77 int64_t last_seq_num =
78 seq_num_unwrapper.Unwrap(packets[0].header.sequenceNumber);
79 SeqNumUnwrapper<uint32_t> capture_time_unwrapper;
80 int64_t last_capture_time =
81 capture_time_unwrapper.Unwrap(packets[0].header.timestamp);
82 int64_t last_log_time_ms = packets[0].log_time_ms();
83 for (const auto& packet : packets) {
84 if (packet.log_time_us() > segment_end_us) {
85 // Only process the first (LOG_START, LOG_END) segment.
86 break;
87 }
88
89 int64_t seq_num = seq_num_unwrapper.Unwrap(packet.header.sequenceNumber);
90 if (std::abs(seq_num - last_seq_num) > kMaxSeqNumJump) {
91 Alert(seq_num_alert, config_.GetCallTimeSec(packet.log_time_us()),
92 seq_num_explanation);
93 }
94 last_seq_num = seq_num;
95
96 int64_t capture_time =
97 capture_time_unwrapper.Unwrap(packet.header.timestamp);
98 if (std::abs(capture_time - last_capture_time) >
99 kTicksPerMillisec *
100 (kCaptureTimeGraceMs + packet.log_time_ms() - last_log_time_ms)) {
101 Alert(capture_time_alert, config_.GetCallTimeSec(packet.log_time_us()),
102 capture_time_explanation);
103 }
104 last_capture_time = capture_time;
105 }
106 }
107 }
108
AnalyzeTransmissionGaps(const ParsedRtcEventLog & parsed_log,PacketDirection direction)109 void TriageHelper::AnalyzeTransmissionGaps(const ParsedRtcEventLog& parsed_log,
110 PacketDirection direction) {
111 constexpr int64_t kMaxRtpTransmissionGap = 500000;
112 constexpr int64_t kMaxRtcpTransmissionGap = 3000000;
113 std::string rtp_explanation =
114 direction == kIncomingPacket
115 ? "No RTP packets received for more than 500ms. This indicates a "
116 "network problem. Temporary video freezes and choppy or robotic "
117 "audio is unavoidable. Unnecessary BWE drops is a known issue."
118 : "No RTP packets sent for more than 500 ms. This might be an issue "
119 "with the pacer.";
120 std::string rtcp_explanation =
121 direction == kIncomingPacket
122 ? "No RTCP packets received for more than 3 s. Could be a longer "
123 "connection outage"
124 : "No RTCP packets sent for more than 3 s. This is most likely a "
125 "bug.";
126 TriageAlertType rtp_alert = direction == kIncomingPacket
127 ? TriageAlertType::kIncomingRtpGap
128 : TriageAlertType::kOutgoingRtpGap;
129 TriageAlertType rtcp_alert = direction == kIncomingPacket
130 ? TriageAlertType::kIncomingRtcpGap
131 : TriageAlertType::kOutgoingRtcpGap;
132
133 const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
134
135 // TODO(terelius): The parser could provide a list of all packets, ordered
136 // by time, for each direction.
137 std::multimap<int64_t, const LoggedRtpPacket*> rtp_in_direction;
138 for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) {
139 for (const LoggedRtpPacket& rtp_packet : stream.packet_view)
140 rtp_in_direction.emplace(rtp_packet.log_time_us(), &rtp_packet);
141 }
142 absl::optional<int64_t> last_rtp_time;
143 for (const auto& kv : rtp_in_direction) {
144 int64_t timestamp = kv.first;
145 if (timestamp > segment_end_us) {
146 // Only process the first (LOG_START, LOG_END) segment.
147 break;
148 }
149 int64_t duration = timestamp - last_rtp_time.value_or(0);
150 if (last_rtp_time.has_value() && duration > kMaxRtpTransmissionGap) {
151 // No packet sent/received for more than 500 ms.
152 Alert(rtp_alert, config_.GetCallTimeSec(timestamp), rtp_explanation);
153 }
154 last_rtp_time.emplace(timestamp);
155 }
156
157 absl::optional<int64_t> last_rtcp_time;
158 if (direction == kIncomingPacket) {
159 for (const auto& rtcp : parsed_log.incoming_rtcp_packets()) {
160 if (rtcp.log_time_us() > segment_end_us) {
161 // Only process the first (LOG_START, LOG_END) segment.
162 break;
163 }
164 int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0);
165 if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) {
166 // No feedback sent/received for more than 2000 ms.
167 Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time_us()),
168 rtcp_explanation);
169 }
170 last_rtcp_time.emplace(rtcp.log_time_us());
171 }
172 } else {
173 for (const auto& rtcp : parsed_log.outgoing_rtcp_packets()) {
174 if (rtcp.log_time_us() > segment_end_us) {
175 // Only process the first (LOG_START, LOG_END) segment.
176 break;
177 }
178 int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0);
179 if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) {
180 // No feedback sent/received for more than 2000 ms.
181 Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time_us()),
182 rtcp_explanation);
183 }
184 last_rtcp_time.emplace(rtcp.log_time_us());
185 }
186 }
187 }
188
189 // TODO(terelius): Notifications could possibly be generated by the same code
190 // that produces the graphs. There is some code duplication that could be
191 // avoided, but that might be solved anyway when we move functionality from the
192 // analyzer to the parser.
AnalyzeLog(const ParsedRtcEventLog & parsed_log)193 void TriageHelper::AnalyzeLog(const ParsedRtcEventLog& parsed_log) {
194 AnalyzeStreamGaps(parsed_log, kIncomingPacket);
195 AnalyzeStreamGaps(parsed_log, kOutgoingPacket);
196 AnalyzeTransmissionGaps(parsed_log, kIncomingPacket);
197 AnalyzeTransmissionGaps(parsed_log, kOutgoingPacket);
198
199 const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
200
201 int64_t first_occurrence = parsed_log.last_timestamp();
202 constexpr double kMaxLossFraction = 0.05;
203 // Loss feedback
204 int64_t total_lost_packets = 0;
205 int64_t total_expected_packets = 0;
206 for (auto& bwe_update : parsed_log.bwe_loss_updates()) {
207 if (bwe_update.log_time_us() > segment_end_us) {
208 // Only process the first (LOG_START, LOG_END) segment.
209 break;
210 }
211 int64_t lost_packets = static_cast<double>(bwe_update.fraction_lost) / 255 *
212 bwe_update.expected_packets;
213 total_lost_packets += lost_packets;
214 total_expected_packets += bwe_update.expected_packets;
215 if (bwe_update.fraction_lost >= 255 * kMaxLossFraction) {
216 first_occurrence = std::min(first_occurrence, bwe_update.log_time_us());
217 }
218 }
219 double avg_outgoing_loss =
220 static_cast<double>(total_lost_packets) / total_expected_packets;
221 if (avg_outgoing_loss > kMaxLossFraction) {
222 Alert(TriageAlertType::kOutgoingHighLoss, first_occurrence,
223 "More than 5% of outgoing packets lost.");
224 }
225 }
226
227 } // namespace webrtc
228