// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// This schema uses proto2 syntax; every scalar field below is declared with an
// explicit `optional` label.
syntax = "proto2";

package pw.log;

// Java code generation settings: generated classes go in the pw.rpc.proto
// Java package, nested inside an outer class named Log.
// NOTE(review): the Java package (pw.rpc.proto) does not mirror the proto
// package (pw.log). Confirm this is intentional before changing it, since the
// value is part of the generated-code interface.
option java_package = "pw.rpc.proto";
option java_outer_classname = "Log";

// A log with a tokenized message, a string message, or dropped indicator.  A
// message can be one of three types:
//
//  1. A tokenized log message (recommended for production)
//  2. A non-tokenized log message (good for development)
//  3. A "log missed" tombstone, indicating that some logs were dropped
//
// Size analysis:
//
// For tokenized log messages in the common case; including the proto tag for
// the field (so adding the fields gives the total proto message size):
//
//  - message_tokenized  - 6-12 bytes, depending on # and value of arguments
//  - line_level         - 3 bytes; 4 bytes if line >= 2048 (uncommon)
//  - timestamp          - 3 bytes; assuming delta encoding (see
//                         elapsed_time_since_last_entry)
//  - thread_tokenized   - 3 bytes
//
// Total:
//
//    6-12 bytes - log
//    9-15 bytes - log + level + line
//   12-18 bytes - log + level + line + timestamp
//   15-21 bytes - log + level + line + timestamp + thread
//
// An analysis of a project's log token database revealed the following
// distribution of the number of arguments to log messages:
//
//   # args   # messages
//     0         2,700
//     1         2,400
//     2         1,200
//     3+        1,000
//
// Note: The below proto makes some compromises compared to what one might
// expect for a "clean" proto design, in order to shave bytes off of the
// messages. It is critical that the log messages are as small as possible to
// enable storing more logs in limited memory. This is why, for example, there
// is no separate "DroppedLog" type, or a "TokenizedLog" and "StringLog", which
// would add at least 2 extra bytes per message.
//
// Note: Time-related fields will likely support specifying the time as a ratio
// (period) and an absolute time separate from the current delta fields.
message LogEntry {
  // The tokenized log message. Internally, the format has a 32-bit token
  // followed by the arguments for that message. The unformatted log string
  // corresponding to the token in the token database must follow this format:
  //
  //   file|module|message
  //
  // For example:
  //
  //   ../boot/bluetooth.cc|BOOT|Bluetooth is on the fritz; error code: %d
  //
  // Note: The level and flags are not included since level and flags are
  // runtime values and so cannot be tokenized.
  //
  // Size analysis:
  //
  //   tag+wire = 1 byte
  //   size     = 1 byte; payload will almost always be < 128 bytes
  //   payload  = N bytes; typically 4-10 in practice
  //
  // Total: 2 + N ~= 6-12 bytes
  optional bytes message_tokenized = 1;

  // Packed log level and line number. Structure:
  //
  //   Level: Bottom 3 bits; level = line_level & 0x7
  //   Line: Remaining bits; line = (line_level >> 3)
  //
  // Size analysis: 1 tag byte plus the varint. A 2-byte varint carries 14
  // bits, i.e. 3 level bits + 11 line bits, so lines up to 2047 fit in 3 bytes
  // total; lines >= 2048 need a 3-byte varint (4 bytes total).
  //
  // Note: This packing saves two bytes per log message in most cases compared
  // to having line and level separately; and is zero-cost if the log backend
  // omits the line number.
  optional uint32 line_level = 2;

  // Some log messages have flags to indicate for example assert or PII. The
  // particular flags are product- and implementation-dependent. When no flags
  // are present, the field is omitted entirely.
  optional uint32 flags = 3;

  // The task or thread that created the log message.
  //
  // In practice, the task token and tag should be just 3 bytes, since a 14 bit
  // token for the task name should be enough.
  optional uint32 thread_tokenized = 4;

  // Timestamp. Note: The units here are TBD and will likely require a separate
  // mechanism to indicate units. This field is likely to change as we figure
  // out the right strategy for timestamps in Pigweed. This is a variable-sized
  // integer to enable scaling this up to a uint64 later on without impacting
  // the wire format.
  optional int64 timestamp = 5;

  // Time since the last entry. Generally, one of timestamp or this field will
  // be specified. This enables delta encoding when batching entries together.
  //
  // Size analysis for this field including tag and varint (each varint byte
  // carries 7 bits of payload):
  //
  //           < 128 ms gap                == 7 bits  == 2 bytes
  //        < 16,384 ms gap (~16 seconds)  == 14 bits == 3 bytes
  //     < 2,097,152 ms gap (~35 minutes)  == 21 bits == 4 bytes
  //   < 268,435,456 ms gap (~74 hours)    == 28 bits == 5 bytes
  //
  // Log bursts will thus consume just 2 bytes (tag + up to 127ms delta) for
  // the timestamp, which is a good improvement over timestamp in many cases.
  // Note: The units of this field are TBD and will likely require a separate
  // mechanism to indicate units. The calculations above assume milliseconds
  // and may change if the units differ.
  optional int64 elapsed_time_since_last_entry = 6;

  // Fields numbered 16 and above need a two-byte tag on the wire, one byte
  // more than fields 1-15.

  // Fully formatted textual log message.
  optional string message_string = 16;

  // For non-tokenized logging, the file name.
  optional string file = 17;

  // String representation of the task that created the log message.
  optional string thread_string = 18;

  // When the log buffers are full but more logs come in, the logs are counted
  // and a special log message is emitted with only counts for the number of
  // messages dropped. The timestamp indicates the time that the "missed logs"
  // message was inserted into the queue.
  //
  // Missed logs messages will only have one of the timestamp fields and these
  // counters (dropped, dropped_warning_or_above) specified.
  //
  // Number of log messages that were dropped.
  optional uint32 dropped = 19;

  // Number of dropped log messages at warning level or above.
  // NOTE(review): presumably a subset of `dropped` rather than an additional
  // count; confirm against the log backend.
  optional uint32 dropped_warning_or_above = 20;

  // Some messages are associated with trace events, which may carry additional
  // contextual data. This is a tuple of a data format string which could be
  // used by the decoder to identify the data (e.g. printf-style tokens) and the
  // data itself in bytes.
  //
  // Format string identifying how `data` should be decoded.
  optional string data_format_string = 21;

  // The trace event data itself, interpreted according to data_format_string.
  optional bytes data = 22;
}

// Request message for the Logs.Get RPC. It currently has no fields; keeping a
// dedicated request type allows fields to be added later without changing the
// RPC signature.
message LogRequest {}

// A batch of log entries; this is the unit streamed back by the Logs.Get RPC.
message LogEntries {
  // The log entries in this batch.
  // NOTE(review): entries are presumably in emission order, which the delta
  // encoding of LogEntry.elapsed_time_since_last_entry would depend on;
  // confirm against the producer.
  repeated LogEntry entries = 1;
}

// RPC service for retrieving logs.
service Logs {
  // Server-streaming RPC: takes a LogRequest (currently empty) and returns a
  // stream of LogEntries batches.
  rpc Get(LogRequest) returns (stream LogEntries) {}
}
166