1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_
18 #define SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_
19 
20 #include <sqlite3.h>
21 
22 #include <array>
23 #include <deque>
24 #include <limits>
25 #include <map>
26 #include <memory>
27 #include <string>
28 #include <unordered_map>
29 #include <vector>
30 
31 #include "perfetto/trace_processor/basic_types.h"
32 #include "perfetto/trace_processor/status.h"
33 #include "src/trace_processor/sqlite/scoped_db.h"
34 #include "src/trace_processor/sqlite/sqlite_table.h"
35 
36 namespace perfetto {
37 namespace trace_processor {
38 
39 // Implements the SPAN JOIN operation between two tables on a particular column.
40 //
41 // Span:
42 // A span is a row with a timestamp and a duration. It can is used to model
43 // operations which run for a particular *span* of time.
44 //
45 // We draw spans like so (time on the x-axis):
46 // start of span->[ time where opertion is running ]<- end of span
47 //
48 // Multiple spans can happen in parallel:
49 // [      ]
50 //    [        ]
51 //   [                    ]
52 //  [ ]
53 //
54 // The above for example, models scheduling activity on a 4-core computer for a
55 // short period of time.
56 //
57 // Span join:
58 // The span join operation can be thought of as the intersection of span tables.
59 // That is, the join table has a span for each pair of spans in the child tables
60 // where the spans overlap. Because many spans are possible in parallel, an
61 // extra metadata column (labelled the "join column") is used to distinguish
62 // between the spanned tables.
63 //
64 // For a given join key suppose these were the two span tables:
65 // Table 1:   [        ]              [      ]         [ ]
66 // Table 2:          [      ]            [  ]           [      ]
67 // Output :          [ ]                 [  ]           []
68 //
69 // All other columns apart from timestamp (ts), duration (dur) and the join key
70 // are passed through unchanged.
71 class SpanJoinOperatorTable : public SqliteTable {
72  public:
73   static constexpr int kSourceGeqOpCode = SQLITE_INDEX_CONSTRAINT_FUNCTION + 1;
74 
75   // Enum indicating whether the queries on the two inner tables should
76   // emit shadows.
77   enum class EmitShadowType {
78     // Used when the table should emit all shadow slices (both present and
79     // missing partition shadows).
80     kAll,
81 
82     // Used when the table should only emit shadows for partitions which are
83     // present.
84     kPresentPartitionOnly,
85 
86     // Used when the table should emit no shadow slices.
87     kNone,
88   };
89 
90   // Contains the definition of the child tables.
91   class TableDefinition {
92    public:
93     TableDefinition() = default;
94 
95     TableDefinition(std::string name,
96                     std::string partition_col,
97                     std::vector<SqliteTable::Column> cols,
98                     EmitShadowType emit_shadow_type,
99                     uint32_t ts_idx,
100                     uint32_t dur_idx,
101                     uint32_t partition_idx);
102 
103     // Returns whether this table should emit present partition shadow slices.
ShouldEmitPresentPartitionShadow()104     bool ShouldEmitPresentPartitionShadow() const {
105       return emit_shadow_type_ == EmitShadowType::kAll ||
106              emit_shadow_type_ == EmitShadowType::kPresentPartitionOnly;
107     }
108 
109     // Returns whether this table should emit missing partition shadow slices.
ShouldEmitMissingPartitionShadow()110     bool ShouldEmitMissingPartitionShadow() const {
111       return emit_shadow_type_ == EmitShadowType::kAll;
112     }
113 
114     // Returns whether the table is partitioned.
IsPartitioned()115     bool IsPartitioned() const { return !partition_col_.empty(); }
116 
name()117     const std::string& name() const { return name_; }
partition_col()118     const std::string& partition_col() const { return partition_col_; }
columns()119     const std::vector<SqliteTable::Column>& columns() const { return cols_; }
120 
ts_idx()121     uint32_t ts_idx() const { return ts_idx_; }
dur_idx()122     uint32_t dur_idx() const { return dur_idx_; }
partition_idx()123     uint32_t partition_idx() const { return partition_idx_; }
124 
125    private:
126     EmitShadowType emit_shadow_type_ = EmitShadowType::kNone;
127 
128     std::string name_;
129     std::string partition_col_;
130     std::vector<SqliteTable::Column> cols_;
131 
132     uint32_t ts_idx_ = std::numeric_limits<uint32_t>::max();
133     uint32_t dur_idx_ = std::numeric_limits<uint32_t>::max();
134     uint32_t partition_idx_ = std::numeric_limits<uint32_t>::max();
135   };
136 
137   // Stores information about a single subquery into one of the two child
138   // tables.
139   //
140   // This class is implemented as a state machine which steps from one slice to
141   // the next.
142   class Query {
143    public:
144     // Enum encoding the current state of the query in the state machine.
145     enum class State {
146       // Encodes that the current slice is a real slice (i.e. comes directly
147       // from the cursor).
148       kReal,
149 
150       // Encodes that the current slice is on a partition for which there is a
151       // real slice present.
152       kPresentPartitionShadow,
153 
154       // Encodes that the current slice is on a paritition(s) for which there is
155       // no real slice for those partition(s).
156       kMissingPartitionShadow,
157 
158       // Encodes that this query has reached the end.
159       kEof,
160     };
161 
162     Query(SpanJoinOperatorTable*, const TableDefinition*, sqlite3* db);
163     virtual ~Query();
164 
165     Query(Query&&) noexcept = default;
166     Query& operator=(Query&&) = default;
167 
168     enum class InitialEofBehavior {
169       kTreatAsEof,
170       kTreatAsMissingPartitionShadow
171     };
172 
173     // Initializes the query with the given constraints and query parameters.
174     util::Status Initialize(
175         const QueryConstraints& qc,
176         sqlite3_value** argv,
177         InitialEofBehavior eof_behavior = InitialEofBehavior::kTreatAsEof);
178 
179     // Forwards the query to the next valid slice.
180     util::Status Next();
181 
182     // Rewinds the query to the first valid slice
183     // This is used in the mixed partitioning case where the query with no
184     // partitions is rewound to the start on every new partition.
185     util::Status Rewind();
186 
187     // Reports the column at the given index to given context.
188     void ReportSqliteResult(sqlite3_context* context, size_t index);
189 
190     // Returns whether the cursor has reached eof.
IsEof()191     bool IsEof() const { return state_ == State::kEof; }
192 
193     // Returns whether the current slice pointed to is a real slice.
IsReal()194     bool IsReal() const { return state_ == State::kReal; }
195 
196     // Returns the first partition this slice covers (for real/single partition
197     // shadows, this is the same as partition()).
198     // This partition encodes a [start, end] (closed at start and at end) range
199     // of partitions which works as the partitions are integers.
FirstPartition()200     int64_t FirstPartition() const {
201       PERFETTO_DCHECK(!IsEof());
202       return IsMissingPartitionShadow() ? missing_partition_start_
203                                         : partition();
204     }
205 
206     // Returns the last partition this slice covers (for real/single partition
207     // shadows, this is the same as partition()).
208     // This partition encodes a [start, end] (closed at start and at end) range
209     // of partitions which works as the partitions are integers.
LastPartition()210     int64_t LastPartition() const {
211       PERFETTO_DCHECK(!IsEof());
212       return IsMissingPartitionShadow() ? missing_partition_end_ - 1
213                                         : partition();
214     }
215 
216     // Returns the end timestamp of this slice adjusted to ensure that -1
217     // duration slices always returns ts.
AdjustedTsEnd()218     int64_t AdjustedTsEnd() const {
219       PERFETTO_DCHECK(!IsEof());
220       return ts_end_ - ts() == -1 ? ts() : ts_end_;
221     }
222 
ts()223     int64_t ts() const {
224       PERFETTO_DCHECK(!IsEof());
225       return ts_;
226     }
partition()227     int64_t partition() const {
228       PERFETTO_DCHECK(!IsEof() && defn_->IsPartitioned());
229       return partition_;
230     }
231 
raw_ts_end()232     int64_t raw_ts_end() const {
233       PERFETTO_DCHECK(!IsEof());
234       return ts_end_;
235     }
236 
definition()237     const TableDefinition* definition() const { return defn_; }
238 
239    private:
240     Query(Query&) = delete;
241     Query& operator=(const Query&) = delete;
242 
243     // Returns whether the current slice pointed to is a valid slice.
244     bool IsValidSlice();
245 
246     // Forwards the query to the next valid slice.
247     util::Status FindNextValidSlice();
248 
249     // Advances the query state machine by one slice.
250     util::Status NextSliceState();
251 
252     // Forwards the cursor to point to the next real slice.
253     util::Status CursorNext();
254 
255     // Creates an SQL query from the given set of constraint strings.
256     std::string CreateSqlQuery(const std::vector<std::string>& cs) const;
257 
258     // Returns whether the current slice pointed to is a present partition
259     // shadow.
IsPresentPartitionShadow()260     bool IsPresentPartitionShadow() const {
261       return state_ == State::kPresentPartitionShadow;
262     }
263 
264     // Returns whether the current slice pointed to is a missing partition
265     // shadow.
IsMissingPartitionShadow()266     bool IsMissingPartitionShadow() const {
267       return state_ == State::kMissingPartitionShadow;
268     }
269 
270     // Returns whether the current slice pointed to is an empty shadow.
IsEmptyShadow()271     bool IsEmptyShadow() const {
272       PERFETTO_DCHECK(!IsEof());
273       return (!IsReal() && ts_ == ts_end_) ||
274              (IsMissingPartitionShadow() &&
275               missing_partition_start_ == missing_partition_end_);
276     }
277 
CursorTs()278     int64_t CursorTs() const {
279       PERFETTO_DCHECK(!cursor_eof_);
280       auto ts_idx = static_cast<int>(defn_->ts_idx());
281       return sqlite3_column_int64(stmt_.get(), ts_idx);
282     }
283 
CursorDur()284     int64_t CursorDur() const {
285       PERFETTO_DCHECK(!cursor_eof_);
286       auto dur_idx = static_cast<int>(defn_->dur_idx());
287       return sqlite3_column_int64(stmt_.get(), dur_idx);
288     }
289 
CursorPartition()290     int64_t CursorPartition() const {
291       PERFETTO_DCHECK(!cursor_eof_);
292       PERFETTO_DCHECK(defn_->IsPartitioned());
293       auto partition_idx = static_cast<int>(defn_->partition_idx());
294       return sqlite3_column_int64(stmt_.get(), partition_idx);
295     }
296 
297     State state_ = State::kMissingPartitionShadow;
298     bool cursor_eof_ = false;
299 
300     // Only valid when |state_| != kEof.
301     int64_t ts_ = 0;
302     int64_t ts_end_ = std::numeric_limits<int64_t>::max();
303 
304     // Only valid when |state_| == kReal or |state_| == kPresentPartitionShadow.
305     int64_t partition_ = std::numeric_limits<int64_t>::min();
306 
307     // Only valid when |state_| == kMissingPartitionShadow.
308     int64_t missing_partition_start_ = 0;
309     int64_t missing_partition_end_ = 0;
310 
311     std::string sql_query_;
312     ScopedStmt stmt_;
313 
314     const TableDefinition* defn_ = nullptr;
315     sqlite3* db_ = nullptr;
316     SpanJoinOperatorTable* table_ = nullptr;
317   };
318 
319   // Base class for a cursor on the span table.
320   class Cursor : public SqliteTable::Cursor {
321    public:
322     Cursor(SpanJoinOperatorTable*, sqlite3* db);
323     ~Cursor() override = default;
324 
325     int Filter(const QueryConstraints& qc,
326                sqlite3_value** argv,
327                FilterHistory) override;
328     int Column(sqlite3_context* context, int N) override;
329     int Next() override;
330     int Eof() override;
331 
332    private:
333     Cursor(Cursor&) = delete;
334     Cursor& operator=(const Cursor&) = delete;
335 
336     Cursor(Cursor&&) noexcept = default;
337     Cursor& operator=(Cursor&&) = default;
338 
339     bool IsOverlappingSpan();
340 
341     util::Status FindOverlappingSpan();
342 
343     Query* FindEarliestFinishQuery();
344 
345     Query t1_;
346     Query t2_;
347 
348     Query* next_query_ = nullptr;
349 
350     // Only valid for kMixedPartition.
351     int64_t last_mixed_partition_ = std::numeric_limits<int64_t>::min();
352 
353     SpanJoinOperatorTable* table_;
354   };
355 
356   SpanJoinOperatorTable(sqlite3*, const TraceStorage*);
357 
358   static void RegisterTable(sqlite3* db, const TraceStorage* storage);
359 
360   // Table implementation.
361   util::Status Init(int, const char* const*, SqliteTable::Schema*) override;
362   std::unique_ptr<SqliteTable::Cursor> CreateCursor() override;
363   int BestIndex(const QueryConstraints& qc, BestIndexInfo* info) override;
364   int FindFunction(const char* name, FindFunctionFn* fn, void** args) override;
365 
366  private:
367   // Columns of the span operator table.
368   enum Column {
369     kTimestamp = 0,
370     kDuration = 1,
371     kPartition = 2,
372     // All other columns are dynamic depending on the joined tables.
373   };
374 
375   // Enum indicating the possible partitionings of the two tables in span join.
376   enum class PartitioningType {
377     // Used when both tables don't have a partition specified.
378     kNoPartitioning = 0,
379 
380     // Used when both tables have the same partition specified.
381     kSamePartitioning = 1,
382 
383     // Used when one table has a partition and the other table doesn't.
384     kMixedPartitioning = 2
385   };
386 
387   // Parsed version of a table descriptor.
388   struct TableDescriptor {
389     static util::Status Parse(const std::string& raw_descriptor,
390                               TableDescriptor* descriptor);
391 
IsPartitionedTableDescriptor392     bool IsPartitioned() const { return !partition_col.empty(); }
393 
394     std::string name;
395     std::string partition_col;
396   };
397 
398   // Identifier for a column by index in a given table.
399   struct ColumnLocator {
400     const TableDefinition* defn;
401     size_t col_index;
402   };
403 
IsLeftJoin()404   bool IsLeftJoin() const { return name() == "span_left_join"; }
IsOuterJoin()405   bool IsOuterJoin() const { return name() == "span_outer_join"; }
406 
partition_col()407   const std::string& partition_col() const {
408     return t1_defn_.IsPartitioned() ? t1_defn_.partition_col()
409                                     : t2_defn_.partition_col();
410   }
411 
412   util::Status CreateTableDefinition(
413       const TableDescriptor& desc,
414       EmitShadowType emit_shadow_type,
415       SpanJoinOperatorTable::TableDefinition* defn);
416 
417   std::vector<std::string> ComputeSqlConstraintsForDefinition(
418       const TableDefinition& defn,
419       const QueryConstraints& qc,
420       sqlite3_value** argv);
421 
422   std::string GetNameForGlobalColumnIndex(const TableDefinition& defn,
423                                           int global_column);
424 
425   void CreateSchemaColsForDefn(const TableDefinition& defn,
426                                std::vector<SqliteTable::Column>* cols);
427 
428   TableDefinition t1_defn_;
429   TableDefinition t2_defn_;
430   PartitioningType partitioning_;
431   std::unordered_map<size_t, ColumnLocator> global_index_to_column_locator_;
432 
433   sqlite3* const db_;
434 };
435 
436 }  // namespace trace_processor
437 }  // namespace perfetto
438 
439 #endif  // SRC_TRACE_PROCESSOR_SQLITE_SPAN_JOIN_OPERATOR_TABLE_H_
440