1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/core/profiler/utils/step_intersection.h"
16 
17 #include "tensorflow/core/lib/gtl/map_util.h"
18 #include "tensorflow/core/platform/logging.h"
19 #include "tensorflow/core/profiler/utils/timespan.h"
20 
21 namespace tensorflow {
22 namespace profiler {
23 
24 namespace {
25 
26 // Returns the timespan in this step (across all cores).
StepTimespan(const PerCoreStepInfo & percore_stepinfo)27 Timespan StepTimespan(const PerCoreStepInfo& percore_stepinfo) {
28   uint64 min_ps = kuint64max;
29   uint64 max_ps = 0;
30   for (const auto& core_stepinfo : percore_stepinfo.step_info_per_core()) {
31     const auto& stepinfo = core_stepinfo.second;
32     uint64 begin_ps = stepinfo.begin_ps();
33     uint64 end_ps = begin_ps + stepinfo.duration_ps();
34     min_ps = std::min(min_ps, begin_ps);
35     max_ps = std::max(max_ps, end_ps);
36   }
37   return (min_ps < max_ps) ? Timespan::FromEndPoints(min_ps, max_ps)
38                            : Timespan();
39 }
40 
41 // Returns the timespan across all steps in the given step_db.
AllStepsTimespan(const StepDatabaseResult & step_db)42 Timespan AllStepsTimespan(const StepDatabaseResult& step_db) {
43   uint64 min_ps = kuint64max;
44   uint64 max_ps = 0;
45   for (const auto& step : step_db.step_sequence()) {
46     Timespan timespan = StepTimespan(step);
47     uint64 begin_ps = timespan.begin_ps();
48     uint64 end_ps = timespan.end_ps();
49     min_ps = std::min(min_ps, begin_ps);
50     max_ps = std::max(max_ps, end_ps);
51   }
52   return (min_ps < max_ps) ? Timespan::FromEndPoints(min_ps, max_ps)
53                            : Timespan();
54 }
55 
56 struct AlignmentInfo {
57   StepsAlignment alignment;
58   double similarity;
59 };
60 
61 // Computes the similarity between the given two steps. The closer their
62 // timespans are, the larger is the similarity.
StepSimilarity(const PerCoreStepInfo & subordinate_step,const PerCoreStepInfo & chief_step)63 double StepSimilarity(const PerCoreStepInfo& subordinate_step,
64                       const PerCoreStepInfo& chief_step) {
65   Timespan subordinate_timespan = StepTimespan(subordinate_step);
66   Timespan chief_timespan = StepTimespan(chief_step);
67   return chief_timespan.OverlappedDurationPs(subordinate_timespan);
68 }
69 
70 // If the subordinate steps and the chief steps are aligned at the given anchor
71 // points (i.e. at the subordinate_anchor step on the subordinate sequence, at
72 // the chief_anchor step on the chief sequence), returns the corresponding
73 // AlignmentInfo.
ComputeAlignmentInfo(const StepDatabaseResult & subordinate,uint32 subordinate_anchor,const StepDatabaseResult & chief,uint32 chief_anchor)74 AlignmentInfo ComputeAlignmentInfo(const StepDatabaseResult& subordinate,
75                                    uint32 subordinate_anchor,
76                                    const StepDatabaseResult& chief,
77                                    uint32 chief_anchor) {
78   // Assumes that the step at subordinate_anchor on the subordinate sequence is
79   // aligned with the step at the chief_anchor on the chief sequence. Then the
80   // number of steps before the anchor is the minimum of the number of steps
81   // before the anchor in the subordinate and that before the anchor in the
82   // chief. Similarly, the number of steps after the anchor is the minimum of
83   // the number of steps after the anchor in the subordinate and that after the
84   // anchor in the chief.
85   uint32 pre_anchor_steps = std::min(subordinate_anchor, chief_anchor);
86   uint32 post_anchor_steps =
87       std::min(subordinate.step_sequence_size() - subordinate_anchor,
88                chief.step_sequence_size() - chief_anchor);
89   // total number of steps aligned = pre_anchor_steps + post_anchor_steps.
90   uint32 alignment_steps = pre_anchor_steps + post_anchor_steps;
91 
92   double similarity = 0;
93   // Where the aligned steps begin on the subordinate sequence.
94   uint32 begin_subordinate_idx = subordinate_anchor - pre_anchor_steps;
95   // Where the aligned steps begin on the chief sequence.
96   uint32 begin_chief_idx = chief_anchor - pre_anchor_steps;
97 
98   for (uint32 i = 0; i < alignment_steps; i++) {
99     // Accumulates the similarity at each step.
100     similarity +=
101         StepSimilarity(subordinate.step_sequence(begin_subordinate_idx + i),
102                        chief.step_sequence(begin_chief_idx + i));
103   }
104   StepsAlignment alignment = {begin_subordinate_idx, begin_chief_idx,
105                               alignment_steps};
106   return {alignment, similarity};
107 }
108 
109 // Returns the best alignment for aligning subordinate against chief.
FindStepsAlignment(const StepDatabaseResult & subordinate,const StepDatabaseResult & chief)110 StepsAlignment FindStepsAlignment(const StepDatabaseResult& subordinate,
111                                   const StepDatabaseResult& chief) {
112   double max_similarity = -1;
113   StepsAlignment alignment = {0, 0, 0};
114   if (subordinate.step_sequence_size() == 0 || chief.step_sequence_size() == 0)
115     return alignment;
116   for (auto c = 0; c < chief.step_sequence_size(); c++) {
117     AlignmentInfo info =
118         ComputeAlignmentInfo(subordinate, /*subordinate_anchor=*/0, chief, c);
119     if (info.similarity <= max_similarity) continue;
120     max_similarity = info.similarity;
121     alignment = info.alignment;
122   }
123   for (auto s = 1; s < subordinate.step_sequence_size(); s++) {
124     // s starts at 1 instead of 0, because the loop above already considers
125     // (s=0, c=0).
126     AlignmentInfo info =
127         ComputeAlignmentInfo(subordinate, s, chief, /*chief_anchor=*/0);
128     if (info.similarity <= max_similarity) continue;
129     max_similarity = info.similarity;
130     alignment = info.alignment;
131   }
132   return alignment;
133 }
134 
StringStepsAlignment(const StepsAlignment & alignment)135 std::string StringStepsAlignment(const StepsAlignment& alignment) {
136   return absl::StrCat(
137       "[begin_subordinate_idx: ", alignment.begin_subordinate_idx,
138       ", begin_chief_idx: ", alignment.begin_chief_idx,
139       ", num_steps: ", alignment.num_steps, "]");
140 }
141 
StringDstStepNumbers(const std::vector<uint32> & step_numbers)142 std::string StringDstStepNumbers(const std::vector<uint32>& step_numbers) {
143   std::string str;
144   absl::StrAppend(&str, "[");
145   for (auto i = 0; i < step_numbers.size(); i++) {
146     if (i > 0) absl::StrAppend(&str, ", ");
147     absl::StrAppend(&str, step_numbers[i]);
148   }
149   absl::StrAppend(&str, "]");
150   return str;
151 }
152 
StringSrcToDstIndexMap(uint32 src_first_step_idx,uint32 num_steps)153 std::string StringSrcToDstIndexMap(uint32 src_first_step_idx,
154                                    uint32 num_steps) {
155   std::string str;
156   absl::StrAppend(&str, "[");
157   for (auto i = 0; i < num_steps; i++) {
158     if (i > 0) absl::StrAppend(&str, ", ");
159     absl::StrAppend(&str, src_first_step_idx + i, ":", i);
160   }
161   absl::StrAppend(&str, "]");
162   return str;
163 }
164 
165 }  // namespace
166 
StepIntersection(uint32 max_steps,const absl::flat_hash_map<uint32,const StepDatabaseResult * > & perhost_stepdb)167 StepIntersection::StepIntersection(
168     uint32 max_steps,
169     const absl::flat_hash_map<uint32, const StepDatabaseResult*>&
170         perhost_stepdb) {
171   // Figures out the host with the shortest timespan among their steps (called
172   // this host the "chief").
173   chief_host_id_ = kuint32max;
174   uint64 min_duration_ps = kuint64max;
175   const StepDatabaseResult* chief_step_db = nullptr;
176   for (const auto& hostid_stepdb : perhost_stepdb) {
177     auto host_id = hostid_stepdb.first;
178     const auto& step_db = hostid_stepdb.second;
179     Timespan timespan = AllStepsTimespan(*step_db);
180     if (timespan.duration_ps() < min_duration_ps) {
181       chief_host_id_ = host_id;
182       chief_step_db = step_db;
183       min_duration_ps = timespan.duration_ps();
184     }
185   }
186   if (chief_host_id_ == kuint32max) {
187     // There is no step at all on any host.
188     steps_dropped_ = 0;
189     begin_chief_idx_ = 0;
190     end_chief_idx_ = 0;
191     return;
192   }
193 
194   uint32 max_begin_chief_idx = 0;
195   uint32 min_end_chief_idx = kuint32max;
196   // Aligns the steps in all hosts with those in the chief.
197   for (const auto& hostid_stepdb : perhost_stepdb) {
198     auto host_id = hostid_stepdb.first;
199     const auto& step_db = hostid_stepdb.second;
200     if (host_id == chief_host_id_) {
201       // Simply aligns with itself.
202       perhost_alignment_[host_id] = {
203           /*begin_subordinate_idx=*/0, /*begin_chief_idx=*/0,
204           static_cast<uint32>(step_db->step_sequence_size())};
205     } else {
206       perhost_alignment_[host_id] =
207           FindStepsAlignment(*step_db, *chief_step_db);
208     }
209     // Intersects this host's alignment with other hosts' alignments.
210     uint32 host_begin_chief_idx = perhost_alignment_[host_id].begin_chief_idx;
211     max_begin_chief_idx = std::max(max_begin_chief_idx, host_begin_chief_idx);
212     uint32 host_end_chief_idx = perhost_alignment_[host_id].begin_chief_idx +
213                                 perhost_alignment_[host_id].num_steps;
214     min_end_chief_idx = std::min(min_end_chief_idx, host_end_chief_idx);
215   }
216   DCHECK(max_begin_chief_idx <= min_end_chief_idx);
217 
218   begin_chief_idx_ = max_begin_chief_idx;
219 
220   // Takes max_steps into account.
221   uint32 num_steps = min_end_chief_idx - max_begin_chief_idx;
222   if (num_steps > max_steps) {
223     steps_dropped_ = num_steps - max_steps;
224     // TODO(ckluk): Drops from both ends to avoid incomplete steps at the
225     // beginning and end of the profile.
226     end_chief_idx_ = max_begin_chief_idx + max_steps;
227   } else {
228     steps_dropped_ = 0;
229     end_chief_idx_ = min_end_chief_idx;
230   }
231 }
232 
DstStepNumbers() const233 std::vector<uint32> StepIntersection::DstStepNumbers() const {
234   // TODO(ckluk): Honors training-loop boundaries (if more than one loop
235   // sampled).
236   std::vector<uint32> result;
237   result.reserve(NumSteps());
238   for (uint32 i = 0; i < NumSteps(); i++) {
239     result.push_back(i);
240   }
241   return result;
242 }
243 
FirstStepIndex(uint32 host_id) const244 uint32 StepIntersection::FirstStepIndex(uint32 host_id) const {
245   const auto* alignment = gtl::FindOrNull(perhost_alignment_, host_id);
246   if (alignment == nullptr) return 0;
247   DCHECK(alignment->begin_chief_idx <= begin_chief_idx_);
248   uint32 shift = begin_chief_idx_ - alignment->begin_chief_idx;
249   uint32 begin_subordinate_idx = alignment->begin_subordinate_idx + shift;
250   return begin_subordinate_idx;
251 }
252 
DebugString() const253 std::string StepIntersection::DebugString() const {
254   std::string str;
255   absl::StrAppend(&str, "chief host id_: ", chief_host_id_, "\n");
256   absl::StrAppend(&str, "begin_chief_idx_: ", begin_chief_idx_,
257                   ", num_steps: ", NumSteps(), "\n");
258   absl::StrAppend(
259       &str, "DstStepNumbers(): ", StringDstStepNumbers(DstStepNumbers()), "\n");
260 
261   std::vector<uint32> host_ids;
262   host_ids.reserve(perhost_alignment_.size());
263   for (const auto& hostid_alignment : perhost_alignment_) {
264     auto host_id = hostid_alignment.first;
265     host_ids.push_back(host_id);
266   }
267   absl::c_sort(host_ids);
268 
269   absl::StrAppend(&str, "perhost_alignment:\n");
270   for (const auto host_id : host_ids) {
271     const auto* ptr = gtl::FindOrNull(perhost_alignment_, host_id);
272     if (ptr == nullptr) continue;
273     absl::StrAppend(&str, "host: ", host_id,
274                     ", step-alignment: ", StringStepsAlignment(*ptr), "\n");
275   }
276   absl::StrAppend(&str, "SrcToDstIndexMap():\n");
277   for (const auto host_id : host_ids) {
278     absl::StrAppend(&str, "host: ", host_id, ", src-to-dst-index-map: ",
279                     StringSrcToDstIndexMap(FirstStepIndex(host_id), NumSteps()),
280                     "\n");
281   }
282   return str;
283 }
284 
285 }  // namespace profiler
286 }  // namespace tensorflow
287