1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/util/message_differencer.h>
36 
37 #include <algorithm>
38 #include <memory>
39 #include <utility>
40 
41 #include <google/protobuf/stubs/callback.h>
42 #include <google/protobuf/stubs/common.h>
43 #include <google/protobuf/stubs/logging.h>
44 #include <google/protobuf/stubs/stringprintf.h>
45 #include <google/protobuf/any.h>
46 #include <google/protobuf/io/printer.h>
47 #include <google/protobuf/io/zero_copy_stream.h>
48 #include <google/protobuf/io/zero_copy_stream_impl.h>
49 #include <google/protobuf/descriptor.pb.h>
50 #include <google/protobuf/dynamic_message.h>
51 #include <google/protobuf/text_format.h>
52 #include <google/protobuf/util/field_comparator.h>
53 #include <google/protobuf/stubs/strutil.h>
54 
55 
56 // Always include as last one, otherwise it can break compilation
57 #include <google/protobuf/port_def.inc>
58 
59 namespace google {
60 namespace protobuf {
61 
62 namespace util {
63 
64 // A reporter to report the total number of diffs.
65 // TODO(ykzhu): we can improve this to take into account the value differencers.
66 class NumDiffsReporter : public google::protobuf::util::MessageDifferencer::Reporter {
67  public:
NumDiffsReporter()68   NumDiffsReporter() : num_diffs_(0) {}
69 
70   // Returns the total number of diffs.
GetNumDiffs() const71   int32 GetNumDiffs() const { return num_diffs_; }
Reset()72   void Reset() { num_diffs_ = 0; }
73 
74   // Report that a field has been added into Message2.
ReportAdded(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)75   void ReportAdded(
76       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
77       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
78           field_path) override {
79     ++num_diffs_;
80   }
81 
82   // Report that a field has been deleted from Message1.
ReportDeleted(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)83   void ReportDeleted(
84       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
85       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
86           field_path) override {
87     ++num_diffs_;
88   }
89 
90   // Report that the value of a field has been modified.
ReportModified(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)91   void ReportModified(
92       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
93       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
94           field_path) override {
95     ++num_diffs_;
96   }
97 
98  private:
99   int32 num_diffs_;
100 };
101 
102 // When comparing a repeated field as map, MultipleFieldMapKeyComparator can
103 // be used to specify multiple fields as key for key comparison.
104 // Two elements of a repeated field will be regarded as having the same key
105 // iff they have the same value for every specified key field.
106 // Note that you can also specify only one field as key.
107 class MessageDifferencer::MultipleFieldsMapKeyComparator
108     : public MessageDifferencer::MapKeyComparator {
109  public:
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)110   MultipleFieldsMapKeyComparator(
111       MessageDifferencer* message_differencer,
112       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths)
113       : message_differencer_(message_differencer),
114         key_field_paths_(key_field_paths) {
115     GOOGLE_CHECK(!key_field_paths_.empty());
116     for (int i = 0; i < key_field_paths_.size(); ++i) {
117       GOOGLE_CHECK(!key_field_paths_[i].empty());
118     }
119   }
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const FieldDescriptor * key)120   MultipleFieldsMapKeyComparator(MessageDifferencer* message_differencer,
121                                  const FieldDescriptor* key)
122       : message_differencer_(message_differencer) {
123     std::vector<const FieldDescriptor*> key_field_path;
124     key_field_path.push_back(key);
125     key_field_paths_.push_back(key_field_path);
126   }
IsMatch(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields) const127   bool IsMatch(const Message& message1, const Message& message2,
128                const std::vector<SpecificField>& parent_fields) const override {
129     for (int i = 0; i < key_field_paths_.size(); ++i) {
130       if (!IsMatchInternal(message1, message2, parent_fields,
131                            key_field_paths_[i], 0)) {
132         return false;
133       }
134     }
135     return true;
136   }
137 
138  private:
IsMatchInternal(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields,const std::vector<const FieldDescriptor * > & key_field_path,int path_index) const139   bool IsMatchInternal(
140       const Message& message1, const Message& message2,
141       const std::vector<SpecificField>& parent_fields,
142       const std::vector<const FieldDescriptor*>& key_field_path,
143       int path_index) const {
144     const FieldDescriptor* field = key_field_path[path_index];
145     std::vector<SpecificField> current_parent_fields(parent_fields);
146     if (path_index == key_field_path.size() - 1) {
147       if (field->is_repeated()) {
148         if (!message_differencer_->CompareRepeatedField(
149                 message1, message2, field, &current_parent_fields)) {
150           return false;
151         }
152       } else {
153         if (!message_differencer_->CompareFieldValueUsingParentFields(
154                 message1, message2, field, -1, -1, &current_parent_fields)) {
155           return false;
156         }
157       }
158       return true;
159     } else {
160       const Reflection* reflection1 = message1.GetReflection();
161       const Reflection* reflection2 = message2.GetReflection();
162       bool has_field1 = reflection1->HasField(message1, field);
163       bool has_field2 = reflection2->HasField(message2, field);
164       if (!has_field1 && !has_field2) {
165         return true;
166       }
167       if (has_field1 != has_field2) {
168         return false;
169       }
170       SpecificField specific_field;
171       specific_field.field = field;
172       current_parent_fields.push_back(specific_field);
173       return IsMatchInternal(reflection1->GetMessage(message1, field),
174                              reflection2->GetMessage(message2, field),
175                              current_parent_fields, key_field_path,
176                              path_index + 1);
177     }
178   }
179   MessageDifferencer* message_differencer_;
180   std::vector<std::vector<const FieldDescriptor*> > key_field_paths_;
181   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
182 };
183 
184 // Preserve the order when treating repeated field as SMART_LIST. The current
185 // implementation is to find the longest matching sequence from the first
186 // element. The optimal solution requires to use //util/diff/lcs.h, which is
187 // not open sourced yet. Overwrite this method if you want to have that.
188 // TODO(ykzhu): change to use LCS once it is open sourced.
MatchIndicesPostProcessorForSmartList(std::vector<int> * match_list1,std::vector<int> * match_list2)189 void MatchIndicesPostProcessorForSmartList(
190     std::vector<int>* match_list1, std::vector<int>* match_list2) {
191   int last_matched_index = -1;
192   for (int i = 0; i < match_list1->size(); ++i) {
193     if (match_list1->at(i) < 0) {
194       continue;
195     }
196     if (last_matched_index < 0 || match_list1->at(i) > last_matched_index) {
197       last_matched_index = match_list1->at(i);
198     } else {
199       match_list2->at(match_list1->at(i)) = -1;
200       match_list1->at(i) = -1;
201     }
202   }
203 }
204 
MapEntryKeyComparator(MessageDifferencer * message_differencer)205 MessageDifferencer::MapEntryKeyComparator::MapEntryKeyComparator(
206     MessageDifferencer* message_differencer)
207     : message_differencer_(message_differencer) {}
208 
IsMatch(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields) const209 bool MessageDifferencer::MapEntryKeyComparator::IsMatch(
210     const Message& message1, const Message& message2,
211     const std::vector<SpecificField>& parent_fields) const {
212   // Map entry has its key in the field with tag 1.  See the comment for
213   // map_entry in MessageOptions.
214   const FieldDescriptor* key = message1.GetDescriptor()->FindFieldByNumber(1);
215   // If key is not present in message1 and we're doing partial comparison or if
216   // map key is explicitly ignored treat the field as set instead,
217   const bool treat_as_set =
218       (message_differencer_->scope() == PARTIAL &&
219        !message1.GetReflection()->HasField(message1, key)) ||
220       message_differencer_->IsIgnored(message1, message2, key, parent_fields);
221 
222   std::vector<SpecificField> current_parent_fields(parent_fields);
223   if (treat_as_set) {
224     return message_differencer_->Compare(message1, message2,
225                                          &current_parent_fields);
226   }
227   return message_differencer_->CompareFieldValueUsingParentFields(
228       message1, message2, key, -1, -1, &current_parent_fields);
229 }
230 
Equals(const Message & message1,const Message & message2)231 bool MessageDifferencer::Equals(const Message& message1,
232                                 const Message& message2) {
233   MessageDifferencer differencer;
234 
235   return differencer.Compare(message1, message2);
236 }
237 
Equivalent(const Message & message1,const Message & message2)238 bool MessageDifferencer::Equivalent(const Message& message1,
239                                     const Message& message2) {
240   MessageDifferencer differencer;
241   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
242 
243   return differencer.Compare(message1, message2);
244 }
245 
ApproximatelyEquals(const Message & message1,const Message & message2)246 bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
247                                              const Message& message2) {
248   MessageDifferencer differencer;
249   differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
250 
251   return differencer.Compare(message1, message2);
252 }
253 
ApproximatelyEquivalent(const Message & message1,const Message & message2)254 bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
255                                                  const Message& message2) {
256   MessageDifferencer differencer;
257   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
258   differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
259 
260   return differencer.Compare(message1, message2);
261 }
262 
263 // ===========================================================================
264 
MessageDifferencer()265 MessageDifferencer::MessageDifferencer()
266     : reporter_(NULL),
267       field_comparator_(NULL),
268       message_field_comparison_(EQUAL),
269       scope_(FULL),
270       repeated_field_comparison_(AS_LIST),
271       map_entry_key_comparator_(this),
272       report_matches_(false),
273       report_moves_(true),
274       report_ignores_(true),
275       output_string_(nullptr),
276       match_indices_for_smart_list_callback_(
277           MatchIndicesPostProcessorForSmartList) {}
278 
~MessageDifferencer()279 MessageDifferencer::~MessageDifferencer() {
280   for (int i = 0; i < owned_key_comparators_.size(); ++i) {
281     delete owned_key_comparators_[i];
282   }
283   for (int i = 0; i < ignore_criteria_.size(); ++i) {
284     delete ignore_criteria_[i];
285   }
286 }
287 
set_field_comparator(FieldComparator * comparator)288 void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
289   GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
290   field_comparator_ = comparator;
291 }
292 
set_message_field_comparison(MessageFieldComparison comparison)293 void MessageDifferencer::set_message_field_comparison(
294     MessageFieldComparison comparison) {
295   message_field_comparison_ = comparison;
296 }
297 
set_scope(Scope scope)298 void MessageDifferencer::set_scope(Scope scope) { scope_ = scope; }
299 
scope()300 MessageDifferencer::Scope MessageDifferencer::scope() { return scope_; }
301 
set_float_comparison(FloatComparison comparison)302 void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
303   default_field_comparator_.set_float_comparison(
304       comparison == EXACT ? DefaultFieldComparator::EXACT
305                           : DefaultFieldComparator::APPROXIMATE);
306 }
307 
set_repeated_field_comparison(RepeatedFieldComparison comparison)308 void MessageDifferencer::set_repeated_field_comparison(
309     RepeatedFieldComparison comparison) {
310   repeated_field_comparison_ = comparison;
311 }
312 
CheckRepeatedFieldComparisons(const FieldDescriptor * field,const RepeatedFieldComparison & new_comparison)313 void MessageDifferencer::CheckRepeatedFieldComparisons(
314     const FieldDescriptor* field,
315     const RepeatedFieldComparison& new_comparison) {
316   GOOGLE_CHECK(field->is_repeated())
317       << "Field must be repeated: " << field->full_name();
318   const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
319   GOOGLE_CHECK(key_comparator == NULL)
320       << "Cannot treat this repeated field as both MAP and " << new_comparison
321       << " for"
322       << " comparison.  Field name is: " << field->full_name();
323   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
324             repeated_field_comparisons_.end() ||
325         repeated_field_comparisons_[field] == new_comparison)
326       << "Cannot treat the same field as both "
327       << repeated_field_comparisons_[field] << " and " << new_comparison
328       << ". Field name is: " << field->full_name();
329 }
330 
TreatAsSet(const FieldDescriptor * field)331 void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
332   CheckRepeatedFieldComparisons(field, AS_SET);
333   repeated_field_comparisons_[field] = AS_SET;
334 }
335 
TreatAsSmartSet(const FieldDescriptor * field)336 void MessageDifferencer::TreatAsSmartSet(const FieldDescriptor* field) {
337   CheckRepeatedFieldComparisons(field, AS_SMART_SET);
338   repeated_field_comparisons_[field] = AS_SMART_SET;
339 }
340 
SetMatchIndicesForSmartListCallback(std::function<void (std::vector<int> *,std::vector<int> *)> callback)341 void MessageDifferencer::SetMatchIndicesForSmartListCallback(
342     std::function<void(std::vector<int>*, std::vector<int>*)> callback) {
343   match_indices_for_smart_list_callback_ = callback;
344 }
345 
TreatAsList(const FieldDescriptor * field)346 void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
347   CheckRepeatedFieldComparisons(field, AS_LIST);
348   repeated_field_comparisons_[field] = AS_LIST;
349 }
350 
TreatAsSmartList(const FieldDescriptor * field)351 void MessageDifferencer::TreatAsSmartList(const FieldDescriptor* field) {
352   CheckRepeatedFieldComparisons(field, AS_SMART_LIST);
353   repeated_field_comparisons_[field] = AS_SMART_LIST;
354 }
355 
TreatAsMap(const FieldDescriptor * field,const FieldDescriptor * key)356 void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
357                                     const FieldDescriptor* key) {
358   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
359       << "Field has to be message type.  Field name is: " << field->full_name();
360   GOOGLE_CHECK(key->containing_type() == field->message_type())
361       << key->full_name()
362       << " must be a direct subfield within the repeated field "
363       << field->full_name() << ", not " << key->containing_type()->full_name();
364   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
365         repeated_field_comparisons_.end())
366       << "Cannot treat the same field as both "
367       << repeated_field_comparisons_[field]
368       << " and MAP. Field name is: " << field->full_name();
369   MapKeyComparator* key_comparator =
370       new MultipleFieldsMapKeyComparator(this, key);
371   owned_key_comparators_.push_back(key_comparator);
372   map_field_key_comparator_[field] = key_comparator;
373 }
374 
TreatAsMapWithMultipleFieldsAsKey(const FieldDescriptor * field,const std::vector<const FieldDescriptor * > & key_fields)375 void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
376     const FieldDescriptor* field,
377     const std::vector<const FieldDescriptor*>& key_fields) {
378   std::vector<std::vector<const FieldDescriptor*> > key_field_paths;
379   for (int i = 0; i < key_fields.size(); ++i) {
380     std::vector<const FieldDescriptor*> key_field_path;
381     key_field_path.push_back(key_fields[i]);
382     key_field_paths.push_back(key_field_path);
383   }
384   TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
385 }
386 
TreatAsMapWithMultipleFieldPathsAsKey(const FieldDescriptor * field,const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)387 void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
388     const FieldDescriptor* field,
389     const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths) {
390   GOOGLE_CHECK(field->is_repeated())
391       << "Field must be repeated: " << field->full_name();
392   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
393       << "Field has to be message type.  Field name is: " << field->full_name();
394   for (int i = 0; i < key_field_paths.size(); ++i) {
395     const std::vector<const FieldDescriptor*>& key_field_path =
396         key_field_paths[i];
397     for (int j = 0; j < key_field_path.size(); ++j) {
398       const FieldDescriptor* parent_field =
399           j == 0 ? field : key_field_path[j - 1];
400       const FieldDescriptor* child_field = key_field_path[j];
401       GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
402           << child_field->full_name()
403           << " must be a direct subfield within the field: "
404           << parent_field->full_name();
405       if (j != 0) {
406         GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
407             << parent_field->full_name() << " has to be of type message.";
408         GOOGLE_CHECK(!parent_field->is_repeated())
409             << parent_field->full_name() << " cannot be a repeated field.";
410       }
411     }
412   }
413   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
414         repeated_field_comparisons_.end())
415       << "Cannot treat the same field as both "
416       << repeated_field_comparisons_[field]
417       << " and MAP. Field name is: " << field->full_name();
418   MapKeyComparator* key_comparator =
419       new MultipleFieldsMapKeyComparator(this, key_field_paths);
420   owned_key_comparators_.push_back(key_comparator);
421   map_field_key_comparator_[field] = key_comparator;
422 }
423 
TreatAsMapUsingKeyComparator(const FieldDescriptor * field,const MapKeyComparator * key_comparator)424 void MessageDifferencer::TreatAsMapUsingKeyComparator(
425     const FieldDescriptor* field, const MapKeyComparator* key_comparator) {
426   GOOGLE_CHECK(field->is_repeated())
427       << "Field must be repeated: " << field->full_name();
428   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
429         repeated_field_comparisons_.end())
430       << "Cannot treat the same field as both "
431       << repeated_field_comparisons_[field]
432       << " and MAP. Field name is: " << field->full_name();
433   map_field_key_comparator_[field] = key_comparator;
434 }
435 
AddIgnoreCriteria(IgnoreCriteria * ignore_criteria)436 void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
437   ignore_criteria_.push_back(ignore_criteria);
438 }
439 
IgnoreField(const FieldDescriptor * field)440 void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
441   ignored_fields_.insert(field);
442 }
443 
SetFractionAndMargin(const FieldDescriptor * field,double fraction,double margin)444 void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
445                                               double fraction, double margin) {
446   default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
447 }
448 
ReportDifferencesToString(std::string * output)449 void MessageDifferencer::ReportDifferencesToString(std::string* output) {
450   GOOGLE_DCHECK(output) << "Specified output string was NULL";
451 
452   output_string_ = output;
453   output_string_->clear();
454 }
455 
ReportDifferencesTo(Reporter * reporter)456 void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
457   // If an output string is set, clear it to prevent
458   // it superceding the specified reporter.
459   if (output_string_) {
460     output_string_ = NULL;
461   }
462 
463   reporter_ = reporter;
464 }
465 
FieldBefore(const FieldDescriptor * field1,const FieldDescriptor * field2)466 bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
467                                      const FieldDescriptor* field2) {
468   // Handle sentinel values (i.e. make sure NULLs are always ordered
469   // at the end of the list).
470   if (field1 == NULL) {
471     return false;
472   }
473 
474   if (field2 == NULL) {
475     return true;
476   }
477 
478   // Always order fields by their tag number
479   return (field1->number() < field2->number());
480 }
481 
Compare(const Message & message1,const Message & message2)482 bool MessageDifferencer::Compare(const Message& message1,
483                                  const Message& message2) {
484   std::vector<SpecificField> parent_fields;
485 
486   bool result = false;
487 
488   // Setup the internal reporter if need be.
489   if (output_string_) {
490     io::StringOutputStream output_stream(output_string_);
491     StreamReporter reporter(&output_stream);
492     reporter_ = &reporter;
493     result = Compare(message1, message2, &parent_fields);
494     reporter_ = NULL;
495   } else {
496     result = Compare(message1, message2, &parent_fields);
497   }
498 
499   return result;
500 }
501 
CompareWithFields(const Message & message1,const Message & message2,const std::vector<const FieldDescriptor * > & message1_fields_arg,const std::vector<const FieldDescriptor * > & message2_fields_arg)502 bool MessageDifferencer::CompareWithFields(
503     const Message& message1, const Message& message2,
504     const std::vector<const FieldDescriptor*>& message1_fields_arg,
505     const std::vector<const FieldDescriptor*>& message2_fields_arg) {
506   if (message1.GetDescriptor() != message2.GetDescriptor()) {
507     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
508                 << "descriptors.";
509     return false;
510   }
511 
512   std::vector<SpecificField> parent_fields;
513 
514   bool result = false;
515 
516   FieldDescriptorArray message1_fields(message1_fields_arg.size() + 1);
517   FieldDescriptorArray message2_fields(message2_fields_arg.size() + 1);
518 
519   std::copy(message1_fields_arg.cbegin(), message1_fields_arg.cend(),
520             message1_fields.begin());
521   std::copy(message2_fields_arg.cbegin(), message2_fields_arg.cend(),
522             message2_fields.begin());
523 
524   // Append sentinel values.
525   message1_fields[message1_fields_arg.size()] = nullptr;
526   message2_fields[message2_fields_arg.size()] = nullptr;
527 
528   std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
529   std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
530 
531   // Setup the internal reporter if need be.
532   if (output_string_) {
533     io::StringOutputStream output_stream(output_string_);
534     StreamReporter reporter(&output_stream);
535     reporter_ = &reporter;
536     result = CompareRequestedFieldsUsingSettings(
537         message1, message2, message1_fields, message2_fields, &parent_fields);
538     reporter_ = NULL;
539   } else {
540     result = CompareRequestedFieldsUsingSettings(
541         message1, message2, message1_fields, message2_fields, &parent_fields);
542   }
543 
544   return result;
545 }
546 
Compare(const Message & message1,const Message & message2,std::vector<SpecificField> * parent_fields)547 bool MessageDifferencer::Compare(const Message& message1,
548                                  const Message& message2,
549                                  std::vector<SpecificField>* parent_fields) {
550   const Descriptor* descriptor1 = message1.GetDescriptor();
551   const Descriptor* descriptor2 = message2.GetDescriptor();
552   if (descriptor1 != descriptor2) {
553     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
554                 << "descriptors. " << descriptor1->full_name() << " vs "
555                 << descriptor2->full_name();
556     return false;
557   }
558   // Expand google.protobuf.Any payload if possible.
559   if (descriptor1->full_name() == internal::kAnyFullTypeName) {
560     std::unique_ptr<Message> data1;
561     std::unique_ptr<Message> data2;
562     if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
563       // Avoid DFATAL for different descriptors in google.protobuf.Any payloads.
564       if (data1->GetDescriptor() != data2->GetDescriptor()) {
565         return false;
566       }
567       return Compare(*data1, *data2, parent_fields);
568     }
569   }
570   const Reflection* reflection1 = message1.GetReflection();
571   const Reflection* reflection2 = message2.GetReflection();
572 
573   bool unknown_compare_result = true;
574   // Ignore unknown fields in EQUIVALENT mode
575   if (message_field_comparison_ != EQUIVALENT) {
576     const UnknownFieldSet* unknown_field_set1 =
577         &reflection1->GetUnknownFields(message1);
578     const UnknownFieldSet* unknown_field_set2 =
579         &reflection2->GetUnknownFields(message2);
580     if (!CompareUnknownFields(message1, message2, *unknown_field_set1,
581                               *unknown_field_set2, parent_fields)) {
582       if (reporter_ == NULL) {
583         return false;
584       };
585       unknown_compare_result = false;
586     }
587   }
588 
589   FieldDescriptorArray message1_fields = RetrieveFields(message1, true);
590   FieldDescriptorArray message2_fields = RetrieveFields(message2, false);
591 
592   return CompareRequestedFieldsUsingSettings(
593       message1, message2,
594       message1_fields, message2_fields,
595       parent_fields) && unknown_compare_result;
596 }
597 
RetrieveFields(const Message & message,bool base_message)598 FieldDescriptorArray MessageDifferencer::RetrieveFields(const Message& message,
599                                                         bool base_message) {
600   const Descriptor* descriptor = message.GetDescriptor();
601 
602   tmp_message_fields_.clear();
603   tmp_message_fields_.reserve(descriptor->field_count() + 1);
604 
605   const Reflection* reflection = message.GetReflection();
606   if (descriptor->options().map_entry()) {
607     if (this->scope_ == PARTIAL && base_message) {
608       reflection->ListFields(message, &tmp_message_fields_);
609     } else {
610       // Map entry fields are always considered present.
611       for (int i = 0; i < descriptor->field_count(); i++) {
612         tmp_message_fields_.push_back(descriptor->field(i));
613       }
614     }
615   } else {
616     reflection->ListFields(message, &tmp_message_fields_);
617   }
618   // Add sentinel values to deal with the
619   // case where the number of the fields in
620   // each list are different.
621   tmp_message_fields_.push_back(nullptr);
622 
623   FieldDescriptorArray message_fields(tmp_message_fields_.begin(),
624                                       tmp_message_fields_.end());
625 
626   return message_fields;
627 }
628 
CompareRequestedFieldsUsingSettings(const Message & message1,const Message & message2,const FieldDescriptorArray & message1_fields,const FieldDescriptorArray & message2_fields,std::vector<SpecificField> * parent_fields)629 bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
630     const Message& message1, const Message& message2,
631     const FieldDescriptorArray& message1_fields,
632     const FieldDescriptorArray& message2_fields,
633     std::vector<SpecificField>* parent_fields) {
634   if (scope_ == FULL) {
635     if (message_field_comparison_ == EQUIVALENT) {
636       // We need to merge the field lists of both messages (i.e.
637       // we are merely checking for a difference in field values,
638       // rather than the addition or deletion of fields).
639       FieldDescriptorArray fields_union =
640           CombineFields(message1_fields, FULL, message2_fields, FULL);
641       return CompareWithFieldsInternal(message1, message2, fields_union,
642                                        fields_union, parent_fields);
643     } else {
644       // Simple equality comparison, use the unaltered field lists.
645       return CompareWithFieldsInternal(message1, message2, message1_fields,
646                                        message2_fields, parent_fields);
647     }
648   } else {
649     if (message_field_comparison_ == EQUIVALENT) {
650       // We use the list of fields for message1 for both messages when
651       // comparing.  This way, extra fields in message2 are ignored,
652       // and missing fields in message2 use their default value.
653       return CompareWithFieldsInternal(message1, message2, message1_fields,
654                                        message1_fields, parent_fields);
655     } else {
656       // We need to consider the full list of fields for message1
657       // but only the intersection for message2.  This way, any fields
658       // only present in message2 will be ignored, but any fields only
659       // present in message1 will be marked as a difference.
660       FieldDescriptorArray fields_intersection =
661           CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL);
662       return CompareWithFieldsInternal(message1, message2, message1_fields,
663                                        fields_intersection, parent_fields);
664     }
665   }
666 }
667 
CombineFields(const FieldDescriptorArray & fields1,Scope fields1_scope,const FieldDescriptorArray & fields2,Scope fields2_scope)668 FieldDescriptorArray MessageDifferencer::CombineFields(
669     const FieldDescriptorArray& fields1, Scope fields1_scope,
670     const FieldDescriptorArray& fields2, Scope fields2_scope) {
671   int index1 = 0;
672   int index2 = 0;
673 
674   tmp_message_fields_.clear();
675 
676   while (index1 < fields1.size() && index2 < fields2.size()) {
677     const FieldDescriptor* field1 = fields1[index1];
678     const FieldDescriptor* field2 = fields2[index2];
679 
680     if (FieldBefore(field1, field2)) {
681       if (fields1_scope == FULL) {
682         tmp_message_fields_.push_back(fields1[index1]);
683       }
684       ++index1;
685     } else if (FieldBefore(field2, field1)) {
686       if (fields2_scope == FULL) {
687         tmp_message_fields_.push_back(fields2[index2]);
688       }
689       ++index2;
690     } else {
691       tmp_message_fields_.push_back(fields1[index1]);
692       ++index1;
693       ++index2;
694     }
695   }
696 
697   tmp_message_fields_.push_back(nullptr);
698 
699   FieldDescriptorArray combined_fields(tmp_message_fields_.begin(),
700                                        tmp_message_fields_.end());
701 
702   return combined_fields;
703 }
704 
CompareWithFieldsInternal(const Message & message1,const Message & message2,const FieldDescriptorArray & message1_fields,const FieldDescriptorArray & message2_fields,std::vector<SpecificField> * parent_fields)705 bool MessageDifferencer::CompareWithFieldsInternal(
706     const Message& message1, const Message& message2,
707     const FieldDescriptorArray& message1_fields,
708     const FieldDescriptorArray& message2_fields,
709     std::vector<SpecificField>* parent_fields) {
710   bool isDifferent = false;
711   int field_index1 = 0;
712   int field_index2 = 0;
713 
714   const Reflection* reflection1 = message1.GetReflection();
715   const Reflection* reflection2 = message2.GetReflection();
716 
717   while (true) {
718     const FieldDescriptor* field1 = message1_fields[field_index1];
719     const FieldDescriptor* field2 = message2_fields[field_index2];
720 
721     // Once we have reached sentinel values, we are done the comparison.
722     if (field1 == NULL && field2 == NULL) {
723       break;
724     }
725 
726     // Check for differences in the field itself.
727     if (FieldBefore(field1, field2)) {
728       // Field 1 is not in the field list for message 2.
729       if (IsIgnored(message1, message2, field1, *parent_fields)) {
730         // We are ignoring field1. Report the ignore and move on to
731         // the next field in message1_fields.
732         if (reporter_ != NULL) {
733           SpecificField specific_field;
734           specific_field.field = field1;
735 
736           parent_fields->push_back(specific_field);
737           if (report_ignores_) {
738             reporter_->ReportIgnored(message1, message2, *parent_fields);
739           }
740           parent_fields->pop_back();
741         }
742         ++field_index1;
743         continue;
744       }
745 
746       if (reporter_ != NULL) {
747         assert(field1 != NULL);
748         int count = field1->is_repeated()
749                         ? reflection1->FieldSize(message1, field1)
750                         : 1;
751 
752         for (int i = 0; i < count; ++i) {
753           SpecificField specific_field;
754           specific_field.field = field1;
755           specific_field.index = field1->is_repeated() ? i : -1;
756 
757           parent_fields->push_back(specific_field);
758           reporter_->ReportDeleted(message1, message2, *parent_fields);
759           parent_fields->pop_back();
760         }
761 
762         isDifferent = true;
763       } else {
764         return false;
765       }
766 
767       ++field_index1;
768       continue;
769     } else if (FieldBefore(field2, field1)) {
770       // Field 2 is not in the field list for message 1.
771       if (IsIgnored(message1, message2, field2, *parent_fields)) {
772         // We are ignoring field2. Report the ignore and move on to
773         // the next field in message2_fields.
774         if (reporter_ != NULL) {
775           SpecificField specific_field;
776           specific_field.field = field2;
777 
778           parent_fields->push_back(specific_field);
779           if (report_ignores_) {
780             reporter_->ReportIgnored(message1, message2, *parent_fields);
781           }
782           parent_fields->pop_back();
783         }
784         ++field_index2;
785         continue;
786       }
787 
788       if (reporter_ != NULL) {
789         int count = field2->is_repeated()
790                         ? reflection2->FieldSize(message2, field2)
791                         : 1;
792 
793         for (int i = 0; i < count; ++i) {
794           SpecificField specific_field;
795           specific_field.field = field2;
796           specific_field.index = field2->is_repeated() ? i : -1;
797           specific_field.new_index = specific_field.index;
798 
799           parent_fields->push_back(specific_field);
800           reporter_->ReportAdded(message1, message2, *parent_fields);
801           parent_fields->pop_back();
802         }
803 
804         isDifferent = true;
805       } else {
806         return false;
807       }
808 
809       ++field_index2;
810       continue;
811     }
812 
813     // By this point, field1 and field2 are guarenteed to point to the same
814     // field, so we can now compare the values.
815     if (IsIgnored(message1, message2, field1, *parent_fields)) {
816       // Ignore this field. Report and move on.
817       if (reporter_ != NULL) {
818         SpecificField specific_field;
819         specific_field.field = field1;
820 
821         parent_fields->push_back(specific_field);
822         if (report_ignores_) {
823           reporter_->ReportIgnored(message1, message2, *parent_fields);
824         }
825         parent_fields->pop_back();
826       }
827 
828       ++field_index1;
829       ++field_index2;
830       continue;
831     }
832 
833     bool fieldDifferent = false;
834     assert(field1 != NULL);
835     if (field1->is_repeated()) {
836       fieldDifferent =
837           !CompareRepeatedField(message1, message2, field1, parent_fields);
838       if (fieldDifferent) {
839         if (reporter_ == NULL) return false;
840         isDifferent = true;
841       }
842     } else {
843       fieldDifferent = !CompareFieldValueUsingParentFields(
844           message1, message2, field1, -1, -1, parent_fields);
845 
846       // If we have found differences, either report them or terminate if
847       // no reporter is present.
848       if (fieldDifferent && reporter_ == NULL) {
849         return false;
850       }
851 
852       if (reporter_ != NULL) {
853         SpecificField specific_field;
854         specific_field.field = field1;
855         parent_fields->push_back(specific_field);
856         if (fieldDifferent) {
857           reporter_->ReportModified(message1, message2, *parent_fields);
858           isDifferent = true;
859         } else if (report_matches_) {
860           reporter_->ReportMatched(message1, message2, *parent_fields);
861         }
862         parent_fields->pop_back();
863       }
864     }
865     // Increment the field indicies.
866     ++field_index1;
867     ++field_index2;
868   }
869 
870   return !isDifferent;
871 }
872 
IsMatch(const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const Message * message1,const Message * message2,const std::vector<SpecificField> & parent_fields,Reporter * reporter,int index1,int index2)873 bool MessageDifferencer::IsMatch(
874     const FieldDescriptor* repeated_field,
875     const MapKeyComparator* key_comparator, const Message* message1,
876     const Message* message2, const std::vector<SpecificField>& parent_fields,
877     Reporter* reporter, int index1, int index2) {
878   std::vector<SpecificField> current_parent_fields(parent_fields);
879   if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
880     return CompareFieldValueUsingParentFields(*message1, *message2,
881                                               repeated_field, index1, index2,
882                                               &current_parent_fields);
883   }
884   // Back up the Reporter and output_string_.  They will be reset in the
885   // following code.
886   Reporter* backup_reporter = reporter_;
887   std::string* output_string = output_string_;
888   reporter_ = reporter;
889   output_string_ = NULL;
890   bool match;
891 
892   if (key_comparator == NULL) {
893     match = CompareFieldValueUsingParentFields(*message1, *message2,
894                                                repeated_field, index1, index2,
895                                                &current_parent_fields);
896   } else {
897     const Reflection* reflection1 = message1->GetReflection();
898     const Reflection* reflection2 = message2->GetReflection();
899     const Message& m1 =
900         reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
901     const Message& m2 =
902         reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
903     SpecificField specific_field;
904     specific_field.field = repeated_field;
905     specific_field.index = index1;
906     specific_field.new_index = index2;
907     current_parent_fields.push_back(specific_field);
908     match = key_comparator->IsMatch(m1, m2, current_parent_fields);
909   }
910 
911   reporter_ = backup_reporter;
912   output_string_ = output_string;
913   return match;
914 }
915 
CompareRepeatedField(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,std::vector<SpecificField> * parent_fields)916 bool MessageDifferencer::CompareRepeatedField(
917     const Message& message1, const Message& message2,
918     const FieldDescriptor* repeated_field,
919     std::vector<SpecificField>* parent_fields) {
920   // the input FieldDescriptor is guaranteed to be repeated field.
921   const Reflection* reflection1 = message1.GetReflection();
922   const Reflection* reflection2 = message2.GetReflection();
923   const int count1 = reflection1->FieldSize(message1, repeated_field);
924   const int count2 = reflection2->FieldSize(message2, repeated_field);
925   const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
926 
927   // If the field is not treated as subset and no detailed reports is needed,
928   // we do a quick check on the number of the elements to avoid unnecessary
929   // comparison.
930   if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
931     return false;
932   }
933   // A match can never be found if message1 has more items than message2.
934   if (count1 > count2 && reporter_ == NULL) {
935     return false;
936   }
937 
938   // These two list are used for store the index of the correspondent
939   // element in peer repeated field.
940   std::vector<int> match_list1;
941   std::vector<int> match_list2;
942 
943   const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
944   bool smart_list = IsTreatedAsSmartList(repeated_field);
945   bool simple_list = key_comparator == nullptr &&
946                      !IsTreatedAsSet(repeated_field) &&
947                      !IsTreatedAsSmartSet(repeated_field) && !smart_list;
948 
949   // For simple lists, we avoid matching repeated field indices, saving the
950   // memory allocations that would otherwise be needed for match_list1 and
951   // match_list2.
952   if (!simple_list) {
953     // Try to match indices of the repeated fields. Return false if match fails.
954     if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
955                                    key_comparator, *parent_fields, &match_list1,
956                                    &match_list2) &&
957         reporter_ == nullptr) {
958       return false;
959     }
960   }
961 
962   bool fieldDifferent = false;
963   SpecificField specific_field;
964   specific_field.field = repeated_field;
965 
966   // At this point, we have already matched pairs of fields (with the reporting
967   // to be done later). Now to check if the paired elements are different.
968   int next_unmatched_index = 0;
969   for (int i = 0; i < count1; i++) {
970     if (simple_list && i >= count2) {
971       break;
972     }
973     if (!simple_list && match_list1[i] == -1) {
974       if (smart_list) {
975         if (reporter_ == nullptr) return false;
976         specific_field.index = i;
977         parent_fields->push_back(specific_field);
978         reporter_->ReportDeleted(message1, message2, *parent_fields);
979         parent_fields->pop_back();
980         fieldDifferent = true;
981         // Use -2 to mark this element has been reported.
982         match_list1[i] = -2;
983       }
984       continue;
985     }
986     if (smart_list) {
987       for (int j = next_unmatched_index; j < match_list1[i]; ++j) {
988         GOOGLE_CHECK_LE(0, j);
989         if (reporter_ == nullptr) return false;
990         specific_field.index = j;
991         specific_field.new_index = j;
992         parent_fields->push_back(specific_field);
993         reporter_->ReportAdded(message1, message2, *parent_fields);
994         parent_fields->pop_back();
995         fieldDifferent = true;
996         // Use -2 to mark this element has been reported.
997         match_list2[j] = -2;
998       }
999     }
1000     specific_field.index = i;
1001     if (simple_list) {
1002       specific_field.new_index = i;
1003     } else {
1004       specific_field.new_index = match_list1[i];
1005       next_unmatched_index = match_list1[i] + 1;
1006     }
1007 
1008     const bool result = CompareFieldValueUsingParentFields(
1009         message1, message2, repeated_field, i, specific_field.new_index,
1010         parent_fields);
1011 
1012     // If we have found differences, either report them or terminate if
1013     // no reporter is present. Note that ReportModified, ReportMoved, and
1014     // ReportMatched are all mutually exclusive.
1015     if (!result) {
1016       if (reporter_ == NULL) return false;
1017       parent_fields->push_back(specific_field);
1018       reporter_->ReportModified(message1, message2, *parent_fields);
1019       parent_fields->pop_back();
1020       fieldDifferent = true;
1021     } else if (reporter_ != NULL &&
1022                specific_field.index != specific_field.new_index &&
1023                !specific_field.field->is_map() && report_moves_) {
1024       parent_fields->push_back(specific_field);
1025       reporter_->ReportMoved(message1, message2, *parent_fields);
1026       parent_fields->pop_back();
1027     } else if (report_matches_ && reporter_ != NULL) {
1028       parent_fields->push_back(specific_field);
1029       reporter_->ReportMatched(message1, message2, *parent_fields);
1030       parent_fields->pop_back();
1031     }
1032   }
1033 
1034   // Report any remaining additions or deletions.
1035   for (int i = 0; i < count2; ++i) {
1036     if (!simple_list && match_list2[i] != -1) continue;
1037     if (simple_list && i < count1) continue;
1038     if (!treated_as_subset) {
1039       fieldDifferent = true;
1040     }
1041 
1042     if (reporter_ == NULL) continue;
1043     specific_field.index = i;
1044     specific_field.new_index = i;
1045     parent_fields->push_back(specific_field);
1046     reporter_->ReportAdded(message1, message2, *parent_fields);
1047     parent_fields->pop_back();
1048   }
1049 
1050   for (int i = 0; i < count1; ++i) {
1051     if (!simple_list && match_list1[i] != -1) continue;
1052     if (simple_list && i < count2) continue;
1053     assert(reporter_ != NULL);
1054     specific_field.index = i;
1055     parent_fields->push_back(specific_field);
1056     reporter_->ReportDeleted(message1, message2, *parent_fields);
1057     parent_fields->pop_back();
1058     fieldDifferent = true;
1059   }
1060   return !fieldDifferent;
1061 }
1062 
CompareFieldValue(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2)1063 bool MessageDifferencer::CompareFieldValue(const Message& message1,
1064                                            const Message& message2,
1065                                            const FieldDescriptor* field,
1066                                            int index1, int index2) {
1067   return CompareFieldValueUsingParentFields(message1, message2, field, index1,
1068                                             index2, NULL);
1069 }
1070 
CompareFieldValueUsingParentFields(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,std::vector<SpecificField> * parent_fields)1071 bool MessageDifferencer::CompareFieldValueUsingParentFields(
1072     const Message& message1, const Message& message2,
1073     const FieldDescriptor* field, int index1, int index2,
1074     std::vector<SpecificField>* parent_fields) {
1075   FieldContext field_context(parent_fields);
1076   FieldComparator::ComparisonResult result = GetFieldComparisonResult(
1077       message1, message2, field, index1, index2, &field_context);
1078 
1079   if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
1080       result == FieldComparator::RECURSE) {
1081     // Get the nested messages and compare them using one of the Compare
1082     // methods.
1083     const Reflection* reflection1 = message1.GetReflection();
1084     const Reflection* reflection2 = message2.GetReflection();
1085     const Message& m1 =
1086         field->is_repeated()
1087             ? reflection1->GetRepeatedMessage(message1, field, index1)
1088             : reflection1->GetMessage(message1, field);
1089     const Message& m2 =
1090         field->is_repeated()
1091             ? reflection2->GetRepeatedMessage(message2, field, index2)
1092             : reflection2->GetMessage(message2, field);
1093 
1094     // parent_fields is used in calls to Reporter methods.
1095     if (parent_fields != NULL) {
1096       // Append currently compared field to the end of parent_fields.
1097       SpecificField specific_field;
1098       specific_field.field = field;
1099       specific_field.index = index1;
1100       specific_field.new_index = index2;
1101       parent_fields->push_back(specific_field);
1102       const bool compare_result = Compare(m1, m2, parent_fields);
1103       parent_fields->pop_back();
1104       return compare_result;
1105     } else {
1106       // Recreates parent_fields as if m1 and m2 had no parents.
1107       return Compare(m1, m2);
1108     }
1109   } else {
1110     return (result == FieldComparator::SAME);
1111   }
1112 }
1113 
CheckPathChanged(const std::vector<SpecificField> & field_path)1114 bool MessageDifferencer::CheckPathChanged(
1115     const std::vector<SpecificField>& field_path) {
1116   for (int i = 0; i < field_path.size(); ++i) {
1117     // Don't check indexes for map entries -- maps are unordered.
1118     if (field_path[i].field != NULL && field_path[i].field->is_map()) continue;
1119     if (field_path[i].index != field_path[i].new_index) return true;
1120   }
1121   return false;
1122 }
1123 
IsTreatedAsSet(const FieldDescriptor * field)1124 bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
1125   if (!field->is_repeated()) return false;
1126   if (repeated_field_comparisons_.find(field) !=
1127       repeated_field_comparisons_.end()) {
1128     return repeated_field_comparisons_[field] == AS_SET;
1129   }
1130   return repeated_field_comparison_ == AS_SET;
1131 }
1132 
IsTreatedAsSmartSet(const FieldDescriptor * field)1133 bool MessageDifferencer::IsTreatedAsSmartSet(const FieldDescriptor* field) {
1134   if (!field->is_repeated()) return false;
1135   if (repeated_field_comparisons_.find(field) !=
1136       repeated_field_comparisons_.end()) {
1137     return repeated_field_comparisons_[field] == AS_SMART_SET;
1138   }
1139   return repeated_field_comparison_ == AS_SMART_SET;
1140 }
1141 
IsTreatedAsSmartList(const FieldDescriptor * field)1142 bool MessageDifferencer::IsTreatedAsSmartList(const FieldDescriptor* field) {
1143   if (!field->is_repeated()) return false;
1144   if (repeated_field_comparisons_.find(field) !=
1145       repeated_field_comparisons_.end()) {
1146     return repeated_field_comparisons_[field] == AS_SMART_LIST;
1147   }
1148   return repeated_field_comparison_ == AS_SMART_LIST;
1149 }
1150 
IsTreatedAsSubset(const FieldDescriptor * field)1151 bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
1152   return scope_ == PARTIAL &&
1153          (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
1154 }
1155 
IsIgnored(const Message & message1,const Message & message2,const FieldDescriptor * field,const std::vector<SpecificField> & parent_fields)1156 bool MessageDifferencer::IsIgnored(
1157     const Message& message1, const Message& message2,
1158     const FieldDescriptor* field,
1159     const std::vector<SpecificField>& parent_fields) {
1160   if (ignored_fields_.find(field) != ignored_fields_.end()) {
1161     return true;
1162   }
1163   for (int i = 0; i < ignore_criteria_.size(); ++i) {
1164     if (ignore_criteria_[i]->IsIgnored(message1, message2, field,
1165                                        parent_fields)) {
1166       return true;
1167     }
1168   }
1169   return false;
1170 }
1171 
IsUnknownFieldIgnored(const Message & message1,const Message & message2,const SpecificField & field,const std::vector<SpecificField> & parent_fields)1172 bool MessageDifferencer::IsUnknownFieldIgnored(
1173     const Message& message1, const Message& message2,
1174     const SpecificField& field,
1175     const std::vector<SpecificField>& parent_fields) {
1176   for (int i = 0; i < ignore_criteria_.size(); ++i) {
1177     if (ignore_criteria_[i]->IsUnknownFieldIgnored(message1, message2, field,
1178                                                    parent_fields)) {
1179       return true;
1180     }
1181   }
1182   return false;
1183 }
1184 
1185 const MessageDifferencer::MapKeyComparator*
GetMapKeyComparator(const FieldDescriptor * field) const1186 MessageDifferencer ::GetMapKeyComparator(const FieldDescriptor* field) const {
1187   if (!field->is_repeated()) return NULL;
1188   FieldKeyComparatorMap::const_iterator it =
1189       map_field_key_comparator_.find(field);
1190   if (it != map_field_key_comparator_.end()) {
1191     return it->second;
1192   }
1193   if (field->is_map()) {
1194     // field cannot already be treated as list or set since TreatAsList() and
1195     // TreatAsSet() call GetMapKeyComparator() and fail if it returns non-NULL.
1196     return &map_entry_key_comparator_;
1197   }
1198   return NULL;
1199 }
1200 
1201 namespace {
1202 
1203 typedef std::pair<int, const UnknownField*> IndexUnknownFieldPair;
1204 
1205 struct UnknownFieldOrdering {
operator ()google::protobuf::util::__anoncc567c560111::UnknownFieldOrdering1206   inline bool operator()(const IndexUnknownFieldPair& a,
1207                          const IndexUnknownFieldPair& b) const {
1208     if (a.second->number() < b.second->number()) return true;
1209     if (a.second->number() > b.second->number()) return false;
1210     return a.second->type() < b.second->type();
1211   }
1212 };
1213 
1214 }  // namespace
1215 
UnpackAny(const Message & any,std::unique_ptr<Message> * data)1216 bool MessageDifferencer::UnpackAny(const Message& any,
1217                                    std::unique_ptr<Message>* data) {
1218   const Reflection* reflection = any.GetReflection();
1219   const FieldDescriptor* type_url_field;
1220   const FieldDescriptor* value_field;
1221   if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
1222     return false;
1223   }
1224   const std::string& type_url = reflection->GetString(any, type_url_field);
1225   std::string full_type_name;
1226   if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1227     return false;
1228   }
1229 
1230   const Descriptor* desc =
1231       any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1232           full_type_name);
1233   if (desc == NULL) {
1234     GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
1235     return false;
1236   }
1237 
1238   if (dynamic_message_factory_ == NULL) {
1239     dynamic_message_factory_.reset(new DynamicMessageFactory());
1240   }
1241   data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
1242   std::string serialized_value = reflection->GetString(any, value_field);
1243   if (!(*data)->ParseFromString(serialized_value)) {
1244     GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
1245     return false;
1246   }
1247   return true;
1248 }
1249 
CompareUnknownFields(const Message & message1,const Message & message2,const UnknownFieldSet & unknown_field_set1,const UnknownFieldSet & unknown_field_set2,std::vector<SpecificField> * parent_field)1250 bool MessageDifferencer::CompareUnknownFields(
1251     const Message& message1, const Message& message2,
1252     const UnknownFieldSet& unknown_field_set1,
1253     const UnknownFieldSet& unknown_field_set2,
1254     std::vector<SpecificField>* parent_field) {
1255   // Ignore unknown fields in EQUIVALENT mode.
1256   if (message_field_comparison_ == EQUIVALENT) return true;
1257 
1258   if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
1259     return true;
1260   }
1261 
1262   bool is_different = false;
1263 
1264   // We first sort the unknown fields by field number and type (in other words,
1265   // in tag order), making sure to preserve ordering of values with the same
1266   // tag.  This allows us to report only meaningful differences between the
1267   // two sets -- that is, differing values for the same tag.  We use
1268   // IndexUnknownFieldPairs to keep track of the field's original index for
1269   // reporting purposes.
1270   std::vector<IndexUnknownFieldPair> fields1;  // unknown_field_set1, sorted
1271   std::vector<IndexUnknownFieldPair> fields2;  // unknown_field_set2, sorted
1272   fields1.reserve(unknown_field_set1.field_count());
1273   fields2.reserve(unknown_field_set2.field_count());
1274 
1275   for (int i = 0; i < unknown_field_set1.field_count(); i++) {
1276     fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
1277   }
1278   for (int i = 0; i < unknown_field_set2.field_count(); i++) {
1279     fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
1280   }
1281 
1282   UnknownFieldOrdering is_before;
1283   std::stable_sort(fields1.begin(), fields1.end(), is_before);
1284   std::stable_sort(fields2.begin(), fields2.end(), is_before);
1285 
1286   // In order to fill in SpecificField::index, we have to keep track of how
1287   // many values we've seen with the same field number and type.
1288   // current_repeated points at the first field in this range, and
1289   // current_repeated_start{1,2} are the indexes of the first field in the
1290   // range within fields1 and fields2.
1291   const UnknownField* current_repeated = NULL;
1292   int current_repeated_start1 = 0;
1293   int current_repeated_start2 = 0;
1294 
1295   // Now that we have two sorted lists, we can detect fields which appear only
1296   // in one list or the other by traversing them simultaneously.
1297   int index1 = 0;
1298   int index2 = 0;
1299   while (index1 < fields1.size() || index2 < fields2.size()) {
1300     enum {
1301       ADDITION,
1302       DELETION,
1303       MODIFICATION,
1304       COMPARE_GROUPS,
1305       NO_CHANGE
1306     } change_type;
1307 
1308     // focus_field is the field we're currently reporting on.  (In the case
1309     // of a modification, it's the field on the left side.)
1310     const UnknownField* focus_field;
1311     bool match = false;
1312 
1313     if (index2 == fields2.size() ||
1314         (index1 < fields1.size() &&
1315          is_before(fields1[index1], fields2[index2]))) {
1316       // fields1[index1] is not present in fields2.
1317       change_type = DELETION;
1318       focus_field = fields1[index1].second;
1319     } else if (index1 == fields1.size() ||
1320                is_before(fields2[index2], fields1[index1])) {
1321       // fields2[index2] is not present in fields1.
1322       if (scope_ == PARTIAL) {
1323         // Ignore.
1324         ++index2;
1325         continue;
1326       }
1327       change_type = ADDITION;
1328       focus_field = fields2[index2].second;
1329     } else {
1330       // Field type and number are the same.  See if the values differ.
1331       change_type = MODIFICATION;
1332       focus_field = fields1[index1].second;
1333 
1334       switch (focus_field->type()) {
1335         case UnknownField::TYPE_VARINT:
1336           match = fields1[index1].second->varint() ==
1337                   fields2[index2].second->varint();
1338           break;
1339         case UnknownField::TYPE_FIXED32:
1340           match = fields1[index1].second->fixed32() ==
1341                   fields2[index2].second->fixed32();
1342           break;
1343         case UnknownField::TYPE_FIXED64:
1344           match = fields1[index1].second->fixed64() ==
1345                   fields2[index2].second->fixed64();
1346           break;
1347         case UnknownField::TYPE_LENGTH_DELIMITED:
1348           match = fields1[index1].second->length_delimited() ==
1349                   fields2[index2].second->length_delimited();
1350           break;
1351         case UnknownField::TYPE_GROUP:
1352           // We must deal with this later, after building the SpecificField.
1353           change_type = COMPARE_GROUPS;
1354           break;
1355       }
1356       if (match && change_type != COMPARE_GROUPS) {
1357         change_type = NO_CHANGE;
1358       }
1359     }
1360 
1361     if (current_repeated == NULL ||
1362         focus_field->number() != current_repeated->number() ||
1363         focus_field->type() != current_repeated->type()) {
1364       // We've started a new repeated field.
1365       current_repeated = focus_field;
1366       current_repeated_start1 = index1;
1367       current_repeated_start2 = index2;
1368     }
1369 
1370     if (change_type == NO_CHANGE && reporter_ == NULL) {
1371       // Fields were already compared and matched and we have no reporter.
1372       ++index1;
1373       ++index2;
1374       continue;
1375     }
1376 
1377     // Build the SpecificField.  This is slightly complicated.
1378     SpecificField specific_field;
1379     specific_field.unknown_field_number = focus_field->number();
1380     specific_field.unknown_field_type = focus_field->type();
1381 
1382     specific_field.unknown_field_set1 = &unknown_field_set1;
1383     specific_field.unknown_field_set2 = &unknown_field_set2;
1384 
1385     if (change_type != ADDITION) {
1386       specific_field.unknown_field_index1 = fields1[index1].first;
1387     }
1388     if (change_type != DELETION) {
1389       specific_field.unknown_field_index2 = fields2[index2].first;
1390     }
1391 
1392     // Calculate the field index.
1393     if (change_type == ADDITION) {
1394       specific_field.index = index2 - current_repeated_start2;
1395       specific_field.new_index = index2 - current_repeated_start2;
1396     } else {
1397       specific_field.index = index1 - current_repeated_start1;
1398       specific_field.new_index = index2 - current_repeated_start2;
1399     }
1400 
1401     if (IsUnknownFieldIgnored(message1, message2, specific_field,
1402                               *parent_field)) {
1403       if (reporter_ != NULL) {
1404         parent_field->push_back(specific_field);
1405         reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
1406         parent_field->pop_back();
1407       }
1408       return true;
1409     }
1410 
1411     if (change_type == ADDITION || change_type == DELETION ||
1412         change_type == MODIFICATION) {
1413       if (reporter_ == NULL) {
1414         // We found a difference and we have no reporter.
1415         return false;
1416       }
1417       is_different = true;
1418     }
1419 
1420     parent_field->push_back(specific_field);
1421 
1422     switch (change_type) {
1423       case ADDITION:
1424         reporter_->ReportAdded(message1, message2, *parent_field);
1425         ++index2;
1426         break;
1427       case DELETION:
1428         reporter_->ReportDeleted(message1, message2, *parent_field);
1429         ++index1;
1430         break;
1431       case MODIFICATION:
1432         reporter_->ReportModified(message1, message2, *parent_field);
1433         ++index1;
1434         ++index2;
1435         break;
1436       case COMPARE_GROUPS:
1437         if (!CompareUnknownFields(
1438                 message1, message2, fields1[index1].second->group(),
1439                 fields2[index2].second->group(), parent_field)) {
1440           if (reporter_ == NULL) return false;
1441           is_different = true;
1442           reporter_->ReportModified(message1, message2, *parent_field);
1443         }
1444         ++index1;
1445         ++index2;
1446         break;
1447       case NO_CHANGE:
1448         ++index1;
1449         ++index2;
1450         if (report_matches_) {
1451           reporter_->ReportMatched(message1, message2, *parent_field);
1452         }
1453     }
1454 
1455     parent_field->pop_back();
1456   }
1457 
1458   return !is_different;
1459 }
1460 
1461 namespace {
1462 
1463 // Find maximum bipartite matching using the argumenting path algorithm.
1464 class MaximumMatcher {
1465  public:
1466   typedef ResultCallback2<bool, int, int> NodeMatchCallback;
1467   // MaximumMatcher takes ownership of the passed in callback and uses it to
1468   // determine whether a node on the left side of the bipartial graph matches
1469   // a node on the right side. count1 is the number of nodes on the left side
1470   // of the graph and count2 to is the number of nodes on the right side.
1471   // Every node is referred to using 0-based indices.
1472   // If a maximum match is found, the result will be stored in match_list1 and
1473   // match_list2. match_list1[i] == j means the i-th node on the left side is
1474   // matched to the j-th node on the right side and match_list2[x] == y means
1475   // the x-th node on the right side is matched to y-th node on the left side.
1476   // match_list1[i] == -1 means the node is not matched. Same with match_list2.
1477   MaximumMatcher(int count1, int count2, NodeMatchCallback* callback,
1478                  std::vector<int>* match_list1, std::vector<int>* match_list2);
1479   // Find a maximum match and return the number of matched node pairs.
1480   // If early_return is true, this method will return 0 immediately when it
1481   // finds that not all nodes on the left side can be matched.
1482   int FindMaximumMatch(bool early_return);
1483 
1484  private:
1485   // Determines whether the node on the left side of the bipartial graph
1486   // matches the one on the right side.
1487   bool Match(int left, int right);
1488   // Find an argumenting path starting from the node v on the left side. If a
1489   // path can be found, update match_list2_ to reflect the path and return
1490   // true.
1491   bool FindArgumentPathDFS(int v, std::vector<bool>* visited);
1492 
1493   int count1_;
1494   int count2_;
1495   std::unique_ptr<NodeMatchCallback> match_callback_;
1496   std::map<std::pair<int, int>, bool> cached_match_results_;
1497   std::vector<int>* match_list1_;
1498   std::vector<int>* match_list2_;
1499   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
1500 };
1501 
MaximumMatcher(int count1,int count2,NodeMatchCallback * callback,std::vector<int> * match_list1,std::vector<int> * match_list2)1502 MaximumMatcher::MaximumMatcher(int count1, int count2,
1503                                NodeMatchCallback* callback,
1504                                std::vector<int>* match_list1,
1505                                std::vector<int>* match_list2)
1506     : count1_(count1), count2_(count2), match_callback_(callback),
1507       match_list1_(match_list1), match_list2_(match_list2) {
1508   match_list1_->assign(count1, -1);
1509   match_list2_->assign(count2, -1);
1510 }
1511 
FindMaximumMatch(bool early_return)1512 int MaximumMatcher::FindMaximumMatch(bool early_return) {
1513   int result = 0;
1514   for (int i = 0; i < count1_; ++i) {
1515     std::vector<bool> visited(count1_);
1516     if (FindArgumentPathDFS(i, &visited)) {
1517       ++result;
1518     } else if (early_return) {
1519       return 0;
1520     }
1521   }
1522   // Backfill match_list1_ as we only filled match_list2_ when finding
1523   // argumenting pathes.
1524   for (int i = 0; i < count2_; ++i) {
1525     if ((*match_list2_)[i] != -1) {
1526       (*match_list1_)[(*match_list2_)[i]] = i;
1527     }
1528   }
1529   return result;
1530 }
1531 
Match(int left,int right)1532 bool MaximumMatcher::Match(int left, int right) {
1533   std::pair<int, int> p(left, right);
1534   std::map<std::pair<int, int>, bool>::iterator it =
1535       cached_match_results_.find(p);
1536   if (it != cached_match_results_.end()) {
1537     return it->second;
1538   }
1539   cached_match_results_[p] = match_callback_->Run(left, right);
1540   return cached_match_results_[p];
1541 }
1542 
FindArgumentPathDFS(int v,std::vector<bool> * visited)1543 bool MaximumMatcher::FindArgumentPathDFS(int v, std::vector<bool>* visited) {
1544   (*visited)[v] = true;
1545   // We try to match those un-matched nodes on the right side first. This is
1546   // the step that the navie greedy matching algorithm uses. In the best cases
1547   // where the greedy algorithm can find a maximum matching, we will always
1548   // find a match in this step and the performance will be identical to the
1549   // greedy algorithm.
1550   for (int i = 0; i < count2_; ++i) {
1551     int matched = (*match_list2_)[i];
1552     if (matched == -1 && Match(v, i)) {
1553       (*match_list2_)[i] = v;
1554       return true;
1555     }
1556   }
1557   // Then we try those already matched nodes and see if we can find an
1558   // alternaive match for the node matched to them.
1559   // The greedy algorithm will stop before this and fail to produce the
1560   // correct result.
1561   for (int i = 0; i < count2_; ++i) {
1562     int matched = (*match_list2_)[i];
1563     if (matched != -1 && Match(v, i)) {
1564       if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
1565         (*match_list2_)[i] = v;
1566         return true;
1567       }
1568     }
1569   }
1570   return false;
1571 }
1572 
1573 }  // namespace
1574 
MatchRepeatedFieldIndices(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const std::vector<SpecificField> & parent_fields,std::vector<int> * match_list1,std::vector<int> * match_list2)1575 bool MessageDifferencer::MatchRepeatedFieldIndices(
1576     const Message& message1, const Message& message2,
1577     const FieldDescriptor* repeated_field,
1578     const MapKeyComparator* key_comparator,
1579     const std::vector<SpecificField>& parent_fields,
1580     std::vector<int>* match_list1, std::vector<int>* match_list2) {
1581   const int count1 =
1582       message1.GetReflection()->FieldSize(message1, repeated_field);
1583   const int count2 =
1584       message2.GetReflection()->FieldSize(message2, repeated_field);
1585   const bool is_treated_as_smart_set = IsTreatedAsSmartSet(repeated_field);
1586 
1587   match_list1->assign(count1, -1);
1588   match_list2->assign(count2, -1);
1589   // Ensure that we don't report differences during the matching process. Since
1590   // field comparators could potentially use this message differencer object to
1591   // perform further comparisons, turn off reporting here and re-enable it
1592   // before returning.
1593   Reporter* reporter = reporter_;
1594   reporter_ = NULL;
1595   NumDiffsReporter num_diffs_reporter;
1596   std::vector<int32> num_diffs_list1;
1597   if (is_treated_as_smart_set) {
1598     num_diffs_list1.assign(count1, kint32max);
1599   }
1600 
1601   bool success = true;
1602   // Find potential match if this is a special repeated field.
1603   if (scope_ == PARTIAL) {
1604     // When partial matching is enabled, Compare(a, b) && Compare(a, c)
1605     // doesn't necessarily imply Compare(b, c). Therefore a naive greedy
1606     // algorithm will fail to find a maximum matching.
1607     // Here we use the augmenting path algorithm.
1608     MaximumMatcher::NodeMatchCallback* callback = ::google::protobuf::NewPermanentCallback(
1609         this, &MessageDifferencer::IsMatch, repeated_field, key_comparator,
1610         &message1, &message2, parent_fields, nullptr);
1611     MaximumMatcher matcher(count1, count2, callback, match_list1, match_list2);
1612     // If diff info is not needed, we should end the matching process as
1613     // soon as possible if not all items can be matched.
1614     bool early_return = (reporter == nullptr);
1615     int match_count = matcher.FindMaximumMatch(early_return);
1616     if (match_count != count1 && early_return) return false;
1617     success = success && (match_count == count1);
1618   } else {
1619     int start_offset = 0;
1620     // If the two repeated fields are treated as sets, optimize for the case
1621     // where both start with same items stored in the same order.
1622     if (IsTreatedAsSet(repeated_field) || is_treated_as_smart_set ||
1623         IsTreatedAsSmartList(repeated_field)) {
1624       start_offset = std::min(count1, count2);
1625       for (int i = 0; i < count1 && i < count2; i++) {
1626         if (IsMatch(repeated_field, key_comparator, &message1, &message2,
1627                     parent_fields, nullptr, i, i)) {
1628           match_list1->at(i) = i;
1629           match_list2->at(i) = i;
1630         } else {
1631           start_offset = i;
1632           break;
1633         }
1634       }
1635     }
1636     for (int i = start_offset; i < count1; ++i) {
1637       // Indicates any matched elements for this repeated field.
1638       bool match = false;
1639       int matched_j = -1;
1640 
1641       for (int j = start_offset; j < count2; j++) {
1642         if (match_list2->at(j) != -1) {
1643           if (!is_treated_as_smart_set || num_diffs_list1[i] == 0 ||
1644               num_diffs_list1[match_list2->at(j)] == 0) {
1645             continue;
1646           }
1647         }
1648 
1649         if (is_treated_as_smart_set) {
1650           num_diffs_reporter.Reset();
1651           match = IsMatch(repeated_field, key_comparator, &message1, &message2,
1652                           parent_fields, &num_diffs_reporter, i, j);
1653         } else {
1654           match = IsMatch(repeated_field, key_comparator, &message1, &message2,
1655                           parent_fields, nullptr, i, j);
1656         }
1657 
1658         if (is_treated_as_smart_set) {
1659           if (match) {
1660             num_diffs_list1[i] = 0;
1661           } else if (repeated_field->cpp_type() ==
1662                      FieldDescriptor::CPPTYPE_MESSAGE) {
1663             // Replace with the one with fewer diffs.
1664             const int32 num_diffs = num_diffs_reporter.GetNumDiffs();
1665             if (num_diffs < num_diffs_list1[i]) {
1666               // If j has been already matched to some element, ensure the
1667               // current num_diffs is smaller.
1668               if (match_list2->at(j) == -1 ||
1669                   num_diffs < num_diffs_list1[match_list2->at(j)]) {
1670                 num_diffs_list1[i] = num_diffs;
1671                 match = true;
1672               }
1673             }
1674           }
1675         }
1676 
1677         if (match) {
1678           matched_j = j;
1679           if (!is_treated_as_smart_set || num_diffs_list1[i] == 0) {
1680             break;
1681           }
1682         }
1683       }
1684 
1685       match = (matched_j != -1);
1686       if (match) {
1687         if (is_treated_as_smart_set && match_list2->at(matched_j) != -1) {
1688           // This is to revert the previously matched index in list2.
1689           match_list1->at(match_list2->at(matched_j)) = -1;
1690           match = false;
1691         }
1692         match_list1->at(i) = matched_j;
1693         match_list2->at(matched_j) = i;
1694       }
1695       if (!match && reporter == nullptr) return false;
1696       success = success && match;
1697     }
1698   }
1699 
1700   if (IsTreatedAsSmartList(repeated_field)) {
1701     match_indices_for_smart_list_callback_(match_list1, match_list2);
1702   }
1703 
1704   reporter_ = reporter;
1705 
1706   return success;
1707 }
1708 
GetFieldComparisonResult(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,const FieldContext * field_context)1709 FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
1710     const Message& message1, const Message& message2,
1711     const FieldDescriptor* field, int index1, int index2,
1712     const FieldContext* field_context) {
1713   FieldComparator* comparator = field_comparator_ != NULL
1714                                     ? field_comparator_
1715                                     : &default_field_comparator_;
1716   return comparator->Compare(message1, message2, field, index1, index2,
1717                              field_context);
1718 }
1719 
1720 // ===========================================================================
1721 
Reporter()1722 MessageDifferencer::Reporter::Reporter() {}
~Reporter()1723 MessageDifferencer::Reporter::~Reporter() {}
1724 
1725 // ===========================================================================
1726 
MapKeyComparator()1727 MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
~MapKeyComparator()1728 MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
1729 
1730 // ===========================================================================
1731 
IgnoreCriteria()1732 MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
~IgnoreCriteria()1733 MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
1734 
1735 // ===========================================================================
1736 
1737 // Note that the printer's delimiter is not used, because if we are given a
1738 // printer, we don't know its delimiter.
StreamReporter(io::ZeroCopyOutputStream * output)1739 MessageDifferencer::StreamReporter::StreamReporter(
1740     io::ZeroCopyOutputStream* output)
1741     : printer_(new io::Printer(output, '$')),
1742       delete_printer_(true),
1743       report_modified_aggregates_(false) {}
1744 
StreamReporter(io::Printer * printer)1745 MessageDifferencer::StreamReporter::StreamReporter(io::Printer* printer)
1746     : printer_(printer),
1747       delete_printer_(false),
1748       report_modified_aggregates_(false) {}
1749 
~StreamReporter()1750 MessageDifferencer::StreamReporter::~StreamReporter() {
1751   if (delete_printer_) delete printer_;
1752 }
1753 
PrintPath(const std::vector<SpecificField> & field_path,bool left_side)1754 void MessageDifferencer::StreamReporter::PrintPath(
1755     const std::vector<SpecificField>& field_path, bool left_side) {
1756   for (int i = 0; i < field_path.size(); ++i) {
1757     if (i > 0) {
1758       printer_->Print(".");
1759     }
1760 
1761     SpecificField specific_field = field_path[i];
1762 
1763     if (specific_field.field != NULL) {
1764       if (specific_field.field->is_extension()) {
1765         printer_->Print("($name$)", "name", specific_field.field->full_name());
1766       } else {
1767         printer_->PrintRaw(specific_field.field->name());
1768       }
1769       if (specific_field.field->is_map()) {
1770         // Don't print index in a map field; they are semantically unordered.
1771         continue;
1772       }
1773     } else {
1774       printer_->PrintRaw(StrCat(specific_field.unknown_field_number));
1775     }
1776     if (left_side && specific_field.index >= 0) {
1777       printer_->Print("[$name$]", "name", StrCat(specific_field.index));
1778     }
1779     if (!left_side && specific_field.new_index >= 0) {
1780       printer_->Print("[$name$]", "name",
1781                       StrCat(specific_field.new_index));
1782     }
1783   }
1784 }
1785 
PrintPath(const std::vector<SpecificField> & field_path,bool left_side,const Message & message)1786 void MessageDifferencer::StreamReporter::PrintPath(
1787     const std::vector<SpecificField>& field_path, bool left_side,
1788     const Message& message) {
1789   PrintPath(field_path, left_side);
1790 }
1791 
PrintValue(const Message & message,const std::vector<SpecificField> & field_path,bool left_side)1792 void MessageDifferencer::StreamReporter::PrintValue(
1793     const Message& message, const std::vector<SpecificField>& field_path,
1794     bool left_side) {
1795   const SpecificField& specific_field = field_path.back();
1796   const FieldDescriptor* field = specific_field.field;
1797   if (field != NULL) {
1798     std::string output;
1799     int index = left_side ? specific_field.index : specific_field.new_index;
1800     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1801       const Reflection* reflection = message.GetReflection();
1802       const Message& field_message =
1803           field->is_repeated()
1804               ? reflection->GetRepeatedMessage(message, field, index)
1805               : reflection->GetMessage(message, field);
1806       output = field_message.ShortDebugString();
1807       if (output.empty()) {
1808         printer_->Print("{ }");
1809       } else {
1810         printer_->Print("{ $name$ }", "name", output);
1811       }
1812     } else {
1813       TextFormat::PrintFieldValueToString(message, field, index, &output);
1814       printer_->PrintRaw(output);
1815     }
1816   } else {
1817     const UnknownFieldSet* unknown_fields =
1818         (left_side ? specific_field.unknown_field_set1
1819                    : specific_field.unknown_field_set2);
1820     const UnknownField* unknown_field =
1821         &unknown_fields->field(left_side ? specific_field.unknown_field_index1
1822                                          : specific_field.unknown_field_index2);
1823     PrintUnknownFieldValue(unknown_field);
1824   }
1825 }
1826 
PrintUnknownFieldValue(const UnknownField * unknown_field)1827 void MessageDifferencer::StreamReporter::PrintUnknownFieldValue(
1828     const UnknownField* unknown_field) {
1829   GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
1830 
1831   std::string output;
1832   switch (unknown_field->type()) {
1833     case UnknownField::TYPE_VARINT:
1834       output = StrCat(unknown_field->varint());
1835       break;
1836     case UnknownField::TYPE_FIXED32:
1837       output = StrCat(
1838           "0x", strings::Hex(unknown_field->fixed32(), strings::ZERO_PAD_8));
1839       break;
1840     case UnknownField::TYPE_FIXED64:
1841       output = StrCat(
1842           "0x", strings::Hex(unknown_field->fixed64(), strings::ZERO_PAD_16));
1843       break;
1844     case UnknownField::TYPE_LENGTH_DELIMITED:
1845       output = StringPrintf(
1846           "\"%s\"", CEscape(unknown_field->length_delimited()).c_str());
1847       break;
1848     case UnknownField::TYPE_GROUP:
1849       // TODO(kenton):  Print the contents of the group like we do for
1850       //   messages.  Requires an equivalent of ShortDebugString() for
1851       //   UnknownFieldSet.
1852       output = "{ ... }";
1853       break;
1854   }
1855   printer_->PrintRaw(output);
1856 }
1857 
Print(const std::string & str)1858 void MessageDifferencer::StreamReporter::Print(const std::string& str) {
1859   printer_->Print(str.c_str());
1860 }
1861 
ReportAdded(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1862 void MessageDifferencer::StreamReporter::ReportAdded(
1863     const Message& message1, const Message& message2,
1864     const std::vector<SpecificField>& field_path) {
1865   printer_->Print("added: ");
1866   PrintPath(field_path, false, message2);
1867   printer_->Print(": ");
1868   PrintValue(message2, field_path, false);
1869   printer_->Print("\n");  // Print for newlines.
1870 }
1871 
ReportDeleted(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1872 void MessageDifferencer::StreamReporter::ReportDeleted(
1873     const Message& message1, const Message& message2,
1874     const std::vector<SpecificField>& field_path) {
1875   printer_->Print("deleted: ");
1876   PrintPath(field_path, true, message1);
1877   printer_->Print(": ");
1878   PrintValue(message1, field_path, true);
1879   printer_->Print("\n");  // Print for newlines
1880 }
1881 
ReportModified(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1882 void MessageDifferencer::StreamReporter::ReportModified(
1883     const Message& message1, const Message& message2,
1884     const std::vector<SpecificField>& field_path) {
1885   if (!report_modified_aggregates_ && field_path.back().field == NULL) {
1886     if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
1887       // Any changes to the subfields have already been printed.
1888       return;
1889     }
1890   } else if (!report_modified_aggregates_) {
1891     if (field_path.back().field->cpp_type() ==
1892         FieldDescriptor::CPPTYPE_MESSAGE) {
1893       // Any changes to the subfields have already been printed.
1894       return;
1895     }
1896   }
1897 
1898   printer_->Print("modified: ");
1899   PrintPath(field_path, true, message1);
1900   if (CheckPathChanged(field_path)) {
1901     printer_->Print(" -> ");
1902     PrintPath(field_path, false, message2);
1903   }
1904   printer_->Print(": ");
1905   PrintValue(message1, field_path, true);
1906   printer_->Print(" -> ");
1907   PrintValue(message2, field_path, false);
1908   printer_->Print("\n");  // Print for newlines.
1909 }
1910 
ReportMoved(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1911 void MessageDifferencer::StreamReporter::ReportMoved(
1912     const Message& message1, const Message& message2,
1913     const std::vector<SpecificField>& field_path) {
1914   printer_->Print("moved: ");
1915   PrintPath(field_path, true, message1);
1916   printer_->Print(" -> ");
1917   PrintPath(field_path, false, message2);
1918   printer_->Print(" : ");
1919   PrintValue(message1, field_path, true);
1920   printer_->Print("\n");  // Print for newlines.
1921 }
1922 
ReportMatched(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1923 void MessageDifferencer::StreamReporter::ReportMatched(
1924     const Message& message1, const Message& message2,
1925     const std::vector<SpecificField>& field_path) {
1926   printer_->Print("matched: ");
1927   PrintPath(field_path, true, message1);
1928   if (CheckPathChanged(field_path)) {
1929     printer_->Print(" -> ");
1930     PrintPath(field_path, false, message2);
1931   }
1932   printer_->Print(" : ");
1933   PrintValue(message1, field_path, true);
1934   printer_->Print("\n");  // Print for newlines.
1935 }
1936 
ReportIgnored(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1937 void MessageDifferencer::StreamReporter::ReportIgnored(
1938     const Message& message1, const Message& message2,
1939     const std::vector<SpecificField>& field_path) {
1940   printer_->Print("ignored: ");
1941   PrintPath(field_path, true, message1);
1942   if (CheckPathChanged(field_path)) {
1943     printer_->Print(" -> ");
1944     PrintPath(field_path, false, message2);
1945   }
1946   printer_->Print("\n");  // Print for newlines.
1947 }
1948 
ReportUnknownFieldIgnored(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1949 void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
1950     const Message& message1, const Message& message2,
1951     const std::vector<SpecificField>& field_path) {
1952   printer_->Print("ignored: ");
1953   PrintPath(field_path, true, message1);
1954   if (CheckPathChanged(field_path)) {
1955     printer_->Print(" -> ");
1956     PrintPath(field_path, false, message2);
1957   }
1958   printer_->Print("\n");  // Print for newlines.
1959 }
1960 
1961 MessageDifferencer::MapKeyComparator*
CreateMultipleFieldsMapKeyComparator(const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)1962 MessageDifferencer::CreateMultipleFieldsMapKeyComparator(
1963     const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths) {
1964   return new MultipleFieldsMapKeyComparator(this, key_field_paths);
1965 }
1966 
1967 }  // namespace util
1968 }  // namespace protobuf
1969 }  // namespace google
1970