1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file defines static methods and classes for comparing Protocol
36 // Messages (see //google/protobuf/util/message_differencer.h for more
37 // information).
38 
39 #include <google/protobuf/util/message_differencer.h>
40 
41 #include <algorithm>
42 #include <memory>
43 #ifndef _SHARED_PTR_H
44 #include <google/protobuf/stubs/shared_ptr.h>
45 #endif
46 #include <utility>
47 
48 #include <google/protobuf/stubs/callback.h>
49 #include <google/protobuf/stubs/common.h>
50 #include <google/protobuf/stubs/logging.h>
51 #include <google/protobuf/stubs/stringprintf.h>
52 #include <google/protobuf/any.h>
53 #include <google/protobuf/io/printer.h>
54 #include <google/protobuf/io/zero_copy_stream.h>
55 #include <google/protobuf/io/zero_copy_stream_impl.h>
56 #include <google/protobuf/dynamic_message.h>
57 #include <google/protobuf/text_format.h>
58 #include <google/protobuf/util/field_comparator.h>
59 #include <google/protobuf/stubs/strutil.h>
60 
61 namespace google {
62 namespace protobuf {
63 
64 namespace util {
65 
66 // When comparing a repeated field as map, MultipleFieldMapKeyComparator can
67 // be used to specify multiple fields as key for key comparison.
68 // Two elements of a repeated field will be regarded as having the same key
69 // iff they have the same value for every specified key field.
70 // Note that you can also specify only one field as key.
71 class MessageDifferencer::MultipleFieldsMapKeyComparator
72     : public MessageDifferencer::MapKeyComparator {
73  public:
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const vector<vector<const FieldDescriptor * >> & key_field_paths)74   MultipleFieldsMapKeyComparator(
75       MessageDifferencer* message_differencer,
76       const vector<vector<const FieldDescriptor*> >& key_field_paths)
77         : message_differencer_(message_differencer),
78           key_field_paths_(key_field_paths) {
79     GOOGLE_CHECK(!key_field_paths_.empty());
80     for (int i = 0; i < key_field_paths_.size(); ++i) {
81       GOOGLE_CHECK(!key_field_paths_[i].empty());
82     }
83   }
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const FieldDescriptor * key)84   MultipleFieldsMapKeyComparator(
85       MessageDifferencer* message_differencer,
86       const FieldDescriptor* key)
87         : message_differencer_(message_differencer) {
88     vector<const FieldDescriptor*> key_field_path;
89     key_field_path.push_back(key);
90     key_field_paths_.push_back(key_field_path);
91   }
IsMatch(const Message & message1,const Message & message2,const vector<SpecificField> & parent_fields) const92   virtual bool IsMatch(
93       const Message& message1,
94       const Message& message2,
95       const vector<SpecificField>& parent_fields) const {
96     for (int i = 0; i < key_field_paths_.size(); ++i) {
97       if (!IsMatchInternal(message1, message2, parent_fields,
98                            key_field_paths_[i], 0)) {
99         return false;
100       }
101     }
102     return true;
103   }
104  private:
IsMatchInternal(const Message & message1,const Message & message2,const vector<SpecificField> & parent_fields,const vector<const FieldDescriptor * > & key_field_path,int path_index) const105   bool IsMatchInternal(
106       const Message& message1,
107       const Message& message2,
108       const vector<SpecificField>& parent_fields,
109       const vector<const FieldDescriptor*>& key_field_path,
110       int path_index) const {
111     const FieldDescriptor* field = key_field_path[path_index];
112     vector<SpecificField> current_parent_fields(parent_fields);
113     if (path_index == key_field_path.size() - 1) {
114       if (field->is_repeated()) {
115         if (!message_differencer_->CompareRepeatedField(
116             message1, message2, field, &current_parent_fields)) {
117           return false;
118         }
119       } else {
120         if (!message_differencer_->CompareFieldValueUsingParentFields(
121             message1, message2, field, -1, -1, &current_parent_fields)) {
122           return false;
123         }
124       }
125       return true;
126     } else {
127       const Reflection* reflection1 = message1.GetReflection();
128       const Reflection* reflection2 = message2.GetReflection();
129       bool has_field1 = reflection1->HasField(message1, field);
130       bool has_field2 = reflection2->HasField(message2, field);
131       if (!has_field1 && !has_field2) {
132         return true;
133       }
134       if (has_field1 != has_field2) {
135         return false;
136       }
137       SpecificField specific_field;
138       specific_field.field = field;
139       current_parent_fields.push_back(specific_field);
140       return IsMatchInternal(
141           reflection1->GetMessage(message1, field),
142           reflection2->GetMessage(message2, field),
143           current_parent_fields,
144           key_field_path,
145           path_index + 1);
146     }
147   }
148   MessageDifferencer* message_differencer_;
149   vector<vector<const FieldDescriptor*> > key_field_paths_;
150   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
151 };
152 
Equals(const Message & message1,const Message & message2)153 bool MessageDifferencer::Equals(const Message& message1,
154                                 const Message& message2) {
155   MessageDifferencer differencer;
156 
157   return differencer.Compare(message1, message2);
158 }
159 
Equivalent(const Message & message1,const Message & message2)160 bool MessageDifferencer::Equivalent(const Message& message1,
161                                     const Message& message2) {
162   MessageDifferencer differencer;
163   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
164 
165   return differencer.Compare(message1, message2);
166 }
167 
ApproximatelyEquals(const Message & message1,const Message & message2)168 bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
169                                              const Message& message2) {
170   MessageDifferencer differencer;
171   differencer.set_float_comparison(
172       MessageDifferencer::APPROXIMATE);
173 
174   return differencer.Compare(message1, message2);
175 }
176 
ApproximatelyEquivalent(const Message & message1,const Message & message2)177 bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
178                                                  const Message& message2) {
179   MessageDifferencer differencer;
180   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
181   differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
182 
183   return differencer.Compare(message1, message2);
184 }
185 
186 // ===========================================================================
187 
MessageDifferencer()188 MessageDifferencer::MessageDifferencer()
189     : reporter_(NULL),
190       field_comparator_(NULL),
191       message_field_comparison_(EQUAL),
192       scope_(FULL),
193       repeated_field_comparison_(AS_LIST),
194       report_matches_(false),
195       output_string_(NULL) { }
196 
~MessageDifferencer()197 MessageDifferencer::~MessageDifferencer() {
198   for (int i = 0; i < owned_key_comparators_.size(); ++i) {
199     delete owned_key_comparators_[i];
200   }
201   for (int i = 0; i < ignore_criteria_.size(); ++i) {
202     delete ignore_criteria_[i];
203   }
204 }
205 
set_field_comparator(FieldComparator * comparator)206 void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
207   GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
208   field_comparator_ = comparator;
209 }
210 
set_message_field_comparison(MessageFieldComparison comparison)211 void MessageDifferencer::set_message_field_comparison(
212     MessageFieldComparison comparison) {
213   message_field_comparison_ = comparison;
214 }
215 
set_scope(Scope scope)216 void MessageDifferencer::set_scope(Scope scope) {
217   scope_ = scope;
218 }
219 
scope()220 MessageDifferencer::Scope MessageDifferencer::scope() {
221   return scope_;
222 }
223 
set_float_comparison(FloatComparison comparison)224 void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
225   default_field_comparator_.set_float_comparison(
226       comparison == EXACT ?
227       DefaultFieldComparator::EXACT : DefaultFieldComparator::APPROXIMATE);
228 }
229 
set_repeated_field_comparison(RepeatedFieldComparison comparison)230 void MessageDifferencer::set_repeated_field_comparison(
231     RepeatedFieldComparison comparison) {
232   repeated_field_comparison_ = comparison;
233 }
234 
TreatAsSet(const FieldDescriptor * field)235 void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
236   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
237                                << field->full_name();
238   const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
239   GOOGLE_CHECK(key_comparator == NULL)
240       << "Cannot treat this repeated field as both Map and Set for"
241       << " comparison.  Field name is: " << field->full_name();
242   GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
243       << "Cannot treat the same field as both SET and LIST. Field name is: "
244       << field->full_name();
245   set_fields_.insert(field);
246 }
247 
TreatAsList(const FieldDescriptor * field)248 void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
249   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
250                               << field->full_name();
251   const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
252   GOOGLE_CHECK(key_comparator == NULL)
253       << "Cannot treat this repeated field as both Map and Set for"
254       << " comparison.  Field name is: " << field->full_name();
255   GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
256       << "Cannot treat the same field as both SET and LIST. Field name is: "
257       << field->full_name();
258   list_fields_.insert(field);
259 }
260 
TreatAsMap(const FieldDescriptor * field,const FieldDescriptor * key)261 void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
262                                     const FieldDescriptor* key) {
263   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
264                                << field->full_name();
265   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
266       << "Field has to be message type.  Field name is: "
267       << field->full_name();
268   GOOGLE_CHECK(key->containing_type() == field->message_type())
269       << key->full_name()
270       << " must be a direct subfield within the repeated field "
271       << field->full_name() << ", not " << key->containing_type()->full_name();
272   GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
273       << "Cannot treat this repeated field as both Map and Set for "
274       << "comparison.";
275   GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
276       << "Cannot treat this repeated field as both Map and List for "
277       << "comparison.";
278   MapKeyComparator* key_comparator =
279       new MultipleFieldsMapKeyComparator(this, key);
280   owned_key_comparators_.push_back(key_comparator);
281   map_field_key_comparator_[field] = key_comparator;
282 }
283 
TreatAsMapWithMultipleFieldsAsKey(const FieldDescriptor * field,const vector<const FieldDescriptor * > & key_fields)284 void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
285     const FieldDescriptor* field,
286     const vector<const FieldDescriptor*>& key_fields) {
287   vector<vector<const FieldDescriptor*> > key_field_paths;
288   for (int i = 0; i < key_fields.size(); ++i) {
289     vector<const FieldDescriptor*> key_field_path;
290     key_field_path.push_back(key_fields[i]);
291     key_field_paths.push_back(key_field_path);
292   }
293   TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
294 }
295 
TreatAsMapWithMultipleFieldPathsAsKey(const FieldDescriptor * field,const vector<vector<const FieldDescriptor * >> & key_field_paths)296 void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
297     const FieldDescriptor* field,
298     const vector<vector<const FieldDescriptor*> >& key_field_paths) {
299   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
300                               << field->full_name();
301   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
302       << "Field has to be message type.  Field name is: "
303       << field->full_name();
304   for (int i = 0; i < key_field_paths.size(); ++i) {
305     const vector<const FieldDescriptor*>& key_field_path = key_field_paths[i];
306     for (int j = 0; j < key_field_path.size(); ++j) {
307       const FieldDescriptor* parent_field =
308           j == 0 ? field : key_field_path[j - 1];
309       const FieldDescriptor* child_field = key_field_path[j];
310       GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
311           << child_field->full_name()
312           << " must be a direct subfield within the field: "
313           << parent_field->full_name();
314       if (j != 0) {
315         GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
316             << parent_field->full_name() << " has to be of type message.";
317         GOOGLE_CHECK(!parent_field->is_repeated())
318             << parent_field->full_name() << " cannot be a repeated field.";
319       }
320     }
321   }
322   GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
323       << "Cannot treat this repeated field as both Map and Set for "
324       << "comparison.";
325   MapKeyComparator* key_comparator =
326       new MultipleFieldsMapKeyComparator(this, key_field_paths);
327   owned_key_comparators_.push_back(key_comparator);
328   map_field_key_comparator_[field] = key_comparator;
329 }
330 
TreatAsMapUsingKeyComparator(const FieldDescriptor * field,const MapKeyComparator * key_comparator)331 void MessageDifferencer::TreatAsMapUsingKeyComparator(
332     const FieldDescriptor* field,
333     const MapKeyComparator* key_comparator) {
334   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
335                                << field->full_name();
336   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
337       << "Field has to be message type.  Field name is: "
338       << field->full_name();
339   GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
340       << "Cannot treat this repeated field as both Map and Set for "
341       << "comparison.";
342   map_field_key_comparator_[field] = key_comparator;
343 }
344 
AddIgnoreCriteria(IgnoreCriteria * ignore_criteria)345 void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
346   ignore_criteria_.push_back(ignore_criteria);
347 }
348 
IgnoreField(const FieldDescriptor * field)349 void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
350   ignored_fields_.insert(field);
351 }
352 
SetFractionAndMargin(const FieldDescriptor * field,double fraction,double margin)353 void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
354                                               double fraction, double margin) {
355   default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
356 }
357 
ReportDifferencesToString(string * output)358 void MessageDifferencer::ReportDifferencesToString(string* output) {
359   GOOGLE_DCHECK(output) << "Specified output string was NULL";
360 
361   output_string_ = output;
362   output_string_->clear();
363 }
364 
ReportDifferencesTo(Reporter * reporter)365 void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
366   // If an output string is set, clear it to prevent
367   // it superceding the specified reporter.
368   if (output_string_) {
369     output_string_ = NULL;
370   }
371 
372   reporter_ = reporter;
373 }
374 
FieldBefore(const FieldDescriptor * field1,const FieldDescriptor * field2)375 bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
376                                      const FieldDescriptor* field2) {
377   // Handle sentinel values (i.e. make sure NULLs are always ordered
378   // at the end of the list).
379   if (field1 == NULL) {
380     return false;
381   }
382 
383   if (field2 == NULL) {
384     return true;
385   }
386 
387   // Always order fields by their tag number
388   return (field1->number() < field2->number());
389 }
390 
Compare(const Message & message1,const Message & message2)391 bool MessageDifferencer::Compare(const Message& message1,
392                                  const Message& message2) {
393   vector<SpecificField> parent_fields;
394 
395   bool result = false;
396 
397   // Setup the internal reporter if need be.
398   if (output_string_) {
399     io::StringOutputStream output_stream(output_string_);
400     StreamReporter reporter(&output_stream);
401     reporter_ = &reporter;
402     result = Compare(message1, message2, &parent_fields);
403     reporter_ = NULL;
404   } else {
405     result = Compare(message1, message2, &parent_fields);
406   }
407 
408   return result;
409 }
410 
CompareWithFields(const Message & message1,const Message & message2,const vector<const FieldDescriptor * > & message1_fields_arg,const vector<const FieldDescriptor * > & message2_fields_arg)411 bool MessageDifferencer::CompareWithFields(
412     const Message& message1,
413     const Message& message2,
414     const vector<const FieldDescriptor*>& message1_fields_arg,
415     const vector<const FieldDescriptor*>& message2_fields_arg) {
416   if (message1.GetDescriptor() != message2.GetDescriptor()) {
417     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
418                 << "descriptors.";
419     return false;
420   }
421 
422   vector<SpecificField> parent_fields;
423 
424   bool result = false;
425 
426   vector<const FieldDescriptor*> message1_fields(message1_fields_arg);
427   vector<const FieldDescriptor*> message2_fields(message2_fields_arg);
428 
429   std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
430   std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
431   // Append NULL sentinel values.
432   message1_fields.push_back(NULL);
433   message2_fields.push_back(NULL);
434 
435   // Setup the internal reporter if need be.
436   if (output_string_) {
437     io::StringOutputStream output_stream(output_string_);
438     StreamReporter reporter(&output_stream);
439     reporter_ = &reporter;
440     result = CompareRequestedFieldsUsingSettings(
441         message1, message2, message1_fields, message2_fields, &parent_fields);
442     reporter_ = NULL;
443   } else {
444     result = CompareRequestedFieldsUsingSettings(
445         message1, message2, message1_fields, message2_fields, &parent_fields);
446   }
447 
448   return result;
449 }
450 
Compare(const Message & message1,const Message & message2,vector<SpecificField> * parent_fields)451 bool MessageDifferencer::Compare(
452     const Message& message1,
453     const Message& message2,
454     vector<SpecificField>* parent_fields) {
455   const Descriptor* descriptor1 = message1.GetDescriptor();
456   const Descriptor* descriptor2 = message2.GetDescriptor();
457   if (descriptor1 != descriptor2) {
458     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
459                 << "descriptors. "
460                 << descriptor1->full_name() << " vs "
461                 << descriptor2->full_name();
462     return false;
463   }
464   // Expand google.protobuf.Any payload if possible.
465   if (descriptor1->full_name() == internal::kAnyFullTypeName) {
466     google::protobuf::scoped_ptr<Message> data1;
467     google::protobuf::scoped_ptr<Message> data2;
468     if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
469       return Compare(*data1, *data2, parent_fields);
470     }
471   }
472   const Reflection* reflection1 = message1.GetReflection();
473   const Reflection* reflection2 = message2.GetReflection();
474 
475   // Retrieve all the set fields, including extensions.
476   vector<const FieldDescriptor*> message1_fields;
477   message1_fields.reserve(1 + message1.GetDescriptor()->field_count());
478 
479   vector<const FieldDescriptor*> message2_fields;
480   message2_fields.reserve(1 + message2.GetDescriptor()->field_count());
481 
482   reflection1->ListFields(message1, &message1_fields);
483   reflection2->ListFields(message2, &message2_fields);
484 
485   // Add sentinel values to deal with the
486   // case where the number of the fields in
487   // each list are different.
488   message1_fields.push_back(NULL);
489   message2_fields.push_back(NULL);
490 
491   bool unknown_compare_result = true;
492   // Ignore unknown fields in EQUIVALENT mode
493   if (message_field_comparison_ != EQUIVALENT) {
494     const google::protobuf::UnknownFieldSet* unknown_field_set1 =
495         &reflection1->GetUnknownFields(message1);
496     const google::protobuf::UnknownFieldSet* unknown_field_set2 =
497         &reflection2->GetUnknownFields(message2);
498     if (!CompareUnknownFields(message1, message2,
499                               *unknown_field_set1, *unknown_field_set2,
500                               parent_fields)) {
501       if (reporter_ == NULL) {
502         return false;
503       };
504       unknown_compare_result = false;
505     }
506   }
507 
508   return CompareRequestedFieldsUsingSettings(
509       message1, message2,
510       message1_fields, message2_fields,
511       parent_fields) && unknown_compare_result;
512 }
513 
CompareRequestedFieldsUsingSettings(const Message & message1,const Message & message2,const vector<const FieldDescriptor * > & message1_fields,const vector<const FieldDescriptor * > & message2_fields,vector<SpecificField> * parent_fields)514 bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
515     const Message& message1,
516     const Message& message2,
517     const vector<const FieldDescriptor*>& message1_fields,
518     const vector<const FieldDescriptor*>& message2_fields,
519     vector<SpecificField>* parent_fields) {
520   if (scope_ == FULL) {
521     if (message_field_comparison_ == EQUIVALENT) {
522       // We need to merge the field lists of both messages (i.e.
523       // we are merely checking for a difference in field values,
524       // rather than the addition or deletion of fields).
525       vector<const FieldDescriptor*> fields_union;
526       CombineFields(message1_fields, FULL, message2_fields, FULL,
527                     &fields_union);
528       return CompareWithFieldsInternal(message1, message2, fields_union,
529                                        fields_union, parent_fields);
530     } else {
531       // Simple equality comparison, use the unaltered field lists.
532       return CompareWithFieldsInternal(message1, message2, message1_fields,
533                                        message2_fields, parent_fields);
534     }
535   } else {
536     if (message_field_comparison_ == EQUIVALENT) {
537       // We use the list of fields for message1 for both messages when
538       // comparing.  This way, extra fields in message2 are ignored,
539       // and missing fields in message2 use their default value.
540       return CompareWithFieldsInternal(message1, message2, message1_fields,
541                                        message1_fields, parent_fields);
542     } else {
543       // We need to consider the full list of fields for message1
544       // but only the intersection for message2.  This way, any fields
545       // only present in message2 will be ignored, but any fields only
546       // present in message1 will be marked as a difference.
547       vector<const FieldDescriptor*> fields_intersection;
548       CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL,
549                     &fields_intersection);
550       return CompareWithFieldsInternal(message1, message2, message1_fields,
551                                        fields_intersection, parent_fields);
552     }
553   }
554 }
555 
CombineFields(const vector<const FieldDescriptor * > & fields1,Scope fields1_scope,const vector<const FieldDescriptor * > & fields2,Scope fields2_scope,vector<const FieldDescriptor * > * combined_fields)556 void MessageDifferencer::CombineFields(
557     const vector<const FieldDescriptor*>& fields1,
558     Scope fields1_scope,
559     const vector<const FieldDescriptor*>& fields2,
560     Scope fields2_scope,
561     vector<const FieldDescriptor*>* combined_fields) {
562 
563   int index1 = 0;
564   int index2 = 0;
565 
566   while (index1 < fields1.size() && index2 < fields2.size()) {
567     const FieldDescriptor* field1 = fields1[index1];
568     const FieldDescriptor* field2 = fields2[index2];
569 
570     if (FieldBefore(field1, field2)) {
571       if (fields1_scope == FULL) {
572         combined_fields->push_back(fields1[index1]);
573       }
574       ++index1;
575     } else if (FieldBefore(field2, field1)) {
576       if (fields2_scope == FULL) {
577         combined_fields->push_back(fields2[index2]);
578       }
579       ++index2;
580     } else {
581       combined_fields->push_back(fields1[index1]);
582       ++index1;
583       ++index2;
584     }
585   }
586 }
587 
CompareWithFieldsInternal(const Message & message1,const Message & message2,const vector<const FieldDescriptor * > & message1_fields,const vector<const FieldDescriptor * > & message2_fields,vector<SpecificField> * parent_fields)588 bool MessageDifferencer::CompareWithFieldsInternal(
589     const Message& message1,
590     const Message& message2,
591     const vector<const FieldDescriptor*>& message1_fields,
592     const vector<const FieldDescriptor*>& message2_fields,
593     vector<SpecificField>* parent_fields) {
594   bool isDifferent = false;
595   int field_index1 = 0;
596   int field_index2 = 0;
597 
598   const Reflection* reflection1 = message1.GetReflection();
599   const Reflection* reflection2 = message2.GetReflection();
600 
601   while (true) {
602     const FieldDescriptor* field1 = message1_fields[field_index1];
603     const FieldDescriptor* field2 = message2_fields[field_index2];
604 
605     // Once we have reached sentinel values, we are done the comparison.
606     if (field1 == NULL && field2 == NULL) {
607       break;
608     }
609 
610     // Check for differences in the field itself.
611     if (FieldBefore(field1, field2)) {
612       // Field 1 is not in the field list for message 2.
613       if (IsIgnored(message1, message2, field1, *parent_fields)) {
614         // We are ignoring field1. Report the ignore and move on to
615         // the next field in message1_fields.
616         if (reporter_ != NULL) {
617           SpecificField specific_field;
618           specific_field.field = field1;
619 
620           parent_fields->push_back(specific_field);
621           reporter_->ReportIgnored(message1, message2, *parent_fields);
622           parent_fields->pop_back();
623         }
624         ++field_index1;
625         continue;
626       }
627 
628       if (reporter_ != NULL) {
629         int count = field1->is_repeated() ?
630             reflection1->FieldSize(message1, field1) : 1;
631 
632         for (int i = 0; i < count; ++i) {
633           SpecificField specific_field;
634           specific_field.field = field1;
635           specific_field.index = field1->is_repeated() ? i : -1;
636 
637           parent_fields->push_back(specific_field);
638           reporter_->ReportDeleted(message1, message2, *parent_fields);
639           parent_fields->pop_back();
640         }
641 
642         isDifferent = true;
643       } else {
644         return false;
645       }
646 
647       ++field_index1;
648       continue;
649     } else if (FieldBefore(field2, field1)) {
650       // Field 2 is not in the field list for message 1.
651       if (IsIgnored(message1, message2, field2, *parent_fields)) {
652         // We are ignoring field2. Report the ignore and move on to
653         // the next field in message2_fields.
654         if (reporter_ != NULL) {
655           SpecificField specific_field;
656           specific_field.field = field2;
657 
658           parent_fields->push_back(specific_field);
659           reporter_->ReportIgnored(message1, message2, *parent_fields);
660           parent_fields->pop_back();
661         }
662         ++field_index2;
663         continue;
664       }
665 
666       if (reporter_ != NULL) {
667         int count = field2->is_repeated() ?
668             reflection2->FieldSize(message2, field2) : 1;
669 
670         for (int i = 0; i < count; ++i) {
671           SpecificField specific_field;
672           specific_field.field = field2;
673           specific_field.index = field2->is_repeated() ? i : -1;
674           specific_field.new_index = specific_field.index;
675 
676           parent_fields->push_back(specific_field);
677           reporter_->ReportAdded(message1, message2, *parent_fields);
678           parent_fields->pop_back();
679         }
680 
681         isDifferent = true;
682       } else {
683         return false;
684       }
685 
686       ++field_index2;
687       continue;
688     }
689 
690     // By this point, field1 and field2 are guarenteed to point to the same
691     // field, so we can now compare the values.
692     if (IsIgnored(message1, message2, field1, *parent_fields)) {
693       // Ignore this field. Report and move on.
694       if (reporter_ != NULL) {
695         SpecificField specific_field;
696         specific_field.field = field1;
697 
698         parent_fields->push_back(specific_field);
699         reporter_->ReportIgnored(message1, message2, *parent_fields);
700         parent_fields->pop_back();
701       }
702 
703       ++field_index1;
704       ++field_index2;
705       continue;
706     }
707 
708     bool fieldDifferent = false;
709     if (field1->is_repeated()) {
710       fieldDifferent = !CompareRepeatedField(message1, message2, field1,
711                                              parent_fields);
712       if (fieldDifferent) {
713         if (reporter_ == NULL) return false;
714         isDifferent = true;
715       }
716     } else {
717       fieldDifferent = !CompareFieldValueUsingParentFields(
718           message1, message2, field1, -1, -1, parent_fields);
719 
720       // If we have found differences, either report them or terminate if
721       // no reporter is present.
722       if (fieldDifferent && reporter_ == NULL) {
723         return false;
724       }
725 
726       if (reporter_ != NULL) {
727         SpecificField specific_field;
728         specific_field.field = field1;
729         parent_fields->push_back(specific_field);
730         if (fieldDifferent) {
731           reporter_->ReportModified(message1, message2, *parent_fields);
732           isDifferent = true;
733         } else if (report_matches_) {
734           reporter_->ReportMatched(message1, message2, *parent_fields);
735         }
736         parent_fields->pop_back();
737       }
738     }
739     // Increment the field indicies.
740     ++field_index1;
741     ++field_index2;
742   }
743 
744   return !isDifferent;
745 }
746 
IsMatch(const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const Message * message1,const Message * message2,const vector<SpecificField> & parent_fields,int index1,int index2)747 bool MessageDifferencer::IsMatch(const FieldDescriptor* repeated_field,
748                                  const MapKeyComparator* key_comparator,
749                                  const Message* message1,
750                                  const Message* message2,
751                                  const vector<SpecificField>& parent_fields,
752                                  int index1, int index2) {
753   vector<SpecificField> current_parent_fields(parent_fields);
754   if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
755     return CompareFieldValueUsingParentFields(
756         *message1, *message2, repeated_field, index1, index2,
757         &current_parent_fields);
758   }
759   // Back up the Reporter and output_string_.  They will be reset in the
760   // following code.
761   Reporter* backup_reporter = reporter_;
762   string* output_string = output_string_;
763   reporter_ = NULL;
764   output_string_ = NULL;
765   bool match;
766 
767   if (key_comparator == NULL) {
768     match = CompareFieldValueUsingParentFields(
769         *message1, *message2, repeated_field, index1, index2,
770         &current_parent_fields);
771   } else {
772     const Reflection* reflection1 = message1->GetReflection();
773     const Reflection* reflection2 = message2->GetReflection();
774     const Message& m1 =
775         reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
776     const Message& m2 =
777         reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
778     SpecificField specific_field;
779     specific_field.field = repeated_field;
780     current_parent_fields.push_back(specific_field);
781     match = key_comparator->IsMatch(m1, m2, current_parent_fields);
782   }
783 
784   reporter_ = backup_reporter;
785   output_string_ = output_string;
786   return match;
787 }
788 
CompareRepeatedField(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,vector<SpecificField> * parent_fields)789 bool MessageDifferencer::CompareRepeatedField(
790     const Message& message1,
791     const Message& message2,
792     const FieldDescriptor* repeated_field,
793     vector<SpecificField>* parent_fields) {
794   // the input FieldDescriptor is guaranteed to be repeated field.
795   const Reflection* reflection1 = message1.GetReflection();
796   const Reflection* reflection2 = message2.GetReflection();
797   const int count1 = reflection1->FieldSize(message1, repeated_field);
798   const int count2 = reflection2->FieldSize(message2, repeated_field);
799   const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
800 
801   // If the field is not treated as subset and no detailed reports is needed,
802   // we do a quick check on the number of the elements to avoid unnecessary
803   // comparison.
804   if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
805     return false;
806   }
807   // A match can never be found if message1 has more items than message2.
808   if (count1 > count2 && reporter_ == NULL) {
809     return false;
810   }
811 
812   // These two list are used for store the index of the correspondent
813   // element in peer repeated field.
814   vector<int> match_list1;
815   vector<int> match_list2;
816 
817   // Try to match indices of the repeated fields. Return false if match fails
818   // and there's no detailed report needed.
819   if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
820                                  *parent_fields, &match_list1, &match_list2) &&
821       reporter_ == NULL) {
822     return false;
823   }
824 
825   bool fieldDifferent = false;
826   SpecificField specific_field;
827   specific_field.field = repeated_field;
828 
829   // At this point, we have already matched pairs of fields (with the reporting
830   // to be done later). Now to check if the paired elements are different.
831   for (int i = 0; i < count1; i++) {
832     if (match_list1[i] == -1) continue;
833     specific_field.index = i;
834     specific_field.new_index = match_list1[i];
835 
836     const bool result = CompareFieldValueUsingParentFields(
837         message1, message2, repeated_field, i, specific_field.new_index,
838         parent_fields);
839 
840     // If we have found differences, either report them or terminate if
841     // no reporter is present. Note that ReportModified, ReportMoved, and
842     // ReportMatched are all mutually exclusive.
843     if (!result) {
844       if (reporter_ == NULL) return false;
845       parent_fields->push_back(specific_field);
846       reporter_->ReportModified(message1, message2, *parent_fields);
847       parent_fields->pop_back();
848       fieldDifferent = true;
849     } else if (reporter_ != NULL &&
850                specific_field.index != specific_field.new_index) {
851       parent_fields->push_back(specific_field);
852       reporter_->ReportMoved(message1, message2, *parent_fields);
853       parent_fields->pop_back();
854     } else if (report_matches_ && reporter_ != NULL) {
855       parent_fields->push_back(specific_field);
856       reporter_->ReportMatched(message1, message2, *parent_fields);
857       parent_fields->pop_back();
858     }
859   }
860 
861   // Report any remaining additions or deletions.
862   for (int i = 0; i < count2; ++i) {
863     if (match_list2[i] != -1) continue;
864     if (!treated_as_subset) {
865       fieldDifferent = true;
866     }
867 
868     if (reporter_ == NULL) continue;
869     specific_field.index = i;
870     specific_field.new_index = i;
871     parent_fields->push_back(specific_field);
872     reporter_->ReportAdded(message1, message2, *parent_fields);
873     parent_fields->pop_back();
874   }
875 
876   for (int i = 0; i < count1; ++i) {
877     if (match_list1[i] != -1) continue;
878     specific_field.index = i;
879     parent_fields->push_back(specific_field);
880     reporter_->ReportDeleted(message1, message2, *parent_fields);
881     parent_fields->pop_back();
882     fieldDifferent = true;
883   }
884   return !fieldDifferent;
885 }
886 
CompareFieldValue(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2)887 bool MessageDifferencer::CompareFieldValue(const Message& message1,
888                                            const Message& message2,
889                                            const FieldDescriptor* field,
890                                            int index1,
891                                            int index2) {
892   return CompareFieldValueUsingParentFields(message1, message2, field, index1,
893                                             index2, NULL);
894 }
895 
CompareFieldValueUsingParentFields(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,vector<SpecificField> * parent_fields)896 bool MessageDifferencer::CompareFieldValueUsingParentFields(
897     const Message& message1, const Message& message2,
898     const FieldDescriptor* field, int index1, int index2,
899     vector<SpecificField>* parent_fields) {
900   FieldContext field_context(parent_fields);
901   FieldComparator::ComparisonResult result = GetFieldComparisonResult(
902       message1, message2, field, index1, index2, &field_context);
903 
904   if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
905       result == FieldComparator::RECURSE) {
906     // Get the nested messages and compare them using one of the Compare
907     // methods.
908     const Reflection* reflection1 = message1.GetReflection();
909     const Reflection* reflection2 = message2.GetReflection();
910     const Message& m1 = field->is_repeated() ?
911         reflection1->GetRepeatedMessage(message1, field, index1) :
912         reflection1->GetMessage(message1, field);
913     const Message& m2 = field->is_repeated() ?
914         reflection2->GetRepeatedMessage(message2, field, index2) :
915         reflection2->GetMessage(message2, field);
916 
917     // parent_fields is used in calls to Reporter methods.
918     if (parent_fields != NULL) {
919       // Append currently compared field to the end of parent_fields.
920       SpecificField specific_field;
921       specific_field.field = field;
922       specific_field.index = index1;
923       specific_field.new_index = index2;
924       parent_fields->push_back(specific_field);
925       const bool compare_result = Compare(m1, m2, parent_fields);
926       parent_fields->pop_back();
927       return compare_result;
928     } else {
929       // Recreates parent_fields as if m1 and m2 had no parents.
930       return Compare(m1, m2);
931     }
932   } else {
933     return (result == FieldComparator::SAME);
934   }
935 }
936 
CheckPathChanged(const vector<SpecificField> & field_path)937 bool MessageDifferencer::CheckPathChanged(
938     const vector<SpecificField>& field_path) {
939   for (int i = 0; i < field_path.size(); ++i) {
940     if (field_path[i].index != field_path[i].new_index) return true;
941   }
942   return false;
943 }
944 
IsTreatedAsSet(const FieldDescriptor * field)945 bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
946   if (!field->is_repeated()) return false;
947   if (field->is_map()) return true;
948   if (repeated_field_comparison_ == AS_SET)
949     return list_fields_.find(field) == list_fields_.end();
950   return (set_fields_.find(field) != set_fields_.end());
951 }
952 
IsTreatedAsSubset(const FieldDescriptor * field)953 bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
954   return scope_ == PARTIAL &&
955       (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
956 }
957 
IsIgnored(const Message & message1,const Message & message2,const FieldDescriptor * field,const vector<SpecificField> & parent_fields)958 bool MessageDifferencer::IsIgnored(
959     const Message& message1,
960     const Message& message2,
961     const FieldDescriptor* field,
962     const vector<SpecificField>& parent_fields) {
963   if (ignored_fields_.find(field) != ignored_fields_.end()) {
964     return true;
965   }
966   for (int i = 0; i < ignore_criteria_.size(); ++i) {
967     if (ignore_criteria_[i]->IsIgnored(message1, message2, field,
968                                        parent_fields)) {
969       return true;
970     }
971   }
972   return false;
973 }
974 
IsUnknownFieldIgnored(const Message & message1,const Message & message2,const SpecificField & field,const vector<SpecificField> & parent_fields)975 bool MessageDifferencer::IsUnknownFieldIgnored(
976     const Message& message1, const Message& message2,
977     const SpecificField& field, const vector<SpecificField>& parent_fields) {
978   for (int i = 0; i < ignore_criteria_.size(); ++i) {
979     if (ignore_criteria_[i]->IsUnknownFieldIgnored(message1, message2, field,
980                                                    parent_fields)) {
981       return true;
982     }
983   }
984   return false;
985 }
986 
987 const MessageDifferencer::MapKeyComparator* MessageDifferencer
GetMapKeyComparator(const FieldDescriptor * field)988     ::GetMapKeyComparator(const FieldDescriptor* field) {
989   if (!field->is_repeated()) return NULL;
990   if (map_field_key_comparator_.find(field) !=
991       map_field_key_comparator_.end()) {
992     return map_field_key_comparator_[field];
993   }
994   return NULL;
995 }
996 
997 namespace {
998 
999 typedef pair<int, const UnknownField*> IndexUnknownFieldPair;
1000 
1001 struct UnknownFieldOrdering {
operator ()google::protobuf::util::__anon672b6d760111::UnknownFieldOrdering1002   inline bool operator()(const IndexUnknownFieldPair& a,
1003                          const IndexUnknownFieldPair& b) const {
1004     if (a.second->number() < b.second->number()) return true;
1005     if (a.second->number() > b.second->number()) return false;
1006     return a.second->type() < b.second->type();
1007   }
1008 };
1009 
1010 }  // namespace
1011 
UnpackAny(const Message & any,google::protobuf::scoped_ptr<Message> * data)1012 bool MessageDifferencer::UnpackAny(const Message& any,
1013                                    google::protobuf::scoped_ptr<Message>* data) {
1014   const Reflection* reflection = any.GetReflection();
1015   const FieldDescriptor* type_url_field;
1016   const FieldDescriptor* value_field;
1017   if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
1018     return false;
1019   }
1020   const string& type_url = reflection->GetString(any, type_url_field);
1021   string full_type_name;
1022   if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1023     return false;
1024   }
1025 
1026   const google::protobuf::Descriptor* desc =
1027       any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1028           full_type_name);
1029   if (desc == NULL) {
1030     GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
1031     return false;
1032   }
1033 
1034   if (dynamic_message_factory_ == NULL) {
1035     dynamic_message_factory_.reset(new DynamicMessageFactory());
1036   }
1037   data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
1038   string serialized_value = reflection->GetString(any, value_field);
1039   if (!(*data)->ParseFromString(serialized_value)) {
1040     GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
1041     return false;
1042   }
1043   return true;
1044 }
1045 
CompareUnknownFields(const Message & message1,const Message & message2,const google::protobuf::UnknownFieldSet & unknown_field_set1,const google::protobuf::UnknownFieldSet & unknown_field_set2,vector<SpecificField> * parent_field)1046 bool MessageDifferencer::CompareUnknownFields(
1047     const Message& message1, const Message& message2,
1048     const google::protobuf::UnknownFieldSet& unknown_field_set1,
1049     const google::protobuf::UnknownFieldSet& unknown_field_set2,
1050     vector<SpecificField>* parent_field) {
1051   // Ignore unknown fields in EQUIVALENT mode.
1052   if (message_field_comparison_ == EQUIVALENT) return true;
1053 
1054   if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
1055     return true;
1056   }
1057 
1058   bool is_different = false;
1059 
1060   // We first sort the unknown fields by field number and type (in other words,
1061   // in tag order), making sure to preserve ordering of values with the same
1062   // tag.  This allows us to report only meaningful differences between the
1063   // two sets -- that is, differing values for the same tag.  We use
1064   // IndexUnknownFieldPairs to keep track of the field's original index for
1065   // reporting purposes.
1066   vector<IndexUnknownFieldPair> fields1;  // unknown_field_set1, sorted
1067   vector<IndexUnknownFieldPair> fields2;  // unknown_field_set2, sorted
1068   fields1.reserve(unknown_field_set1.field_count());
1069   fields2.reserve(unknown_field_set2.field_count());
1070 
1071   for (int i = 0; i < unknown_field_set1.field_count(); i++) {
1072     fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
1073   }
1074   for (int i = 0; i < unknown_field_set2.field_count(); i++) {
1075     fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
1076   }
1077 
1078   UnknownFieldOrdering is_before;
1079   std::stable_sort(fields1.begin(), fields1.end(), is_before);
1080   std::stable_sort(fields2.begin(), fields2.end(), is_before);
1081 
1082   // In order to fill in SpecificField::index, we have to keep track of how
1083   // many values we've seen with the same field number and type.
1084   // current_repeated points at the first field in this range, and
1085   // current_repeated_start{1,2} are the indexes of the first field in the
1086   // range within fields1 and fields2.
1087   const UnknownField* current_repeated = NULL;
1088   int current_repeated_start1 = 0;
1089   int current_repeated_start2 = 0;
1090 
1091   // Now that we have two sorted lists, we can detect fields which appear only
1092   // in one list or the other by traversing them simultaneously.
1093   int index1 = 0;
1094   int index2 = 0;
1095   while (index1 < fields1.size() || index2 < fields2.size()) {
1096     enum { ADDITION, DELETION, MODIFICATION, COMPARE_GROUPS,
1097       NO_CHANGE } change_type;
1098 
1099     // focus_field is the field we're currently reporting on.  (In the case
1100     // of a modification, it's the field on the left side.)
1101     const UnknownField* focus_field;
1102     bool match = false;
1103 
1104     if (index2 == fields2.size() ||
1105         (index1 < fields1.size() &&
1106           is_before(fields1[index1], fields2[index2]))) {
1107       // fields1[index1] is not present in fields2.
1108       change_type = DELETION;
1109       focus_field = fields1[index1].second;
1110     } else if (index1 == fields1.size() ||
1111                is_before(fields2[index2], fields1[index1])) {
1112       // fields2[index2] is not present in fields1.
1113       if (scope_ == PARTIAL) {
1114         // Ignore.
1115         ++index2;
1116         continue;
1117       }
1118       change_type = ADDITION;
1119       focus_field = fields2[index2].second;
1120     } else {
1121       // Field type and number are the same.  See if the values differ.
1122       change_type = MODIFICATION;
1123       focus_field = fields1[index1].second;
1124 
1125       switch (focus_field->type()) {
1126         case UnknownField::TYPE_VARINT:
1127           match = fields1[index1].second->varint() ==
1128                   fields2[index2].second->varint();
1129           break;
1130         case UnknownField::TYPE_FIXED32:
1131           match = fields1[index1].second->fixed32() ==
1132                   fields2[index2].second->fixed32();
1133           break;
1134         case UnknownField::TYPE_FIXED64:
1135           match = fields1[index1].second->fixed64() ==
1136                   fields2[index2].second->fixed64();
1137           break;
1138         case UnknownField::TYPE_LENGTH_DELIMITED:
1139           match = fields1[index1].second->length_delimited() ==
1140                   fields2[index2].second->length_delimited();
1141           break;
1142         case UnknownField::TYPE_GROUP:
1143           // We must deal with this later, after building the SpecificField.
1144           change_type = COMPARE_GROUPS;
1145           break;
1146       }
1147       if (match && change_type != COMPARE_GROUPS) {
1148         change_type = NO_CHANGE;
1149       }
1150     }
1151 
1152     if (current_repeated == NULL ||
1153         focus_field->number() != current_repeated->number() ||
1154         focus_field->type() != current_repeated->type()) {
1155       // We've started a new repeated field.
1156       current_repeated = focus_field;
1157       current_repeated_start1 = index1;
1158       current_repeated_start2 = index2;
1159     }
1160 
1161     if (change_type == NO_CHANGE && reporter_ == NULL) {
1162       // Fields were already compared and matched and we have no reporter.
1163       ++index1;
1164       ++index2;
1165       continue;
1166     }
1167 
1168     // Build the SpecificField.  This is slightly complicated.
1169     SpecificField specific_field;
1170     specific_field.unknown_field_number = focus_field->number();
1171     specific_field.unknown_field_type = focus_field->type();
1172 
1173     specific_field.unknown_field_set1 = &unknown_field_set1;
1174     specific_field.unknown_field_set2 = &unknown_field_set2;
1175 
1176     if (change_type != ADDITION) {
1177       specific_field.unknown_field_index1 = fields1[index1].first;
1178     }
1179     if (change_type != DELETION) {
1180       specific_field.unknown_field_index2 = fields2[index2].first;
1181     }
1182 
1183     // Calculate the field index.
1184     if (change_type == ADDITION) {
1185       specific_field.index = index2 - current_repeated_start2;
1186       specific_field.new_index = index2 - current_repeated_start2;
1187     } else {
1188       specific_field.index = index1 - current_repeated_start1;
1189       specific_field.new_index = index2 - current_repeated_start2;
1190     }
1191 
1192     if (IsUnknownFieldIgnored(message1, message2, specific_field,
1193                               *parent_field)) {
1194       if (reporter_ != NULL) {
1195         parent_field->push_back(specific_field);
1196         reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
1197         parent_field->pop_back();
1198       }
1199       return true;
1200     }
1201 
1202     if (change_type == ADDITION || change_type == DELETION ||
1203         change_type == MODIFICATION) {
1204       if (reporter_ == NULL) {
1205         // We found a difference and we have no reproter.
1206         return false;
1207       }
1208       is_different = true;
1209     }
1210 
1211     parent_field->push_back(specific_field);
1212 
1213     switch (change_type) {
1214       case ADDITION:
1215         reporter_->ReportAdded(message1, message2, *parent_field);
1216         ++index2;
1217         break;
1218       case DELETION:
1219         reporter_->ReportDeleted(message1, message2, *parent_field);
1220         ++index1;
1221         break;
1222       case MODIFICATION:
1223         reporter_->ReportModified(message1, message2, *parent_field);
1224         ++index1;
1225         ++index2;
1226         break;
1227       case COMPARE_GROUPS:
1228         if (!CompareUnknownFields(message1, message2,
1229                                   fields1[index1].second->group(),
1230                                   fields2[index2].second->group(),
1231                                   parent_field)) {
1232           if (reporter_ == NULL) return false;
1233           is_different = true;
1234           reporter_->ReportModified(message1, message2, *parent_field);
1235         }
1236         ++index1;
1237         ++index2;
1238         break;
1239       case NO_CHANGE:
1240         ++index1;
1241         ++index2;
1242         if (report_matches_) {
1243           reporter_->ReportMatched(message1, message2, *parent_field);
1244         }
1245     }
1246 
1247     parent_field->pop_back();
1248   }
1249 
1250   return !is_different;
1251 }
1252 
1253 namespace {
1254 
1255 // Find maximum bipartite matching using the argumenting path algorithm.
1256 class MaximumMatcher {
1257  public:
1258   typedef ResultCallback2<bool, int, int> NodeMatchCallback;
1259   // MaximumMatcher takes ownership of the passed in callback and uses it to
1260   // determine whether a node on the left side of the bipartial graph matches
1261   // a node on the right side. count1 is the number of nodes on the left side
1262   // of the graph and count2 to is the number of nodes on the right side.
1263   // Every node is referred to using 0-based indices.
1264   // If a maximum match is found, the result will be stored in match_list1 and
1265   // match_list2. match_list1[i] == j means the i-th node on the left side is
1266   // matched to the j-th node on the right side and match_list2[x] == y means
1267   // the x-th node on the right side is matched to y-th node on the left side.
1268   // match_list1[i] == -1 means the node is not matched. Same with match_list2.
1269   MaximumMatcher(int count1, int count2, NodeMatchCallback* callback,
1270                  vector<int>* match_list1, vector<int>* match_list2);
1271   // Find a maximum match and return the number of matched node pairs.
1272   // If early_return is true, this method will return 0 immediately when it
1273   // finds that not all nodes on the left side can be matched.
1274   int FindMaximumMatch(bool early_return);
1275  private:
1276   // Determines whether the node on the left side of the bipartial graph
1277   // matches the one on the right side.
1278   bool Match(int left, int right);
1279   // Find an argumenting path starting from the node v on the left side. If a
1280   // path can be found, update match_list2_ to reflect the path and return
1281   // true.
1282   bool FindArgumentPathDFS(int v, vector<bool>* visited);
1283 
1284   int count1_;
1285   int count2_;
1286   google::protobuf::scoped_ptr<NodeMatchCallback> match_callback_;
1287   map<pair<int, int>, bool> cached_match_results_;
1288   vector<int>* match_list1_;
1289   vector<int>* match_list2_;
1290   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
1291 };
1292 
MaximumMatcher(int count1,int count2,NodeMatchCallback * callback,vector<int> * match_list1,vector<int> * match_list2)1293 MaximumMatcher::MaximumMatcher(int count1, int count2,
1294                                NodeMatchCallback* callback,
1295                                vector<int>* match_list1,
1296                                vector<int>* match_list2)
1297     : count1_(count1), count2_(count2), match_callback_(callback),
1298       match_list1_(match_list1), match_list2_(match_list2) {
1299   match_list1_->assign(count1, -1);
1300   match_list2_->assign(count2, -1);
1301 }
1302 
FindMaximumMatch(bool early_return)1303 int MaximumMatcher::FindMaximumMatch(bool early_return) {
1304   int result = 0;
1305   for (int i = 0; i < count1_; ++i) {
1306     vector<bool> visited(count1_);
1307     if (FindArgumentPathDFS(i, &visited)) {
1308       ++result;
1309     } else if (early_return) {
1310       return 0;
1311     }
1312   }
1313   // Backfill match_list1_ as we only filled match_list2_ when finding
1314   // argumenting pathes.
1315   for (int i = 0; i < count2_; ++i) {
1316     if ((*match_list2_)[i] != -1) {
1317       (*match_list1_)[(*match_list2_)[i]] = i;
1318     }
1319   }
1320   return result;
1321 }
1322 
Match(int left,int right)1323 bool MaximumMatcher::Match(int left, int right) {
1324   pair<int, int> p(left, right);
1325   map<pair<int, int>, bool>::iterator it = cached_match_results_.find(p);
1326   if (it != cached_match_results_.end()) {
1327     return it->second;
1328   }
1329   cached_match_results_[p] = match_callback_->Run(left, right);
1330   return cached_match_results_[p];
1331 }
1332 
FindArgumentPathDFS(int v,vector<bool> * visited)1333 bool MaximumMatcher::FindArgumentPathDFS(int v, vector<bool>* visited) {
1334   (*visited)[v] = true;
1335   // We try to match those un-matched nodes on the right side first. This is
1336   // the step that the navie greedy matching algorithm uses. In the best cases
1337   // where the greedy algorithm can find a maximum matching, we will always
1338   // find a match in this step and the performance will be identical to the
1339   // greedy algorithm.
1340   for (int i = 0; i < count2_; ++i) {
1341     int matched = (*match_list2_)[i];
1342     if (matched == -1 && Match(v, i)) {
1343       (*match_list2_)[i] = v;
1344       return true;
1345     }
1346   }
1347   // Then we try those already matched nodes and see if we can find an
1348   // alternaive match for the node matched to them.
1349   // The greedy algorithm will stop before this and fail to produce the
1350   // correct result.
1351   for (int i = 0; i < count2_; ++i) {
1352     int matched = (*match_list2_)[i];
1353     if (matched != -1 && Match(v, i)) {
1354       if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
1355         (*match_list2_)[i] = v;
1356         return true;
1357       }
1358     }
1359   }
1360   return false;
1361 }
1362 
1363 }  // namespace
1364 
MatchRepeatedFieldIndices(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,const vector<SpecificField> & parent_fields,vector<int> * match_list1,vector<int> * match_list2)1365 bool MessageDifferencer::MatchRepeatedFieldIndices(
1366     const Message& message1,
1367     const Message& message2,
1368     const FieldDescriptor* repeated_field,
1369     const vector<SpecificField>& parent_fields,
1370     vector<int>* match_list1,
1371     vector<int>* match_list2) {
1372   const int count1 =
1373       message1.GetReflection()->FieldSize(message1, repeated_field);
1374   const int count2 =
1375       message2.GetReflection()->FieldSize(message2, repeated_field);
1376   const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
1377 
1378   match_list1->assign(count1, -1);
1379   match_list2->assign(count2, -1);
1380 
1381   SpecificField specific_field;
1382   specific_field.field = repeated_field;
1383 
1384   bool success = true;
1385   // Find potential match if this is a special repeated field.
1386   if (key_comparator != NULL || IsTreatedAsSet(repeated_field)) {
1387     if (scope_ == PARTIAL) {
1388       // When partial matching is enabled, Compare(a, b) && Compare(a, c)
1389       // doesn't necessarily imply Compare(b, c). Therefore a naive greedy
1390       // algorithm will fail to find a maximum matching.
1391       // Here we use the argumenting path algorithm.
1392       MaximumMatcher::NodeMatchCallback* callback =
1393           ::google::protobuf::internal::NewPermanentCallback(
1394               this, &MessageDifferencer::IsMatch,
1395               repeated_field, key_comparator,
1396               &message1, &message2, parent_fields);
1397       MaximumMatcher matcher(count1, count2, callback, match_list1,
1398                              match_list2);
1399       // If diff info is not needed, we should end the matching process as
1400       // soon as possible if not all items can be matched.
1401       bool early_return = (reporter_ == NULL);
1402       int match_count = matcher.FindMaximumMatch(early_return);
1403       if (match_count != count1 && reporter_ == NULL) return false;
1404       success = success && (match_count == count1);
1405     } else {
1406       for (int i = 0; i < count1; ++i) {
1407         // Indicates any matched elements for this repeated field.
1408         bool match = false;
1409 
1410         specific_field.index = i;
1411         specific_field.new_index = i;
1412 
1413         for (int j = 0; j < count2; j++) {
1414           if (match_list2->at(j) != -1) continue;
1415           specific_field.index = i;
1416           specific_field.new_index = j;
1417 
1418           match = IsMatch(repeated_field, key_comparator,
1419                           &message1, &message2, parent_fields, i, j);
1420 
1421           if (match) {
1422             match_list1->at(specific_field.index) = specific_field.new_index;
1423             match_list2->at(specific_field.new_index) = specific_field.index;
1424             break;
1425           }
1426         }
1427         if (!match && reporter_ == NULL) return false;
1428         success = success && match;
1429       }
1430     }
1431   } else {
1432     // If this field should be treated as list, just label the match_list.
1433     for (int i = 0; i < count1 && i < count2; i++) {
1434       match_list1->at(i) = i;
1435       match_list2->at(i) = i;
1436     }
1437   }
1438 
1439   return success;
1440 }
1441 
GetFieldComparisonResult(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,const FieldContext * field_context)1442 FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
1443     const Message& message1, const Message& message2,
1444     const FieldDescriptor* field, int index1, int index2,
1445     const FieldContext* field_context) {
1446   FieldComparator* comparator = field_comparator_ != NULL ?
1447       field_comparator_ : &default_field_comparator_;
1448   return comparator->Compare(message1, message2, field,
1449                              index1, index2, field_context);
1450 }
1451 
1452 // ===========================================================================
1453 
Reporter()1454 MessageDifferencer::Reporter::Reporter() { }
~Reporter()1455 MessageDifferencer::Reporter::~Reporter() {}
1456 
1457 // ===========================================================================
1458 
MapKeyComparator()1459 MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
~MapKeyComparator()1460 MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
1461 
1462 // ===========================================================================
1463 
IgnoreCriteria()1464 MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
~IgnoreCriteria()1465 MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
1466 
1467 // ===========================================================================
1468 
1469 // Note that the printer's delimiter is not used, because if we are given a
1470 // printer, we don't know its delimiter.
StreamReporter(io::ZeroCopyOutputStream * output)1471 MessageDifferencer::StreamReporter::StreamReporter(
1472     io::ZeroCopyOutputStream* output) : printer_(new io::Printer(output, '$')),
1473                                         delete_printer_(true),
1474                                         report_modified_aggregates_(false) { }
1475 
StreamReporter(io::Printer * printer)1476 MessageDifferencer::StreamReporter::StreamReporter(
1477     io::Printer* printer) : printer_(printer),
1478                             delete_printer_(false),
1479                             report_modified_aggregates_(false) { }
1480 
~StreamReporter()1481 MessageDifferencer::StreamReporter::~StreamReporter() {
1482   if (delete_printer_) delete printer_;
1483 }
1484 
PrintPath(const vector<SpecificField> & field_path,bool left_side)1485 void MessageDifferencer::StreamReporter::PrintPath(
1486     const vector<SpecificField>& field_path, bool left_side) {
1487   for (int i = 0; i < field_path.size(); ++i) {
1488     if (i > 0) {
1489       printer_->Print(".");
1490     }
1491 
1492     SpecificField specific_field = field_path[i];
1493 
1494     if (specific_field.field != NULL) {
1495       if (specific_field.field->is_extension()) {
1496         printer_->Print("($name$)", "name",
1497                         specific_field.field->full_name());
1498       } else {
1499         printer_->PrintRaw(specific_field.field->name());
1500       }
1501     } else {
1502       printer_->PrintRaw(SimpleItoa(specific_field.unknown_field_number));
1503     }
1504     if (left_side && specific_field.index >= 0) {
1505       printer_->Print("[$name$]", "name", SimpleItoa(specific_field.index));
1506     }
1507     if (!left_side && specific_field.new_index >= 0) {
1508       printer_->Print("[$name$]", "name", SimpleItoa(specific_field.new_index));
1509     }
1510   }
1511 }
1512 
1513 void MessageDifferencer::
PrintValue(const Message & message,const vector<SpecificField> & field_path,bool left_side)1514 StreamReporter::PrintValue(const Message& message,
1515                            const vector<SpecificField>& field_path,
1516                            bool left_side) {
1517   const SpecificField& specific_field = field_path.back();
1518   const FieldDescriptor* field = specific_field.field;
1519   if (field != NULL) {
1520     string output;
1521     int index = left_side ? specific_field.index : specific_field.new_index;
1522     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1523       const Reflection* reflection = message.GetReflection();
1524       const Message& field_message = field->is_repeated() ?
1525           reflection->GetRepeatedMessage(message, field, index) :
1526           reflection->GetMessage(message, field);
1527       output = field_message.ShortDebugString();
1528       if (output.empty()) {
1529         printer_->Print("{ }");
1530       } else {
1531         printer_->Print("{ $name$ }", "name", output);
1532       }
1533     } else {
1534       TextFormat::PrintFieldValueToString(message, field, index, &output);
1535       printer_->PrintRaw(output);
1536     }
1537   } else {
1538     const UnknownFieldSet* unknown_fields =
1539         (left_side ?
1540          specific_field.unknown_field_set1 :
1541          specific_field.unknown_field_set2);
1542     const UnknownField* unknown_field = &unknown_fields->field(
1543         left_side ?
1544         specific_field.unknown_field_index1 :
1545         specific_field.unknown_field_index2);
1546     PrintUnknownFieldValue(unknown_field);
1547   }
1548 }
1549 
1550 void MessageDifferencer::
PrintUnknownFieldValue(const UnknownField * unknown_field)1551 StreamReporter::PrintUnknownFieldValue(const UnknownField* unknown_field) {
1552   GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
1553 
1554   string output;
1555   switch (unknown_field->type()) {
1556     case UnknownField::TYPE_VARINT:
1557       output = SimpleItoa(unknown_field->varint());
1558       break;
1559     case UnknownField::TYPE_FIXED32:
1560       output = StrCat("0x", strings::Hex(unknown_field->fixed32(),
1561                                          strings::ZERO_PAD_8));
1562       break;
1563     case UnknownField::TYPE_FIXED64:
1564       output = StrCat("0x", strings::Hex(unknown_field->fixed64(),
1565                                          strings::ZERO_PAD_16));
1566       break;
1567     case UnknownField::TYPE_LENGTH_DELIMITED:
1568       output = StringPrintf("\"%s\"",
1569           CEscape(unknown_field->length_delimited()).c_str());
1570       break;
1571     case UnknownField::TYPE_GROUP:
1572       // TODO(kenton):  Print the contents of the group like we do for
1573       //   messages.  Requires an equivalent of ShortDebugString() for
1574       //   UnknownFieldSet.
1575       output = "{ ... }";
1576       break;
1577   }
1578   printer_->PrintRaw(output);
1579 }
1580 
Print(const string & str)1581 void MessageDifferencer::StreamReporter::Print(const string& str) {
1582   printer_->Print(str.c_str());
1583 }
1584 
ReportAdded(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1585 void MessageDifferencer::StreamReporter::ReportAdded(
1586     const Message& message1,
1587     const Message& message2,
1588     const vector<SpecificField>& field_path) {
1589   printer_->Print("added: ");
1590   PrintPath(field_path, false);
1591   printer_->Print(": ");
1592   PrintValue(message2, field_path, false);
1593   printer_->Print("\n");  // Print for newlines.
1594 }
1595 
ReportDeleted(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1596 void MessageDifferencer::StreamReporter::ReportDeleted(
1597     const Message& message1,
1598     const Message& message2,
1599     const vector<SpecificField>& field_path) {
1600   printer_->Print("deleted: ");
1601   PrintPath(field_path, true);
1602   printer_->Print(": ");
1603   PrintValue(message1, field_path, true);
1604   printer_->Print("\n");  // Print for newlines
1605 }
1606 
ReportModified(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1607 void MessageDifferencer::StreamReporter::ReportModified(
1608     const Message& message1,
1609     const Message& message2,
1610     const vector<SpecificField>& field_path) {
1611   if (!report_modified_aggregates_ && field_path.back().field == NULL) {
1612     if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
1613       // Any changes to the subfields have already been printed.
1614       return;
1615     }
1616   } else if (!report_modified_aggregates_) {
1617     if (field_path.back().field->cpp_type() ==
1618         FieldDescriptor::CPPTYPE_MESSAGE) {
1619       // Any changes to the subfields have already been printed.
1620       return;
1621     }
1622   }
1623 
1624   printer_->Print("modified: ");
1625   PrintPath(field_path, true);
1626   if (CheckPathChanged(field_path)) {
1627     printer_->Print(" -> ");
1628     PrintPath(field_path, false);
1629   }
1630   printer_->Print(": ");
1631   PrintValue(message1, field_path, true);
1632   printer_->Print(" -> ");
1633   PrintValue(message2, field_path, false);
1634   printer_->Print("\n");  // Print for newlines.
1635 }
1636 
ReportMoved(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1637 void MessageDifferencer::StreamReporter::ReportMoved(
1638     const Message& message1,
1639     const Message& message2,
1640     const vector<SpecificField>& field_path) {
1641   printer_->Print("moved: ");
1642   PrintPath(field_path, true);
1643   printer_->Print(" -> ");
1644   PrintPath(field_path, false);
1645   printer_->Print(" : ");
1646   PrintValue(message1, field_path, true);
1647   printer_->Print("\n");  // Print for newlines.
1648 }
1649 
ReportMatched(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1650 void MessageDifferencer::StreamReporter::ReportMatched(
1651     const Message& message1,
1652     const Message& message2,
1653     const vector<SpecificField>& field_path) {
1654   printer_->Print("matched: ");
1655   PrintPath(field_path, true);
1656   if (CheckPathChanged(field_path)) {
1657     printer_->Print(" -> ");
1658     PrintPath(field_path, false);
1659   }
1660   printer_->Print(" : ");
1661   PrintValue(message1, field_path, true);
1662   printer_->Print("\n");  // Print for newlines.
1663 }
1664 
ReportIgnored(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1665 void MessageDifferencer::StreamReporter::ReportIgnored(
1666     const Message& message1,
1667     const Message& message2,
1668     const vector<SpecificField>& field_path) {
1669   printer_->Print("ignored: ");
1670   PrintPath(field_path, true);
1671   if (CheckPathChanged(field_path)) {
1672     printer_->Print(" -> ");
1673     PrintPath(field_path, false);
1674   }
1675   printer_->Print("\n");  // Print for newlines.
1676 }
1677 
ReportUnknownFieldIgnored(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1678 void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
1679     const Message& message1, const Message& message2,
1680     const vector<SpecificField>& field_path) {
1681   printer_->Print("ignored: ");
1682   PrintPath(field_path, true);
1683   if (CheckPathChanged(field_path)) {
1684     printer_->Print(" -> ");
1685     PrintPath(field_path, false);
1686   }
1687   printer_->Print("\n");  // Print for newlines.
1688 }
1689 
1690 }  // namespace util
1691 }  // namespace protobuf
1692 }  // namespace google
1693