1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: jschorr@google.com (Joseph Schorr)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file defines static methods and classes for comparing Protocol
36 // Messages (see //google/protobuf/util/message_differencer.h for more
37 // information).
38
39 #include <google/protobuf/util/message_differencer.h>
40
41 #include <algorithm>
42 #include <memory>
43 #ifndef _SHARED_PTR_H
44 #include <google/protobuf/stubs/shared_ptr.h>
45 #endif
46 #include <utility>
47
48 #include <google/protobuf/stubs/callback.h>
49 #include <google/protobuf/stubs/common.h>
50 #include <google/protobuf/stubs/logging.h>
51 #include <google/protobuf/stubs/stringprintf.h>
52 #include <google/protobuf/any.h>
53 #include <google/protobuf/io/printer.h>
54 #include <google/protobuf/io/zero_copy_stream.h>
55 #include <google/protobuf/io/zero_copy_stream_impl.h>
56 #include <google/protobuf/dynamic_message.h>
57 #include <google/protobuf/text_format.h>
58 #include <google/protobuf/util/field_comparator.h>
59 #include <google/protobuf/stubs/strutil.h>
60
61 namespace google {
62 namespace protobuf {
63
64 namespace util {
65
66 // When comparing a repeated field as map, MultipleFieldMapKeyComparator can
67 // be used to specify multiple fields as key for key comparison.
68 // Two elements of a repeated field will be regarded as having the same key
69 // iff they have the same value for every specified key field.
70 // Note that you can also specify only one field as key.
71 class MessageDifferencer::MultipleFieldsMapKeyComparator
72 : public MessageDifferencer::MapKeyComparator {
73 public:
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const vector<vector<const FieldDescriptor * >> & key_field_paths)74 MultipleFieldsMapKeyComparator(
75 MessageDifferencer* message_differencer,
76 const vector<vector<const FieldDescriptor*> >& key_field_paths)
77 : message_differencer_(message_differencer),
78 key_field_paths_(key_field_paths) {
79 GOOGLE_CHECK(!key_field_paths_.empty());
80 for (int i = 0; i < key_field_paths_.size(); ++i) {
81 GOOGLE_CHECK(!key_field_paths_[i].empty());
82 }
83 }
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const FieldDescriptor * key)84 MultipleFieldsMapKeyComparator(
85 MessageDifferencer* message_differencer,
86 const FieldDescriptor* key)
87 : message_differencer_(message_differencer) {
88 vector<const FieldDescriptor*> key_field_path;
89 key_field_path.push_back(key);
90 key_field_paths_.push_back(key_field_path);
91 }
IsMatch(const Message & message1,const Message & message2,const vector<SpecificField> & parent_fields) const92 virtual bool IsMatch(
93 const Message& message1,
94 const Message& message2,
95 const vector<SpecificField>& parent_fields) const {
96 for (int i = 0; i < key_field_paths_.size(); ++i) {
97 if (!IsMatchInternal(message1, message2, parent_fields,
98 key_field_paths_[i], 0)) {
99 return false;
100 }
101 }
102 return true;
103 }
104 private:
IsMatchInternal(const Message & message1,const Message & message2,const vector<SpecificField> & parent_fields,const vector<const FieldDescriptor * > & key_field_path,int path_index) const105 bool IsMatchInternal(
106 const Message& message1,
107 const Message& message2,
108 const vector<SpecificField>& parent_fields,
109 const vector<const FieldDescriptor*>& key_field_path,
110 int path_index) const {
111 const FieldDescriptor* field = key_field_path[path_index];
112 vector<SpecificField> current_parent_fields(parent_fields);
113 if (path_index == key_field_path.size() - 1) {
114 if (field->is_repeated()) {
115 if (!message_differencer_->CompareRepeatedField(
116 message1, message2, field, ¤t_parent_fields)) {
117 return false;
118 }
119 } else {
120 if (!message_differencer_->CompareFieldValueUsingParentFields(
121 message1, message2, field, -1, -1, ¤t_parent_fields)) {
122 return false;
123 }
124 }
125 return true;
126 } else {
127 const Reflection* reflection1 = message1.GetReflection();
128 const Reflection* reflection2 = message2.GetReflection();
129 bool has_field1 = reflection1->HasField(message1, field);
130 bool has_field2 = reflection2->HasField(message2, field);
131 if (!has_field1 && !has_field2) {
132 return true;
133 }
134 if (has_field1 != has_field2) {
135 return false;
136 }
137 SpecificField specific_field;
138 specific_field.field = field;
139 current_parent_fields.push_back(specific_field);
140 return IsMatchInternal(
141 reflection1->GetMessage(message1, field),
142 reflection2->GetMessage(message2, field),
143 current_parent_fields,
144 key_field_path,
145 path_index + 1);
146 }
147 }
148 MessageDifferencer* message_differencer_;
149 vector<vector<const FieldDescriptor*> > key_field_paths_;
150 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
151 };
152
Equals(const Message & message1,const Message & message2)153 bool MessageDifferencer::Equals(const Message& message1,
154 const Message& message2) {
155 MessageDifferencer differencer;
156
157 return differencer.Compare(message1, message2);
158 }
159
Equivalent(const Message & message1,const Message & message2)160 bool MessageDifferencer::Equivalent(const Message& message1,
161 const Message& message2) {
162 MessageDifferencer differencer;
163 differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
164
165 return differencer.Compare(message1, message2);
166 }
167
ApproximatelyEquals(const Message & message1,const Message & message2)168 bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
169 const Message& message2) {
170 MessageDifferencer differencer;
171 differencer.set_float_comparison(
172 MessageDifferencer::APPROXIMATE);
173
174 return differencer.Compare(message1, message2);
175 }
176
ApproximatelyEquivalent(const Message & message1,const Message & message2)177 bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
178 const Message& message2) {
179 MessageDifferencer differencer;
180 differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
181 differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
182
183 return differencer.Compare(message1, message2);
184 }
185
186 // ===========================================================================
187
MessageDifferencer()188 MessageDifferencer::MessageDifferencer()
189 : reporter_(NULL),
190 field_comparator_(NULL),
191 message_field_comparison_(EQUAL),
192 scope_(FULL),
193 repeated_field_comparison_(AS_LIST),
194 report_matches_(false),
195 output_string_(NULL) { }
196
~MessageDifferencer()197 MessageDifferencer::~MessageDifferencer() {
198 for (int i = 0; i < owned_key_comparators_.size(); ++i) {
199 delete owned_key_comparators_[i];
200 }
201 for (int i = 0; i < ignore_criteria_.size(); ++i) {
202 delete ignore_criteria_[i];
203 }
204 }
205
set_field_comparator(FieldComparator * comparator)206 void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
207 GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
208 field_comparator_ = comparator;
209 }
210
set_message_field_comparison(MessageFieldComparison comparison)211 void MessageDifferencer::set_message_field_comparison(
212 MessageFieldComparison comparison) {
213 message_field_comparison_ = comparison;
214 }
215
set_scope(Scope scope)216 void MessageDifferencer::set_scope(Scope scope) {
217 scope_ = scope;
218 }
219
scope()220 MessageDifferencer::Scope MessageDifferencer::scope() {
221 return scope_;
222 }
223
set_float_comparison(FloatComparison comparison)224 void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
225 default_field_comparator_.set_float_comparison(
226 comparison == EXACT ?
227 DefaultFieldComparator::EXACT : DefaultFieldComparator::APPROXIMATE);
228 }
229
set_repeated_field_comparison(RepeatedFieldComparison comparison)230 void MessageDifferencer::set_repeated_field_comparison(
231 RepeatedFieldComparison comparison) {
232 repeated_field_comparison_ = comparison;
233 }
234
TreatAsSet(const FieldDescriptor * field)235 void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
236 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
237 << field->full_name();
238 const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
239 GOOGLE_CHECK(key_comparator == NULL)
240 << "Cannot treat this repeated field as both Map and Set for"
241 << " comparison. Field name is: " << field->full_name();
242 GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
243 << "Cannot treat the same field as both SET and LIST. Field name is: "
244 << field->full_name();
245 set_fields_.insert(field);
246 }
247
TreatAsList(const FieldDescriptor * field)248 void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
249 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
250 << field->full_name();
251 const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
252 GOOGLE_CHECK(key_comparator == NULL)
253 << "Cannot treat this repeated field as both Map and Set for"
254 << " comparison. Field name is: " << field->full_name();
255 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
256 << "Cannot treat the same field as both SET and LIST. Field name is: "
257 << field->full_name();
258 list_fields_.insert(field);
259 }
260
TreatAsMap(const FieldDescriptor * field,const FieldDescriptor * key)261 void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
262 const FieldDescriptor* key) {
263 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
264 << field->full_name();
265 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
266 << "Field has to be message type. Field name is: "
267 << field->full_name();
268 GOOGLE_CHECK(key->containing_type() == field->message_type())
269 << key->full_name()
270 << " must be a direct subfield within the repeated field "
271 << field->full_name() << ", not " << key->containing_type()->full_name();
272 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
273 << "Cannot treat this repeated field as both Map and Set for "
274 << "comparison.";
275 GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
276 << "Cannot treat this repeated field as both Map and List for "
277 << "comparison.";
278 MapKeyComparator* key_comparator =
279 new MultipleFieldsMapKeyComparator(this, key);
280 owned_key_comparators_.push_back(key_comparator);
281 map_field_key_comparator_[field] = key_comparator;
282 }
283
TreatAsMapWithMultipleFieldsAsKey(const FieldDescriptor * field,const vector<const FieldDescriptor * > & key_fields)284 void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
285 const FieldDescriptor* field,
286 const vector<const FieldDescriptor*>& key_fields) {
287 vector<vector<const FieldDescriptor*> > key_field_paths;
288 for (int i = 0; i < key_fields.size(); ++i) {
289 vector<const FieldDescriptor*> key_field_path;
290 key_field_path.push_back(key_fields[i]);
291 key_field_paths.push_back(key_field_path);
292 }
293 TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
294 }
295
TreatAsMapWithMultipleFieldPathsAsKey(const FieldDescriptor * field,const vector<vector<const FieldDescriptor * >> & key_field_paths)296 void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
297 const FieldDescriptor* field,
298 const vector<vector<const FieldDescriptor*> >& key_field_paths) {
299 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
300 << field->full_name();
301 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
302 << "Field has to be message type. Field name is: "
303 << field->full_name();
304 for (int i = 0; i < key_field_paths.size(); ++i) {
305 const vector<const FieldDescriptor*>& key_field_path = key_field_paths[i];
306 for (int j = 0; j < key_field_path.size(); ++j) {
307 const FieldDescriptor* parent_field =
308 j == 0 ? field : key_field_path[j - 1];
309 const FieldDescriptor* child_field = key_field_path[j];
310 GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
311 << child_field->full_name()
312 << " must be a direct subfield within the field: "
313 << parent_field->full_name();
314 if (j != 0) {
315 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
316 << parent_field->full_name() << " has to be of type message.";
317 GOOGLE_CHECK(!parent_field->is_repeated())
318 << parent_field->full_name() << " cannot be a repeated field.";
319 }
320 }
321 }
322 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
323 << "Cannot treat this repeated field as both Map and Set for "
324 << "comparison.";
325 MapKeyComparator* key_comparator =
326 new MultipleFieldsMapKeyComparator(this, key_field_paths);
327 owned_key_comparators_.push_back(key_comparator);
328 map_field_key_comparator_[field] = key_comparator;
329 }
330
TreatAsMapUsingKeyComparator(const FieldDescriptor * field,const MapKeyComparator * key_comparator)331 void MessageDifferencer::TreatAsMapUsingKeyComparator(
332 const FieldDescriptor* field,
333 const MapKeyComparator* key_comparator) {
334 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
335 << field->full_name();
336 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
337 << "Field has to be message type. Field name is: "
338 << field->full_name();
339 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
340 << "Cannot treat this repeated field as both Map and Set for "
341 << "comparison.";
342 map_field_key_comparator_[field] = key_comparator;
343 }
344
AddIgnoreCriteria(IgnoreCriteria * ignore_criteria)345 void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
346 ignore_criteria_.push_back(ignore_criteria);
347 }
348
IgnoreField(const FieldDescriptor * field)349 void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
350 ignored_fields_.insert(field);
351 }
352
SetFractionAndMargin(const FieldDescriptor * field,double fraction,double margin)353 void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
354 double fraction, double margin) {
355 default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
356 }
357
ReportDifferencesToString(string * output)358 void MessageDifferencer::ReportDifferencesToString(string* output) {
359 GOOGLE_DCHECK(output) << "Specified output string was NULL";
360
361 output_string_ = output;
362 output_string_->clear();
363 }
364
ReportDifferencesTo(Reporter * reporter)365 void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
366 // If an output string is set, clear it to prevent
367 // it superceding the specified reporter.
368 if (output_string_) {
369 output_string_ = NULL;
370 }
371
372 reporter_ = reporter;
373 }
374
FieldBefore(const FieldDescriptor * field1,const FieldDescriptor * field2)375 bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
376 const FieldDescriptor* field2) {
377 // Handle sentinel values (i.e. make sure NULLs are always ordered
378 // at the end of the list).
379 if (field1 == NULL) {
380 return false;
381 }
382
383 if (field2 == NULL) {
384 return true;
385 }
386
387 // Always order fields by their tag number
388 return (field1->number() < field2->number());
389 }
390
Compare(const Message & message1,const Message & message2)391 bool MessageDifferencer::Compare(const Message& message1,
392 const Message& message2) {
393 vector<SpecificField> parent_fields;
394
395 bool result = false;
396
397 // Setup the internal reporter if need be.
398 if (output_string_) {
399 io::StringOutputStream output_stream(output_string_);
400 StreamReporter reporter(&output_stream);
401 reporter_ = &reporter;
402 result = Compare(message1, message2, &parent_fields);
403 reporter_ = NULL;
404 } else {
405 result = Compare(message1, message2, &parent_fields);
406 }
407
408 return result;
409 }
410
CompareWithFields(const Message & message1,const Message & message2,const vector<const FieldDescriptor * > & message1_fields_arg,const vector<const FieldDescriptor * > & message2_fields_arg)411 bool MessageDifferencer::CompareWithFields(
412 const Message& message1,
413 const Message& message2,
414 const vector<const FieldDescriptor*>& message1_fields_arg,
415 const vector<const FieldDescriptor*>& message2_fields_arg) {
416 if (message1.GetDescriptor() != message2.GetDescriptor()) {
417 GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
418 << "descriptors.";
419 return false;
420 }
421
422 vector<SpecificField> parent_fields;
423
424 bool result = false;
425
426 vector<const FieldDescriptor*> message1_fields(message1_fields_arg);
427 vector<const FieldDescriptor*> message2_fields(message2_fields_arg);
428
429 std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
430 std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
431 // Append NULL sentinel values.
432 message1_fields.push_back(NULL);
433 message2_fields.push_back(NULL);
434
435 // Setup the internal reporter if need be.
436 if (output_string_) {
437 io::StringOutputStream output_stream(output_string_);
438 StreamReporter reporter(&output_stream);
439 reporter_ = &reporter;
440 result = CompareRequestedFieldsUsingSettings(
441 message1, message2, message1_fields, message2_fields, &parent_fields);
442 reporter_ = NULL;
443 } else {
444 result = CompareRequestedFieldsUsingSettings(
445 message1, message2, message1_fields, message2_fields, &parent_fields);
446 }
447
448 return result;
449 }
450
Compare(const Message & message1,const Message & message2,vector<SpecificField> * parent_fields)451 bool MessageDifferencer::Compare(
452 const Message& message1,
453 const Message& message2,
454 vector<SpecificField>* parent_fields) {
455 const Descriptor* descriptor1 = message1.GetDescriptor();
456 const Descriptor* descriptor2 = message2.GetDescriptor();
457 if (descriptor1 != descriptor2) {
458 GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
459 << "descriptors. "
460 << descriptor1->full_name() << " vs "
461 << descriptor2->full_name();
462 return false;
463 }
464 // Expand google.protobuf.Any payload if possible.
465 if (descriptor1->full_name() == internal::kAnyFullTypeName) {
466 google::protobuf::scoped_ptr<Message> data1;
467 google::protobuf::scoped_ptr<Message> data2;
468 if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
469 return Compare(*data1, *data2, parent_fields);
470 }
471 }
472 const Reflection* reflection1 = message1.GetReflection();
473 const Reflection* reflection2 = message2.GetReflection();
474
475 // Retrieve all the set fields, including extensions.
476 vector<const FieldDescriptor*> message1_fields;
477 message1_fields.reserve(1 + message1.GetDescriptor()->field_count());
478
479 vector<const FieldDescriptor*> message2_fields;
480 message2_fields.reserve(1 + message2.GetDescriptor()->field_count());
481
482 reflection1->ListFields(message1, &message1_fields);
483 reflection2->ListFields(message2, &message2_fields);
484
485 // Add sentinel values to deal with the
486 // case where the number of the fields in
487 // each list are different.
488 message1_fields.push_back(NULL);
489 message2_fields.push_back(NULL);
490
491 bool unknown_compare_result = true;
492 // Ignore unknown fields in EQUIVALENT mode
493 if (message_field_comparison_ != EQUIVALENT) {
494 const google::protobuf::UnknownFieldSet* unknown_field_set1 =
495 &reflection1->GetUnknownFields(message1);
496 const google::protobuf::UnknownFieldSet* unknown_field_set2 =
497 &reflection2->GetUnknownFields(message2);
498 if (!CompareUnknownFields(message1, message2,
499 *unknown_field_set1, *unknown_field_set2,
500 parent_fields)) {
501 if (reporter_ == NULL) {
502 return false;
503 };
504 unknown_compare_result = false;
505 }
506 }
507
508 return CompareRequestedFieldsUsingSettings(
509 message1, message2,
510 message1_fields, message2_fields,
511 parent_fields) && unknown_compare_result;
512 }
513
CompareRequestedFieldsUsingSettings(const Message & message1,const Message & message2,const vector<const FieldDescriptor * > & message1_fields,const vector<const FieldDescriptor * > & message2_fields,vector<SpecificField> * parent_fields)514 bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
515 const Message& message1,
516 const Message& message2,
517 const vector<const FieldDescriptor*>& message1_fields,
518 const vector<const FieldDescriptor*>& message2_fields,
519 vector<SpecificField>* parent_fields) {
520 if (scope_ == FULL) {
521 if (message_field_comparison_ == EQUIVALENT) {
522 // We need to merge the field lists of both messages (i.e.
523 // we are merely checking for a difference in field values,
524 // rather than the addition or deletion of fields).
525 vector<const FieldDescriptor*> fields_union;
526 CombineFields(message1_fields, FULL, message2_fields, FULL,
527 &fields_union);
528 return CompareWithFieldsInternal(message1, message2, fields_union,
529 fields_union, parent_fields);
530 } else {
531 // Simple equality comparison, use the unaltered field lists.
532 return CompareWithFieldsInternal(message1, message2, message1_fields,
533 message2_fields, parent_fields);
534 }
535 } else {
536 if (message_field_comparison_ == EQUIVALENT) {
537 // We use the list of fields for message1 for both messages when
538 // comparing. This way, extra fields in message2 are ignored,
539 // and missing fields in message2 use their default value.
540 return CompareWithFieldsInternal(message1, message2, message1_fields,
541 message1_fields, parent_fields);
542 } else {
543 // We need to consider the full list of fields for message1
544 // but only the intersection for message2. This way, any fields
545 // only present in message2 will be ignored, but any fields only
546 // present in message1 will be marked as a difference.
547 vector<const FieldDescriptor*> fields_intersection;
548 CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL,
549 &fields_intersection);
550 return CompareWithFieldsInternal(message1, message2, message1_fields,
551 fields_intersection, parent_fields);
552 }
553 }
554 }
555
CombineFields(const vector<const FieldDescriptor * > & fields1,Scope fields1_scope,const vector<const FieldDescriptor * > & fields2,Scope fields2_scope,vector<const FieldDescriptor * > * combined_fields)556 void MessageDifferencer::CombineFields(
557 const vector<const FieldDescriptor*>& fields1,
558 Scope fields1_scope,
559 const vector<const FieldDescriptor*>& fields2,
560 Scope fields2_scope,
561 vector<const FieldDescriptor*>* combined_fields) {
562
563 int index1 = 0;
564 int index2 = 0;
565
566 while (index1 < fields1.size() && index2 < fields2.size()) {
567 const FieldDescriptor* field1 = fields1[index1];
568 const FieldDescriptor* field2 = fields2[index2];
569
570 if (FieldBefore(field1, field2)) {
571 if (fields1_scope == FULL) {
572 combined_fields->push_back(fields1[index1]);
573 }
574 ++index1;
575 } else if (FieldBefore(field2, field1)) {
576 if (fields2_scope == FULL) {
577 combined_fields->push_back(fields2[index2]);
578 }
579 ++index2;
580 } else {
581 combined_fields->push_back(fields1[index1]);
582 ++index1;
583 ++index2;
584 }
585 }
586 }
587
CompareWithFieldsInternal(const Message & message1,const Message & message2,const vector<const FieldDescriptor * > & message1_fields,const vector<const FieldDescriptor * > & message2_fields,vector<SpecificField> * parent_fields)588 bool MessageDifferencer::CompareWithFieldsInternal(
589 const Message& message1,
590 const Message& message2,
591 const vector<const FieldDescriptor*>& message1_fields,
592 const vector<const FieldDescriptor*>& message2_fields,
593 vector<SpecificField>* parent_fields) {
594 bool isDifferent = false;
595 int field_index1 = 0;
596 int field_index2 = 0;
597
598 const Reflection* reflection1 = message1.GetReflection();
599 const Reflection* reflection2 = message2.GetReflection();
600
601 while (true) {
602 const FieldDescriptor* field1 = message1_fields[field_index1];
603 const FieldDescriptor* field2 = message2_fields[field_index2];
604
605 // Once we have reached sentinel values, we are done the comparison.
606 if (field1 == NULL && field2 == NULL) {
607 break;
608 }
609
610 // Check for differences in the field itself.
611 if (FieldBefore(field1, field2)) {
612 // Field 1 is not in the field list for message 2.
613 if (IsIgnored(message1, message2, field1, *parent_fields)) {
614 // We are ignoring field1. Report the ignore and move on to
615 // the next field in message1_fields.
616 if (reporter_ != NULL) {
617 SpecificField specific_field;
618 specific_field.field = field1;
619
620 parent_fields->push_back(specific_field);
621 reporter_->ReportIgnored(message1, message2, *parent_fields);
622 parent_fields->pop_back();
623 }
624 ++field_index1;
625 continue;
626 }
627
628 if (reporter_ != NULL) {
629 int count = field1->is_repeated() ?
630 reflection1->FieldSize(message1, field1) : 1;
631
632 for (int i = 0; i < count; ++i) {
633 SpecificField specific_field;
634 specific_field.field = field1;
635 specific_field.index = field1->is_repeated() ? i : -1;
636
637 parent_fields->push_back(specific_field);
638 reporter_->ReportDeleted(message1, message2, *parent_fields);
639 parent_fields->pop_back();
640 }
641
642 isDifferent = true;
643 } else {
644 return false;
645 }
646
647 ++field_index1;
648 continue;
649 } else if (FieldBefore(field2, field1)) {
650 // Field 2 is not in the field list for message 1.
651 if (IsIgnored(message1, message2, field2, *parent_fields)) {
652 // We are ignoring field2. Report the ignore and move on to
653 // the next field in message2_fields.
654 if (reporter_ != NULL) {
655 SpecificField specific_field;
656 specific_field.field = field2;
657
658 parent_fields->push_back(specific_field);
659 reporter_->ReportIgnored(message1, message2, *parent_fields);
660 parent_fields->pop_back();
661 }
662 ++field_index2;
663 continue;
664 }
665
666 if (reporter_ != NULL) {
667 int count = field2->is_repeated() ?
668 reflection2->FieldSize(message2, field2) : 1;
669
670 for (int i = 0; i < count; ++i) {
671 SpecificField specific_field;
672 specific_field.field = field2;
673 specific_field.index = field2->is_repeated() ? i : -1;
674 specific_field.new_index = specific_field.index;
675
676 parent_fields->push_back(specific_field);
677 reporter_->ReportAdded(message1, message2, *parent_fields);
678 parent_fields->pop_back();
679 }
680
681 isDifferent = true;
682 } else {
683 return false;
684 }
685
686 ++field_index2;
687 continue;
688 }
689
690 // By this point, field1 and field2 are guarenteed to point to the same
691 // field, so we can now compare the values.
692 if (IsIgnored(message1, message2, field1, *parent_fields)) {
693 // Ignore this field. Report and move on.
694 if (reporter_ != NULL) {
695 SpecificField specific_field;
696 specific_field.field = field1;
697
698 parent_fields->push_back(specific_field);
699 reporter_->ReportIgnored(message1, message2, *parent_fields);
700 parent_fields->pop_back();
701 }
702
703 ++field_index1;
704 ++field_index2;
705 continue;
706 }
707
708 bool fieldDifferent = false;
709 if (field1->is_repeated()) {
710 fieldDifferent = !CompareRepeatedField(message1, message2, field1,
711 parent_fields);
712 if (fieldDifferent) {
713 if (reporter_ == NULL) return false;
714 isDifferent = true;
715 }
716 } else {
717 fieldDifferent = !CompareFieldValueUsingParentFields(
718 message1, message2, field1, -1, -1, parent_fields);
719
720 // If we have found differences, either report them or terminate if
721 // no reporter is present.
722 if (fieldDifferent && reporter_ == NULL) {
723 return false;
724 }
725
726 if (reporter_ != NULL) {
727 SpecificField specific_field;
728 specific_field.field = field1;
729 parent_fields->push_back(specific_field);
730 if (fieldDifferent) {
731 reporter_->ReportModified(message1, message2, *parent_fields);
732 isDifferent = true;
733 } else if (report_matches_) {
734 reporter_->ReportMatched(message1, message2, *parent_fields);
735 }
736 parent_fields->pop_back();
737 }
738 }
739 // Increment the field indicies.
740 ++field_index1;
741 ++field_index2;
742 }
743
744 return !isDifferent;
745 }
746
IsMatch(const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const Message * message1,const Message * message2,const vector<SpecificField> & parent_fields,int index1,int index2)747 bool MessageDifferencer::IsMatch(const FieldDescriptor* repeated_field,
748 const MapKeyComparator* key_comparator,
749 const Message* message1,
750 const Message* message2,
751 const vector<SpecificField>& parent_fields,
752 int index1, int index2) {
753 vector<SpecificField> current_parent_fields(parent_fields);
754 if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
755 return CompareFieldValueUsingParentFields(
756 *message1, *message2, repeated_field, index1, index2,
757 ¤t_parent_fields);
758 }
759 // Back up the Reporter and output_string_. They will be reset in the
760 // following code.
761 Reporter* backup_reporter = reporter_;
762 string* output_string = output_string_;
763 reporter_ = NULL;
764 output_string_ = NULL;
765 bool match;
766
767 if (key_comparator == NULL) {
768 match = CompareFieldValueUsingParentFields(
769 *message1, *message2, repeated_field, index1, index2,
770 ¤t_parent_fields);
771 } else {
772 const Reflection* reflection1 = message1->GetReflection();
773 const Reflection* reflection2 = message2->GetReflection();
774 const Message& m1 =
775 reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
776 const Message& m2 =
777 reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
778 SpecificField specific_field;
779 specific_field.field = repeated_field;
780 current_parent_fields.push_back(specific_field);
781 match = key_comparator->IsMatch(m1, m2, current_parent_fields);
782 }
783
784 reporter_ = backup_reporter;
785 output_string_ = output_string;
786 return match;
787 }
788
CompareRepeatedField(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,vector<SpecificField> * parent_fields)789 bool MessageDifferencer::CompareRepeatedField(
790 const Message& message1,
791 const Message& message2,
792 const FieldDescriptor* repeated_field,
793 vector<SpecificField>* parent_fields) {
794 // the input FieldDescriptor is guaranteed to be repeated field.
795 const Reflection* reflection1 = message1.GetReflection();
796 const Reflection* reflection2 = message2.GetReflection();
797 const int count1 = reflection1->FieldSize(message1, repeated_field);
798 const int count2 = reflection2->FieldSize(message2, repeated_field);
799 const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
800
801 // If the field is not treated as subset and no detailed reports is needed,
802 // we do a quick check on the number of the elements to avoid unnecessary
803 // comparison.
804 if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
805 return false;
806 }
807 // A match can never be found if message1 has more items than message2.
808 if (count1 > count2 && reporter_ == NULL) {
809 return false;
810 }
811
812 // These two list are used for store the index of the correspondent
813 // element in peer repeated field.
814 vector<int> match_list1;
815 vector<int> match_list2;
816
817 // Try to match indices of the repeated fields. Return false if match fails
818 // and there's no detailed report needed.
819 if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
820 *parent_fields, &match_list1, &match_list2) &&
821 reporter_ == NULL) {
822 return false;
823 }
824
825 bool fieldDifferent = false;
826 SpecificField specific_field;
827 specific_field.field = repeated_field;
828
829 // At this point, we have already matched pairs of fields (with the reporting
830 // to be done later). Now to check if the paired elements are different.
831 for (int i = 0; i < count1; i++) {
832 if (match_list1[i] == -1) continue;
833 specific_field.index = i;
834 specific_field.new_index = match_list1[i];
835
836 const bool result = CompareFieldValueUsingParentFields(
837 message1, message2, repeated_field, i, specific_field.new_index,
838 parent_fields);
839
840 // If we have found differences, either report them or terminate if
841 // no reporter is present. Note that ReportModified, ReportMoved, and
842 // ReportMatched are all mutually exclusive.
843 if (!result) {
844 if (reporter_ == NULL) return false;
845 parent_fields->push_back(specific_field);
846 reporter_->ReportModified(message1, message2, *parent_fields);
847 parent_fields->pop_back();
848 fieldDifferent = true;
849 } else if (reporter_ != NULL &&
850 specific_field.index != specific_field.new_index) {
851 parent_fields->push_back(specific_field);
852 reporter_->ReportMoved(message1, message2, *parent_fields);
853 parent_fields->pop_back();
854 } else if (report_matches_ && reporter_ != NULL) {
855 parent_fields->push_back(specific_field);
856 reporter_->ReportMatched(message1, message2, *parent_fields);
857 parent_fields->pop_back();
858 }
859 }
860
861 // Report any remaining additions or deletions.
862 for (int i = 0; i < count2; ++i) {
863 if (match_list2[i] != -1) continue;
864 if (!treated_as_subset) {
865 fieldDifferent = true;
866 }
867
868 if (reporter_ == NULL) continue;
869 specific_field.index = i;
870 specific_field.new_index = i;
871 parent_fields->push_back(specific_field);
872 reporter_->ReportAdded(message1, message2, *parent_fields);
873 parent_fields->pop_back();
874 }
875
876 for (int i = 0; i < count1; ++i) {
877 if (match_list1[i] != -1) continue;
878 specific_field.index = i;
879 parent_fields->push_back(specific_field);
880 reporter_->ReportDeleted(message1, message2, *parent_fields);
881 parent_fields->pop_back();
882 fieldDifferent = true;
883 }
884 return !fieldDifferent;
885 }
886
CompareFieldValue(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2)887 bool MessageDifferencer::CompareFieldValue(const Message& message1,
888 const Message& message2,
889 const FieldDescriptor* field,
890 int index1,
891 int index2) {
892 return CompareFieldValueUsingParentFields(message1, message2, field, index1,
893 index2, NULL);
894 }
895
CompareFieldValueUsingParentFields(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,vector<SpecificField> * parent_fields)896 bool MessageDifferencer::CompareFieldValueUsingParentFields(
897 const Message& message1, const Message& message2,
898 const FieldDescriptor* field, int index1, int index2,
899 vector<SpecificField>* parent_fields) {
900 FieldContext field_context(parent_fields);
901 FieldComparator::ComparisonResult result = GetFieldComparisonResult(
902 message1, message2, field, index1, index2, &field_context);
903
904 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
905 result == FieldComparator::RECURSE) {
906 // Get the nested messages and compare them using one of the Compare
907 // methods.
908 const Reflection* reflection1 = message1.GetReflection();
909 const Reflection* reflection2 = message2.GetReflection();
910 const Message& m1 = field->is_repeated() ?
911 reflection1->GetRepeatedMessage(message1, field, index1) :
912 reflection1->GetMessage(message1, field);
913 const Message& m2 = field->is_repeated() ?
914 reflection2->GetRepeatedMessage(message2, field, index2) :
915 reflection2->GetMessage(message2, field);
916
917 // parent_fields is used in calls to Reporter methods.
918 if (parent_fields != NULL) {
919 // Append currently compared field to the end of parent_fields.
920 SpecificField specific_field;
921 specific_field.field = field;
922 specific_field.index = index1;
923 specific_field.new_index = index2;
924 parent_fields->push_back(specific_field);
925 const bool compare_result = Compare(m1, m2, parent_fields);
926 parent_fields->pop_back();
927 return compare_result;
928 } else {
929 // Recreates parent_fields as if m1 and m2 had no parents.
930 return Compare(m1, m2);
931 }
932 } else {
933 return (result == FieldComparator::SAME);
934 }
935 }
936
CheckPathChanged(const vector<SpecificField> & field_path)937 bool MessageDifferencer::CheckPathChanged(
938 const vector<SpecificField>& field_path) {
939 for (int i = 0; i < field_path.size(); ++i) {
940 if (field_path[i].index != field_path[i].new_index) return true;
941 }
942 return false;
943 }
944
IsTreatedAsSet(const FieldDescriptor * field)945 bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
946 if (!field->is_repeated()) return false;
947 if (field->is_map()) return true;
948 if (repeated_field_comparison_ == AS_SET)
949 return list_fields_.find(field) == list_fields_.end();
950 return (set_fields_.find(field) != set_fields_.end());
951 }
952
IsTreatedAsSubset(const FieldDescriptor * field)953 bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
954 return scope_ == PARTIAL &&
955 (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
956 }
957
IsIgnored(const Message & message1,const Message & message2,const FieldDescriptor * field,const vector<SpecificField> & parent_fields)958 bool MessageDifferencer::IsIgnored(
959 const Message& message1,
960 const Message& message2,
961 const FieldDescriptor* field,
962 const vector<SpecificField>& parent_fields) {
963 if (ignored_fields_.find(field) != ignored_fields_.end()) {
964 return true;
965 }
966 for (int i = 0; i < ignore_criteria_.size(); ++i) {
967 if (ignore_criteria_[i]->IsIgnored(message1, message2, field,
968 parent_fields)) {
969 return true;
970 }
971 }
972 return false;
973 }
974
IsUnknownFieldIgnored(const Message & message1,const Message & message2,const SpecificField & field,const vector<SpecificField> & parent_fields)975 bool MessageDifferencer::IsUnknownFieldIgnored(
976 const Message& message1, const Message& message2,
977 const SpecificField& field, const vector<SpecificField>& parent_fields) {
978 for (int i = 0; i < ignore_criteria_.size(); ++i) {
979 if (ignore_criteria_[i]->IsUnknownFieldIgnored(message1, message2, field,
980 parent_fields)) {
981 return true;
982 }
983 }
984 return false;
985 }
986
987 const MessageDifferencer::MapKeyComparator* MessageDifferencer
GetMapKeyComparator(const FieldDescriptor * field)988 ::GetMapKeyComparator(const FieldDescriptor* field) {
989 if (!field->is_repeated()) return NULL;
990 if (map_field_key_comparator_.find(field) !=
991 map_field_key_comparator_.end()) {
992 return map_field_key_comparator_[field];
993 }
994 return NULL;
995 }
996
997 namespace {
998
999 typedef pair<int, const UnknownField*> IndexUnknownFieldPair;
1000
1001 struct UnknownFieldOrdering {
operator ()google::protobuf::util::__anon672b6d760111::UnknownFieldOrdering1002 inline bool operator()(const IndexUnknownFieldPair& a,
1003 const IndexUnknownFieldPair& b) const {
1004 if (a.second->number() < b.second->number()) return true;
1005 if (a.second->number() > b.second->number()) return false;
1006 return a.second->type() < b.second->type();
1007 }
1008 };
1009
1010 } // namespace
1011
UnpackAny(const Message & any,google::protobuf::scoped_ptr<Message> * data)1012 bool MessageDifferencer::UnpackAny(const Message& any,
1013 google::protobuf::scoped_ptr<Message>* data) {
1014 const Reflection* reflection = any.GetReflection();
1015 const FieldDescriptor* type_url_field;
1016 const FieldDescriptor* value_field;
1017 if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
1018 return false;
1019 }
1020 const string& type_url = reflection->GetString(any, type_url_field);
1021 string full_type_name;
1022 if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1023 return false;
1024 }
1025
1026 const google::protobuf::Descriptor* desc =
1027 any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1028 full_type_name);
1029 if (desc == NULL) {
1030 GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
1031 return false;
1032 }
1033
1034 if (dynamic_message_factory_ == NULL) {
1035 dynamic_message_factory_.reset(new DynamicMessageFactory());
1036 }
1037 data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
1038 string serialized_value = reflection->GetString(any, value_field);
1039 if (!(*data)->ParseFromString(serialized_value)) {
1040 GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
1041 return false;
1042 }
1043 return true;
1044 }
1045
CompareUnknownFields(const Message & message1,const Message & message2,const google::protobuf::UnknownFieldSet & unknown_field_set1,const google::protobuf::UnknownFieldSet & unknown_field_set2,vector<SpecificField> * parent_field)1046 bool MessageDifferencer::CompareUnknownFields(
1047 const Message& message1, const Message& message2,
1048 const google::protobuf::UnknownFieldSet& unknown_field_set1,
1049 const google::protobuf::UnknownFieldSet& unknown_field_set2,
1050 vector<SpecificField>* parent_field) {
1051 // Ignore unknown fields in EQUIVALENT mode.
1052 if (message_field_comparison_ == EQUIVALENT) return true;
1053
1054 if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
1055 return true;
1056 }
1057
1058 bool is_different = false;
1059
1060 // We first sort the unknown fields by field number and type (in other words,
1061 // in tag order), making sure to preserve ordering of values with the same
1062 // tag. This allows us to report only meaningful differences between the
1063 // two sets -- that is, differing values for the same tag. We use
1064 // IndexUnknownFieldPairs to keep track of the field's original index for
1065 // reporting purposes.
1066 vector<IndexUnknownFieldPair> fields1; // unknown_field_set1, sorted
1067 vector<IndexUnknownFieldPair> fields2; // unknown_field_set2, sorted
1068 fields1.reserve(unknown_field_set1.field_count());
1069 fields2.reserve(unknown_field_set2.field_count());
1070
1071 for (int i = 0; i < unknown_field_set1.field_count(); i++) {
1072 fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
1073 }
1074 for (int i = 0; i < unknown_field_set2.field_count(); i++) {
1075 fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
1076 }
1077
1078 UnknownFieldOrdering is_before;
1079 std::stable_sort(fields1.begin(), fields1.end(), is_before);
1080 std::stable_sort(fields2.begin(), fields2.end(), is_before);
1081
1082 // In order to fill in SpecificField::index, we have to keep track of how
1083 // many values we've seen with the same field number and type.
1084 // current_repeated points at the first field in this range, and
1085 // current_repeated_start{1,2} are the indexes of the first field in the
1086 // range within fields1 and fields2.
1087 const UnknownField* current_repeated = NULL;
1088 int current_repeated_start1 = 0;
1089 int current_repeated_start2 = 0;
1090
1091 // Now that we have two sorted lists, we can detect fields which appear only
1092 // in one list or the other by traversing them simultaneously.
1093 int index1 = 0;
1094 int index2 = 0;
1095 while (index1 < fields1.size() || index2 < fields2.size()) {
1096 enum { ADDITION, DELETION, MODIFICATION, COMPARE_GROUPS,
1097 NO_CHANGE } change_type;
1098
1099 // focus_field is the field we're currently reporting on. (In the case
1100 // of a modification, it's the field on the left side.)
1101 const UnknownField* focus_field;
1102 bool match = false;
1103
1104 if (index2 == fields2.size() ||
1105 (index1 < fields1.size() &&
1106 is_before(fields1[index1], fields2[index2]))) {
1107 // fields1[index1] is not present in fields2.
1108 change_type = DELETION;
1109 focus_field = fields1[index1].second;
1110 } else if (index1 == fields1.size() ||
1111 is_before(fields2[index2], fields1[index1])) {
1112 // fields2[index2] is not present in fields1.
1113 if (scope_ == PARTIAL) {
1114 // Ignore.
1115 ++index2;
1116 continue;
1117 }
1118 change_type = ADDITION;
1119 focus_field = fields2[index2].second;
1120 } else {
1121 // Field type and number are the same. See if the values differ.
1122 change_type = MODIFICATION;
1123 focus_field = fields1[index1].second;
1124
1125 switch (focus_field->type()) {
1126 case UnknownField::TYPE_VARINT:
1127 match = fields1[index1].second->varint() ==
1128 fields2[index2].second->varint();
1129 break;
1130 case UnknownField::TYPE_FIXED32:
1131 match = fields1[index1].second->fixed32() ==
1132 fields2[index2].second->fixed32();
1133 break;
1134 case UnknownField::TYPE_FIXED64:
1135 match = fields1[index1].second->fixed64() ==
1136 fields2[index2].second->fixed64();
1137 break;
1138 case UnknownField::TYPE_LENGTH_DELIMITED:
1139 match = fields1[index1].second->length_delimited() ==
1140 fields2[index2].second->length_delimited();
1141 break;
1142 case UnknownField::TYPE_GROUP:
1143 // We must deal with this later, after building the SpecificField.
1144 change_type = COMPARE_GROUPS;
1145 break;
1146 }
1147 if (match && change_type != COMPARE_GROUPS) {
1148 change_type = NO_CHANGE;
1149 }
1150 }
1151
1152 if (current_repeated == NULL ||
1153 focus_field->number() != current_repeated->number() ||
1154 focus_field->type() != current_repeated->type()) {
1155 // We've started a new repeated field.
1156 current_repeated = focus_field;
1157 current_repeated_start1 = index1;
1158 current_repeated_start2 = index2;
1159 }
1160
1161 if (change_type == NO_CHANGE && reporter_ == NULL) {
1162 // Fields were already compared and matched and we have no reporter.
1163 ++index1;
1164 ++index2;
1165 continue;
1166 }
1167
1168 // Build the SpecificField. This is slightly complicated.
1169 SpecificField specific_field;
1170 specific_field.unknown_field_number = focus_field->number();
1171 specific_field.unknown_field_type = focus_field->type();
1172
1173 specific_field.unknown_field_set1 = &unknown_field_set1;
1174 specific_field.unknown_field_set2 = &unknown_field_set2;
1175
1176 if (change_type != ADDITION) {
1177 specific_field.unknown_field_index1 = fields1[index1].first;
1178 }
1179 if (change_type != DELETION) {
1180 specific_field.unknown_field_index2 = fields2[index2].first;
1181 }
1182
1183 // Calculate the field index.
1184 if (change_type == ADDITION) {
1185 specific_field.index = index2 - current_repeated_start2;
1186 specific_field.new_index = index2 - current_repeated_start2;
1187 } else {
1188 specific_field.index = index1 - current_repeated_start1;
1189 specific_field.new_index = index2 - current_repeated_start2;
1190 }
1191
1192 if (IsUnknownFieldIgnored(message1, message2, specific_field,
1193 *parent_field)) {
1194 if (reporter_ != NULL) {
1195 parent_field->push_back(specific_field);
1196 reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
1197 parent_field->pop_back();
1198 }
1199 return true;
1200 }
1201
1202 if (change_type == ADDITION || change_type == DELETION ||
1203 change_type == MODIFICATION) {
1204 if (reporter_ == NULL) {
1205 // We found a difference and we have no reproter.
1206 return false;
1207 }
1208 is_different = true;
1209 }
1210
1211 parent_field->push_back(specific_field);
1212
1213 switch (change_type) {
1214 case ADDITION:
1215 reporter_->ReportAdded(message1, message2, *parent_field);
1216 ++index2;
1217 break;
1218 case DELETION:
1219 reporter_->ReportDeleted(message1, message2, *parent_field);
1220 ++index1;
1221 break;
1222 case MODIFICATION:
1223 reporter_->ReportModified(message1, message2, *parent_field);
1224 ++index1;
1225 ++index2;
1226 break;
1227 case COMPARE_GROUPS:
1228 if (!CompareUnknownFields(message1, message2,
1229 fields1[index1].second->group(),
1230 fields2[index2].second->group(),
1231 parent_field)) {
1232 if (reporter_ == NULL) return false;
1233 is_different = true;
1234 reporter_->ReportModified(message1, message2, *parent_field);
1235 }
1236 ++index1;
1237 ++index2;
1238 break;
1239 case NO_CHANGE:
1240 ++index1;
1241 ++index2;
1242 if (report_matches_) {
1243 reporter_->ReportMatched(message1, message2, *parent_field);
1244 }
1245 }
1246
1247 parent_field->pop_back();
1248 }
1249
1250 return !is_different;
1251 }
1252
1253 namespace {
1254
1255 // Find maximum bipartite matching using the argumenting path algorithm.
1256 class MaximumMatcher {
1257 public:
1258 typedef ResultCallback2<bool, int, int> NodeMatchCallback;
1259 // MaximumMatcher takes ownership of the passed in callback and uses it to
1260 // determine whether a node on the left side of the bipartial graph matches
1261 // a node on the right side. count1 is the number of nodes on the left side
1262 // of the graph and count2 to is the number of nodes on the right side.
1263 // Every node is referred to using 0-based indices.
1264 // If a maximum match is found, the result will be stored in match_list1 and
1265 // match_list2. match_list1[i] == j means the i-th node on the left side is
1266 // matched to the j-th node on the right side and match_list2[x] == y means
1267 // the x-th node on the right side is matched to y-th node on the left side.
1268 // match_list1[i] == -1 means the node is not matched. Same with match_list2.
1269 MaximumMatcher(int count1, int count2, NodeMatchCallback* callback,
1270 vector<int>* match_list1, vector<int>* match_list2);
1271 // Find a maximum match and return the number of matched node pairs.
1272 // If early_return is true, this method will return 0 immediately when it
1273 // finds that not all nodes on the left side can be matched.
1274 int FindMaximumMatch(bool early_return);
1275 private:
1276 // Determines whether the node on the left side of the bipartial graph
1277 // matches the one on the right side.
1278 bool Match(int left, int right);
1279 // Find an argumenting path starting from the node v on the left side. If a
1280 // path can be found, update match_list2_ to reflect the path and return
1281 // true.
1282 bool FindArgumentPathDFS(int v, vector<bool>* visited);
1283
1284 int count1_;
1285 int count2_;
1286 google::protobuf::scoped_ptr<NodeMatchCallback> match_callback_;
1287 map<pair<int, int>, bool> cached_match_results_;
1288 vector<int>* match_list1_;
1289 vector<int>* match_list2_;
1290 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
1291 };
1292
MaximumMatcher(int count1,int count2,NodeMatchCallback * callback,vector<int> * match_list1,vector<int> * match_list2)1293 MaximumMatcher::MaximumMatcher(int count1, int count2,
1294 NodeMatchCallback* callback,
1295 vector<int>* match_list1,
1296 vector<int>* match_list2)
1297 : count1_(count1), count2_(count2), match_callback_(callback),
1298 match_list1_(match_list1), match_list2_(match_list2) {
1299 match_list1_->assign(count1, -1);
1300 match_list2_->assign(count2, -1);
1301 }
1302
FindMaximumMatch(bool early_return)1303 int MaximumMatcher::FindMaximumMatch(bool early_return) {
1304 int result = 0;
1305 for (int i = 0; i < count1_; ++i) {
1306 vector<bool> visited(count1_);
1307 if (FindArgumentPathDFS(i, &visited)) {
1308 ++result;
1309 } else if (early_return) {
1310 return 0;
1311 }
1312 }
1313 // Backfill match_list1_ as we only filled match_list2_ when finding
1314 // argumenting pathes.
1315 for (int i = 0; i < count2_; ++i) {
1316 if ((*match_list2_)[i] != -1) {
1317 (*match_list1_)[(*match_list2_)[i]] = i;
1318 }
1319 }
1320 return result;
1321 }
1322
Match(int left,int right)1323 bool MaximumMatcher::Match(int left, int right) {
1324 pair<int, int> p(left, right);
1325 map<pair<int, int>, bool>::iterator it = cached_match_results_.find(p);
1326 if (it != cached_match_results_.end()) {
1327 return it->second;
1328 }
1329 cached_match_results_[p] = match_callback_->Run(left, right);
1330 return cached_match_results_[p];
1331 }
1332
FindArgumentPathDFS(int v,vector<bool> * visited)1333 bool MaximumMatcher::FindArgumentPathDFS(int v, vector<bool>* visited) {
1334 (*visited)[v] = true;
1335 // We try to match those un-matched nodes on the right side first. This is
1336 // the step that the navie greedy matching algorithm uses. In the best cases
1337 // where the greedy algorithm can find a maximum matching, we will always
1338 // find a match in this step and the performance will be identical to the
1339 // greedy algorithm.
1340 for (int i = 0; i < count2_; ++i) {
1341 int matched = (*match_list2_)[i];
1342 if (matched == -1 && Match(v, i)) {
1343 (*match_list2_)[i] = v;
1344 return true;
1345 }
1346 }
1347 // Then we try those already matched nodes and see if we can find an
1348 // alternaive match for the node matched to them.
1349 // The greedy algorithm will stop before this and fail to produce the
1350 // correct result.
1351 for (int i = 0; i < count2_; ++i) {
1352 int matched = (*match_list2_)[i];
1353 if (matched != -1 && Match(v, i)) {
1354 if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
1355 (*match_list2_)[i] = v;
1356 return true;
1357 }
1358 }
1359 }
1360 return false;
1361 }
1362
1363 } // namespace
1364
MatchRepeatedFieldIndices(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,const vector<SpecificField> & parent_fields,vector<int> * match_list1,vector<int> * match_list2)1365 bool MessageDifferencer::MatchRepeatedFieldIndices(
1366 const Message& message1,
1367 const Message& message2,
1368 const FieldDescriptor* repeated_field,
1369 const vector<SpecificField>& parent_fields,
1370 vector<int>* match_list1,
1371 vector<int>* match_list2) {
1372 const int count1 =
1373 message1.GetReflection()->FieldSize(message1, repeated_field);
1374 const int count2 =
1375 message2.GetReflection()->FieldSize(message2, repeated_field);
1376 const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
1377
1378 match_list1->assign(count1, -1);
1379 match_list2->assign(count2, -1);
1380
1381 SpecificField specific_field;
1382 specific_field.field = repeated_field;
1383
1384 bool success = true;
1385 // Find potential match if this is a special repeated field.
1386 if (key_comparator != NULL || IsTreatedAsSet(repeated_field)) {
1387 if (scope_ == PARTIAL) {
1388 // When partial matching is enabled, Compare(a, b) && Compare(a, c)
1389 // doesn't necessarily imply Compare(b, c). Therefore a naive greedy
1390 // algorithm will fail to find a maximum matching.
1391 // Here we use the argumenting path algorithm.
1392 MaximumMatcher::NodeMatchCallback* callback =
1393 ::google::protobuf::internal::NewPermanentCallback(
1394 this, &MessageDifferencer::IsMatch,
1395 repeated_field, key_comparator,
1396 &message1, &message2, parent_fields);
1397 MaximumMatcher matcher(count1, count2, callback, match_list1,
1398 match_list2);
1399 // If diff info is not needed, we should end the matching process as
1400 // soon as possible if not all items can be matched.
1401 bool early_return = (reporter_ == NULL);
1402 int match_count = matcher.FindMaximumMatch(early_return);
1403 if (match_count != count1 && reporter_ == NULL) return false;
1404 success = success && (match_count == count1);
1405 } else {
1406 for (int i = 0; i < count1; ++i) {
1407 // Indicates any matched elements for this repeated field.
1408 bool match = false;
1409
1410 specific_field.index = i;
1411 specific_field.new_index = i;
1412
1413 for (int j = 0; j < count2; j++) {
1414 if (match_list2->at(j) != -1) continue;
1415 specific_field.index = i;
1416 specific_field.new_index = j;
1417
1418 match = IsMatch(repeated_field, key_comparator,
1419 &message1, &message2, parent_fields, i, j);
1420
1421 if (match) {
1422 match_list1->at(specific_field.index) = specific_field.new_index;
1423 match_list2->at(specific_field.new_index) = specific_field.index;
1424 break;
1425 }
1426 }
1427 if (!match && reporter_ == NULL) return false;
1428 success = success && match;
1429 }
1430 }
1431 } else {
1432 // If this field should be treated as list, just label the match_list.
1433 for (int i = 0; i < count1 && i < count2; i++) {
1434 match_list1->at(i) = i;
1435 match_list2->at(i) = i;
1436 }
1437 }
1438
1439 return success;
1440 }
1441
GetFieldComparisonResult(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,const FieldContext * field_context)1442 FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
1443 const Message& message1, const Message& message2,
1444 const FieldDescriptor* field, int index1, int index2,
1445 const FieldContext* field_context) {
1446 FieldComparator* comparator = field_comparator_ != NULL ?
1447 field_comparator_ : &default_field_comparator_;
1448 return comparator->Compare(message1, message2, field,
1449 index1, index2, field_context);
1450 }
1451
1452 // ===========================================================================
1453
Reporter()1454 MessageDifferencer::Reporter::Reporter() { }
~Reporter()1455 MessageDifferencer::Reporter::~Reporter() {}
1456
1457 // ===========================================================================
1458
MapKeyComparator()1459 MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
~MapKeyComparator()1460 MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
1461
1462 // ===========================================================================
1463
IgnoreCriteria()1464 MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
~IgnoreCriteria()1465 MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
1466
1467 // ===========================================================================
1468
1469 // Note that the printer's delimiter is not used, because if we are given a
1470 // printer, we don't know its delimiter.
StreamReporter(io::ZeroCopyOutputStream * output)1471 MessageDifferencer::StreamReporter::StreamReporter(
1472 io::ZeroCopyOutputStream* output) : printer_(new io::Printer(output, '$')),
1473 delete_printer_(true),
1474 report_modified_aggregates_(false) { }
1475
StreamReporter(io::Printer * printer)1476 MessageDifferencer::StreamReporter::StreamReporter(
1477 io::Printer* printer) : printer_(printer),
1478 delete_printer_(false),
1479 report_modified_aggregates_(false) { }
1480
~StreamReporter()1481 MessageDifferencer::StreamReporter::~StreamReporter() {
1482 if (delete_printer_) delete printer_;
1483 }
1484
PrintPath(const vector<SpecificField> & field_path,bool left_side)1485 void MessageDifferencer::StreamReporter::PrintPath(
1486 const vector<SpecificField>& field_path, bool left_side) {
1487 for (int i = 0; i < field_path.size(); ++i) {
1488 if (i > 0) {
1489 printer_->Print(".");
1490 }
1491
1492 SpecificField specific_field = field_path[i];
1493
1494 if (specific_field.field != NULL) {
1495 if (specific_field.field->is_extension()) {
1496 printer_->Print("($name$)", "name",
1497 specific_field.field->full_name());
1498 } else {
1499 printer_->PrintRaw(specific_field.field->name());
1500 }
1501 } else {
1502 printer_->PrintRaw(SimpleItoa(specific_field.unknown_field_number));
1503 }
1504 if (left_side && specific_field.index >= 0) {
1505 printer_->Print("[$name$]", "name", SimpleItoa(specific_field.index));
1506 }
1507 if (!left_side && specific_field.new_index >= 0) {
1508 printer_->Print("[$name$]", "name", SimpleItoa(specific_field.new_index));
1509 }
1510 }
1511 }
1512
1513 void MessageDifferencer::
PrintValue(const Message & message,const vector<SpecificField> & field_path,bool left_side)1514 StreamReporter::PrintValue(const Message& message,
1515 const vector<SpecificField>& field_path,
1516 bool left_side) {
1517 const SpecificField& specific_field = field_path.back();
1518 const FieldDescriptor* field = specific_field.field;
1519 if (field != NULL) {
1520 string output;
1521 int index = left_side ? specific_field.index : specific_field.new_index;
1522 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1523 const Reflection* reflection = message.GetReflection();
1524 const Message& field_message = field->is_repeated() ?
1525 reflection->GetRepeatedMessage(message, field, index) :
1526 reflection->GetMessage(message, field);
1527 output = field_message.ShortDebugString();
1528 if (output.empty()) {
1529 printer_->Print("{ }");
1530 } else {
1531 printer_->Print("{ $name$ }", "name", output);
1532 }
1533 } else {
1534 TextFormat::PrintFieldValueToString(message, field, index, &output);
1535 printer_->PrintRaw(output);
1536 }
1537 } else {
1538 const UnknownFieldSet* unknown_fields =
1539 (left_side ?
1540 specific_field.unknown_field_set1 :
1541 specific_field.unknown_field_set2);
1542 const UnknownField* unknown_field = &unknown_fields->field(
1543 left_side ?
1544 specific_field.unknown_field_index1 :
1545 specific_field.unknown_field_index2);
1546 PrintUnknownFieldValue(unknown_field);
1547 }
1548 }
1549
1550 void MessageDifferencer::
PrintUnknownFieldValue(const UnknownField * unknown_field)1551 StreamReporter::PrintUnknownFieldValue(const UnknownField* unknown_field) {
1552 GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
1553
1554 string output;
1555 switch (unknown_field->type()) {
1556 case UnknownField::TYPE_VARINT:
1557 output = SimpleItoa(unknown_field->varint());
1558 break;
1559 case UnknownField::TYPE_FIXED32:
1560 output = StrCat("0x", strings::Hex(unknown_field->fixed32(),
1561 strings::ZERO_PAD_8));
1562 break;
1563 case UnknownField::TYPE_FIXED64:
1564 output = StrCat("0x", strings::Hex(unknown_field->fixed64(),
1565 strings::ZERO_PAD_16));
1566 break;
1567 case UnknownField::TYPE_LENGTH_DELIMITED:
1568 output = StringPrintf("\"%s\"",
1569 CEscape(unknown_field->length_delimited()).c_str());
1570 break;
1571 case UnknownField::TYPE_GROUP:
1572 // TODO(kenton): Print the contents of the group like we do for
1573 // messages. Requires an equivalent of ShortDebugString() for
1574 // UnknownFieldSet.
1575 output = "{ ... }";
1576 break;
1577 }
1578 printer_->PrintRaw(output);
1579 }
1580
Print(const string & str)1581 void MessageDifferencer::StreamReporter::Print(const string& str) {
1582 printer_->Print(str.c_str());
1583 }
1584
ReportAdded(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1585 void MessageDifferencer::StreamReporter::ReportAdded(
1586 const Message& message1,
1587 const Message& message2,
1588 const vector<SpecificField>& field_path) {
1589 printer_->Print("added: ");
1590 PrintPath(field_path, false);
1591 printer_->Print(": ");
1592 PrintValue(message2, field_path, false);
1593 printer_->Print("\n"); // Print for newlines.
1594 }
1595
ReportDeleted(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1596 void MessageDifferencer::StreamReporter::ReportDeleted(
1597 const Message& message1,
1598 const Message& message2,
1599 const vector<SpecificField>& field_path) {
1600 printer_->Print("deleted: ");
1601 PrintPath(field_path, true);
1602 printer_->Print(": ");
1603 PrintValue(message1, field_path, true);
1604 printer_->Print("\n"); // Print for newlines
1605 }
1606
ReportModified(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1607 void MessageDifferencer::StreamReporter::ReportModified(
1608 const Message& message1,
1609 const Message& message2,
1610 const vector<SpecificField>& field_path) {
1611 if (!report_modified_aggregates_ && field_path.back().field == NULL) {
1612 if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
1613 // Any changes to the subfields have already been printed.
1614 return;
1615 }
1616 } else if (!report_modified_aggregates_) {
1617 if (field_path.back().field->cpp_type() ==
1618 FieldDescriptor::CPPTYPE_MESSAGE) {
1619 // Any changes to the subfields have already been printed.
1620 return;
1621 }
1622 }
1623
1624 printer_->Print("modified: ");
1625 PrintPath(field_path, true);
1626 if (CheckPathChanged(field_path)) {
1627 printer_->Print(" -> ");
1628 PrintPath(field_path, false);
1629 }
1630 printer_->Print(": ");
1631 PrintValue(message1, field_path, true);
1632 printer_->Print(" -> ");
1633 PrintValue(message2, field_path, false);
1634 printer_->Print("\n"); // Print for newlines.
1635 }
1636
ReportMoved(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1637 void MessageDifferencer::StreamReporter::ReportMoved(
1638 const Message& message1,
1639 const Message& message2,
1640 const vector<SpecificField>& field_path) {
1641 printer_->Print("moved: ");
1642 PrintPath(field_path, true);
1643 printer_->Print(" -> ");
1644 PrintPath(field_path, false);
1645 printer_->Print(" : ");
1646 PrintValue(message1, field_path, true);
1647 printer_->Print("\n"); // Print for newlines.
1648 }
1649
ReportMatched(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1650 void MessageDifferencer::StreamReporter::ReportMatched(
1651 const Message& message1,
1652 const Message& message2,
1653 const vector<SpecificField>& field_path) {
1654 printer_->Print("matched: ");
1655 PrintPath(field_path, true);
1656 if (CheckPathChanged(field_path)) {
1657 printer_->Print(" -> ");
1658 PrintPath(field_path, false);
1659 }
1660 printer_->Print(" : ");
1661 PrintValue(message1, field_path, true);
1662 printer_->Print("\n"); // Print for newlines.
1663 }
1664
ReportIgnored(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1665 void MessageDifferencer::StreamReporter::ReportIgnored(
1666 const Message& message1,
1667 const Message& message2,
1668 const vector<SpecificField>& field_path) {
1669 printer_->Print("ignored: ");
1670 PrintPath(field_path, true);
1671 if (CheckPathChanged(field_path)) {
1672 printer_->Print(" -> ");
1673 PrintPath(field_path, false);
1674 }
1675 printer_->Print("\n"); // Print for newlines.
1676 }
1677
ReportUnknownFieldIgnored(const Message & message1,const Message & message2,const vector<SpecificField> & field_path)1678 void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
1679 const Message& message1, const Message& message2,
1680 const vector<SpecificField>& field_path) {
1681 printer_->Print("ignored: ");
1682 PrintPath(field_path, true);
1683 if (CheckPathChanged(field_path)) {
1684 printer_->Print(" -> ");
1685 PrintPath(field_path, false);
1686 }
1687 printer_->Print("\n"); // Print for newlines.
1688 }
1689
1690 } // namespace util
1691 } // namespace protobuf
1692 } // namespace google
1693