1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: anuraag@google.com (Anuraag Agrawal)
32 // Author: tibell@google.com (Johan Tibell)
33 
34 #include <google/protobuf/pyext/message.h>
35 
36 #include <map>
37 #include <memory>
38 #ifndef _SHARED_PTR_H
39 #include <google/protobuf/stubs/shared_ptr.h>
40 #endif
41 #include <string>
42 #include <vector>
43 #include <structmember.h>  // A Python header file.
44 
45 #ifndef PyVarObject_HEAD_INIT
46 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
47 #endif
48 #ifndef Py_TYPE
49 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
50 #endif
51 #include <google/protobuf/descriptor.pb.h>
52 #include <google/protobuf/stubs/common.h>
53 #include <google/protobuf/stubs/logging.h>
54 #include <google/protobuf/io/coded_stream.h>
55 #include <google/protobuf/util/message_differencer.h>
56 #include <google/protobuf/descriptor.h>
57 #include <google/protobuf/message.h>
58 #include <google/protobuf/text_format.h>
59 #include <google/protobuf/unknown_field_set.h>
60 #include <google/protobuf/pyext/descriptor.h>
61 #include <google/protobuf/pyext/descriptor_pool.h>
62 #include <google/protobuf/pyext/extension_dict.h>
63 #include <google/protobuf/pyext/repeated_composite_container.h>
64 #include <google/protobuf/pyext/repeated_scalar_container.h>
65 #include <google/protobuf/pyext/map_container.h>
66 #include <google/protobuf/pyext/scoped_pyobject_ptr.h>
67 #include <google/protobuf/stubs/strutil.h>
68 
69 #if PY_MAJOR_VERSION >= 3
70   #define PyInt_Check PyLong_Check
71   #define PyInt_AsLong PyLong_AsLong
72   #define PyInt_FromLong PyLong_FromLong
73   #define PyInt_FromSize_t PyLong_FromSize_t
74   #define PyString_Check PyUnicode_Check
75   #define PyString_FromString PyUnicode_FromString
76   #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
77   #if PY_VERSION_HEX < 0x03030000
78     #error "Python 3.0 - 3.2 are not supported."
79   #else
80   #define PyString_AsString(ob) \
81     (PyUnicode_Check(ob)? PyUnicode_AsUTF8(ob): PyBytes_AsString(ob))
82   #define PyString_AsStringAndSize(ob, charpp, sizep) \
83     (PyUnicode_Check(ob)? \
84        ((*(charpp) = PyUnicode_AsUTF8AndSize(ob, (sizep))) == NULL? -1: 0): \
85        PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
86   #endif
87 #endif
88 
89 namespace google {
90 namespace protobuf {
91 namespace python {
92 
93 static PyObject* kDESCRIPTOR;
94 static PyObject* k_extensions_by_name;
95 static PyObject* k_extensions_by_number;
96 PyObject* EnumTypeWrapper_class;
97 static PyObject* PythonMessage_class;
98 static PyObject* kEmptyWeakref;
99 static PyObject* WKT_classes = NULL;
100 
101 namespace message_meta {
102 
103 static int InsertEmptyWeakref(PyTypeObject* base);
104 
105 // Add the number of a field descriptor to the containing message class.
106 // Equivalent to:
107 //   _cls.<field>_FIELD_NUMBER = <number>
AddFieldNumberToClass(PyObject * cls,const FieldDescriptor * field_descriptor)108 static bool AddFieldNumberToClass(
109     PyObject* cls, const FieldDescriptor* field_descriptor) {
110   string constant_name = field_descriptor->name() + "_FIELD_NUMBER";
111   UpperString(&constant_name);
112   ScopedPyObjectPtr attr_name(PyString_FromStringAndSize(
113       constant_name.c_str(), constant_name.size()));
114   if (attr_name == NULL) {
115     return false;
116   }
117   ScopedPyObjectPtr number(PyInt_FromLong(field_descriptor->number()));
118   if (number == NULL) {
119     return false;
120   }
121   if (PyObject_SetAttr(cls, attr_name.get(), number.get()) == -1) {
122     return false;
123   }
124   return true;
125 }
126 
127 
128 // Finalize the creation of the Message class.
AddDescriptors(PyObject * cls,const Descriptor * descriptor)129 static int AddDescriptors(PyObject* cls, const Descriptor* descriptor) {
130   // If there are extension_ranges, the message is "extendable", and extension
131   // classes will register themselves in this class.
132   if (descriptor->extension_range_count() > 0) {
133     ScopedPyObjectPtr by_name(PyDict_New());
134     if (PyObject_SetAttr(cls, k_extensions_by_name, by_name.get()) < 0) {
135       return -1;
136     }
137     ScopedPyObjectPtr by_number(PyDict_New());
138     if (PyObject_SetAttr(cls, k_extensions_by_number, by_number.get()) < 0) {
139       return -1;
140     }
141   }
142 
143   // For each field set: cls.<field>_FIELD_NUMBER = <number>
144   for (int i = 0; i < descriptor->field_count(); ++i) {
145     if (!AddFieldNumberToClass(cls, descriptor->field(i))) {
146       return -1;
147     }
148   }
149 
150   // For each enum set cls.<enum name> = EnumTypeWrapper(<enum descriptor>).
151   for (int i = 0; i < descriptor->enum_type_count(); ++i) {
152     const EnumDescriptor* enum_descriptor = descriptor->enum_type(i);
153     ScopedPyObjectPtr enum_type(
154         PyEnumDescriptor_FromDescriptor(enum_descriptor));
155     if (enum_type == NULL) {
156       return -1;
157      }
158     // Add wrapped enum type to message class.
159     ScopedPyObjectPtr wrapped(PyObject_CallFunctionObjArgs(
160         EnumTypeWrapper_class, enum_type.get(), NULL));
161     if (wrapped == NULL) {
162       return -1;
163     }
164     if (PyObject_SetAttrString(
165             cls, enum_descriptor->name().c_str(), wrapped.get()) == -1) {
166       return -1;
167     }
168 
169     // For each enum value add cls.<name> = <number>
170     for (int j = 0; j < enum_descriptor->value_count(); ++j) {
171       const EnumValueDescriptor* enum_value_descriptor =
172           enum_descriptor->value(j);
173       ScopedPyObjectPtr value_number(PyInt_FromLong(
174           enum_value_descriptor->number()));
175       if (value_number == NULL) {
176         return -1;
177       }
178       if (PyObject_SetAttrString(cls, enum_value_descriptor->name().c_str(),
179                                  value_number.get()) == -1) {
180         return -1;
181       }
182     }
183   }
184 
185   // For each extension set cls.<extension name> = <extension descriptor>.
186   //
187   // Extension descriptors come from
188   // <message descriptor>.extensions_by_name[name]
189   // which was defined previously.
190   for (int i = 0; i < descriptor->extension_count(); ++i) {
191     const google::protobuf::FieldDescriptor* field = descriptor->extension(i);
192     ScopedPyObjectPtr extension_field(PyFieldDescriptor_FromDescriptor(field));
193     if (extension_field == NULL) {
194       return -1;
195     }
196 
197     // Add the extension field to the message class.
198     if (PyObject_SetAttrString(
199             cls, field->name().c_str(), extension_field.get()) == -1) {
200       return -1;
201     }
202 
203     // For each extension set cls.<extension name>_FIELD_NUMBER = <number>.
204     if (!AddFieldNumberToClass(cls, field)) {
205       return -1;
206     }
207   }
208 
209   return 0;
210 }
211 
New(PyTypeObject * type,PyObject * args,PyObject * kwargs)212 static PyObject* New(PyTypeObject* type,
213                      PyObject* args, PyObject* kwargs) {
214   static char *kwlist[] = {"name", "bases", "dict", 0};
215   PyObject *bases, *dict;
216   const char* name;
217 
218   // Check arguments: (name, bases, dict)
219   if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", kwlist,
220                                    &name,
221                                    &PyTuple_Type, &bases,
222                                    &PyDict_Type, &dict)) {
223     return NULL;
224   }
225 
226   // Check bases: only (), or (message.Message,) are allowed
227   if (!(PyTuple_GET_SIZE(bases) == 0 ||
228         (PyTuple_GET_SIZE(bases) == 1 &&
229          PyTuple_GET_ITEM(bases, 0) == PythonMessage_class))) {
230     PyErr_SetString(PyExc_TypeError,
231                     "A Message class can only inherit from Message");
232     return NULL;
233   }
234 
235   // Check dict['DESCRIPTOR']
236   PyObject* py_descriptor = PyDict_GetItem(dict, kDESCRIPTOR);
237   if (py_descriptor == NULL) {
238     PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
239     return NULL;
240   }
241   if (!PyObject_TypeCheck(py_descriptor, &PyMessageDescriptor_Type)) {
242     PyErr_Format(PyExc_TypeError, "Expected a message Descriptor, got %s",
243                  py_descriptor->ob_type->tp_name);
244     return NULL;
245   }
246 
247   // Build the arguments to the base metaclass.
248   // We change the __bases__ classes.
249   ScopedPyObjectPtr new_args;
250   const Descriptor* message_descriptor =
251       PyMessageDescriptor_AsDescriptor(py_descriptor);
252   if (message_descriptor == NULL) {
253     return NULL;
254   }
255 
256   if (WKT_classes == NULL) {
257     ScopedPyObjectPtr well_known_types(PyImport_ImportModule(
258         "google.protobuf.internal.well_known_types"));
259     GOOGLE_DCHECK(well_known_types != NULL);
260 
261     WKT_classes = PyObject_GetAttrString(well_known_types.get(), "WKTBASES");
262     GOOGLE_DCHECK(WKT_classes != NULL);
263   }
264 
265   PyObject* well_known_class = PyDict_GetItemString(
266       WKT_classes, message_descriptor->full_name().c_str());
267   if (well_known_class == NULL) {
268     new_args.reset(Py_BuildValue("s(OO)O", name, &CMessage_Type,
269                                  PythonMessage_class, dict));
270   } else {
271     new_args.reset(Py_BuildValue("s(OOO)O", name, &CMessage_Type,
272                                  PythonMessage_class, well_known_class, dict));
273   }
274 
275   if (new_args == NULL) {
276     return NULL;
277   }
278   // Call the base metaclass.
279   ScopedPyObjectPtr result(PyType_Type.tp_new(type, new_args.get(), NULL));
280   if (result == NULL) {
281     return NULL;
282   }
283   CMessageClass* newtype = reinterpret_cast<CMessageClass*>(result.get());
284 
285   // Insert the empty weakref into the base classes.
286   if (InsertEmptyWeakref(
287           reinterpret_cast<PyTypeObject*>(PythonMessage_class)) < 0 ||
288       InsertEmptyWeakref(&CMessage_Type) < 0) {
289     return NULL;
290   }
291 
292   // Cache the descriptor, both as Python object and as C++ pointer.
293   const Descriptor* descriptor =
294       PyMessageDescriptor_AsDescriptor(py_descriptor);
295   if (descriptor == NULL) {
296     return NULL;
297   }
298   Py_INCREF(py_descriptor);
299   newtype->py_message_descriptor = py_descriptor;
300   newtype->message_descriptor = descriptor;
301   // TODO(amauryfa): Don't always use the canonical pool of the descriptor,
302   // use the MessageFactory optionally passed in the class dict.
303   newtype->py_descriptor_pool = GetDescriptorPool_FromPool(
304       descriptor->file()->pool());
305   if (newtype->py_descriptor_pool == NULL) {
306     return NULL;
307   }
308   Py_INCREF(newtype->py_descriptor_pool);
309 
310   // Add the message to the DescriptorPool.
311   if (cdescriptor_pool::RegisterMessageClass(newtype->py_descriptor_pool,
312                                              descriptor, newtype) < 0) {
313     return NULL;
314   }
315 
316   // Continue with type initialization: add other descriptors, enum values...
317   if (AddDescriptors(result.get(), descriptor) < 0) {
318     return NULL;
319   }
320   return result.release();
321 }
322 
Dealloc(CMessageClass * self)323 static void Dealloc(CMessageClass *self) {
324   Py_DECREF(self->py_message_descriptor);
325   Py_DECREF(self->py_descriptor_pool);
326   Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
327 }
328 
329 
330 // This function inserts and empty weakref at the end of the list of
331 // subclasses for the main protocol buffer Message class.
332 //
333 // This eliminates a O(n^2) behaviour in the internal add_subclass
334 // routine.
InsertEmptyWeakref(PyTypeObject * base_type)335 static int InsertEmptyWeakref(PyTypeObject *base_type) {
336 #if PY_MAJOR_VERSION >= 3
337   // Python 3.4 has already included the fix for the issue that this
338   // hack addresses. For further background and the fix please see
339   // https://bugs.python.org/issue17936.
340   return 0;
341 #else
342   PyObject *subclasses = base_type->tp_subclasses;
343   if (subclasses && PyList_CheckExact(subclasses)) {
344     return PyList_Append(subclasses, kEmptyWeakref);
345   }
346   return 0;
347 #endif  // PY_MAJOR_VERSION >= 3
348 }
349 
350 }  // namespace message_meta
351 
352 PyTypeObject CMessageClass_Type = {
353   PyVarObject_HEAD_INIT(&PyType_Type, 0)
354   FULL_MODULE_NAME ".MessageMeta",     // tp_name
355   sizeof(CMessageClass),               // tp_basicsize
356   0,                                   // tp_itemsize
357   (destructor)message_meta::Dealloc,   // tp_dealloc
358   0,                                   // tp_print
359   0,                                   // tp_getattr
360   0,                                   // tp_setattr
361   0,                                   // tp_compare
362   0,                                   // tp_repr
363   0,                                   // tp_as_number
364   0,                                   // tp_as_sequence
365   0,                                   // tp_as_mapping
366   0,                                   // tp_hash
367   0,                                   // tp_call
368   0,                                   // tp_str
369   0,                                   // tp_getattro
370   0,                                   // tp_setattro
371   0,                                   // tp_as_buffer
372   Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
373   "The metaclass of ProtocolMessages",  // tp_doc
374   0,                                   // tp_traverse
375   0,                                   // tp_clear
376   0,                                   // tp_richcompare
377   0,                                   // tp_weaklistoffset
378   0,                                   // tp_iter
379   0,                                   // tp_iternext
380   0,                                   // tp_methods
381   0,                                   // tp_members
382   0,                                   // tp_getset
383   0,                                   // tp_base
384   0,                                   // tp_dict
385   0,                                   // tp_descr_get
386   0,                                   // tp_descr_set
387   0,                                   // tp_dictoffset
388   0,                                   // tp_init
389   0,                                   // tp_alloc
390   message_meta::New,                   // tp_new
391 };
392 
CheckMessageClass(PyTypeObject * cls)393 static CMessageClass* CheckMessageClass(PyTypeObject* cls) {
394   if (!PyObject_TypeCheck(cls, &CMessageClass_Type)) {
395     PyErr_Format(PyExc_TypeError, "Class %s is not a Message", cls->tp_name);
396     return NULL;
397   }
398   return reinterpret_cast<CMessageClass*>(cls);
399 }
400 
GetMessageDescriptor(PyTypeObject * cls)401 static const Descriptor* GetMessageDescriptor(PyTypeObject* cls) {
402   CMessageClass* type = CheckMessageClass(cls);
403   if (type == NULL) {
404     return NULL;
405   }
406   return type->message_descriptor;
407 }
408 
409 // Forward declarations
410 namespace cmessage {
411 int InternalReleaseFieldByDescriptor(
412     CMessage* self,
413     const FieldDescriptor* field_descriptor,
414     PyObject* composite_field);
415 }  // namespace cmessage
416 
417 // ---------------------------------------------------------------------
418 // Visiting the composite children of a CMessage
419 
420 struct ChildVisitor {
421   // Returns 0 on success, -1 on failure.
VisitRepeatedCompositeContainergoogle::protobuf::python::ChildVisitor422   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
423     return 0;
424   }
425 
426   // Returns 0 on success, -1 on failure.
VisitRepeatedScalarContainergoogle::protobuf::python::ChildVisitor427   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
428     return 0;
429   }
430 
431   // Returns 0 on success, -1 on failure.
VisitCMessagegoogle::protobuf::python::ChildVisitor432   int VisitCMessage(CMessage* cmessage,
433                     const FieldDescriptor* field_descriptor) {
434     return 0;
435   }
436 };
437 
438 // Apply a function to a composite field.  Does nothing if child is of
439 // non-composite type.
440 template<class Visitor>
VisitCompositeField(const FieldDescriptor * descriptor,PyObject * child,Visitor visitor)441 static int VisitCompositeField(const FieldDescriptor* descriptor,
442                                PyObject* child,
443                                Visitor visitor) {
444   if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
445     if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
446       if (descriptor->is_map()) {
447         MapContainer* container = reinterpret_cast<MapContainer*>(child);
448         if (visitor.VisitMapContainer(container) == -1) {
449           return -1;
450         }
451       } else {
452         RepeatedCompositeContainer* container =
453           reinterpret_cast<RepeatedCompositeContainer*>(child);
454         if (visitor.VisitRepeatedCompositeContainer(container) == -1)
455           return -1;
456       }
457     } else {
458       RepeatedScalarContainer* container =
459         reinterpret_cast<RepeatedScalarContainer*>(child);
460       if (visitor.VisitRepeatedScalarContainer(container) == -1)
461         return -1;
462     }
463   } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
464     CMessage* cmsg = reinterpret_cast<CMessage*>(child);
465     if (visitor.VisitCMessage(cmsg, descriptor) == -1)
466       return -1;
467   }
468   // The ExtensionDict might contain non-composite fields, which we
469   // skip here.
470   return 0;
471 }
472 
473 // Visit each composite field and extension field of this CMessage.
474 // Returns -1 on error and 0 on success.
475 template<class Visitor>
ForEachCompositeField(CMessage * self,Visitor visitor)476 int ForEachCompositeField(CMessage* self, Visitor visitor) {
477   Py_ssize_t pos = 0;
478   PyObject* key;
479   PyObject* field;
480 
481   // Visit normal fields.
482   if (self->composite_fields) {
483     // Never use self->message in this function, it may be already freed.
484     const Descriptor* message_descriptor =
485         GetMessageDescriptor(Py_TYPE(self));
486     while (PyDict_Next(self->composite_fields, &pos, &key, &field)) {
487       Py_ssize_t key_str_size;
488       char *key_str_data;
489       if (PyString_AsStringAndSize(key, &key_str_data, &key_str_size) != 0)
490         return -1;
491       const string key_str(key_str_data, key_str_size);
492       const FieldDescriptor* descriptor =
493         message_descriptor->FindFieldByName(key_str);
494       if (descriptor != NULL) {
495         if (VisitCompositeField(descriptor, field, visitor) == -1)
496           return -1;
497       }
498     }
499   }
500 
501   // Visit extension fields.
502   if (self->extensions != NULL) {
503     pos = 0;
504     while (PyDict_Next(self->extensions->values, &pos, &key, &field)) {
505       const FieldDescriptor* descriptor = cmessage::GetExtensionDescriptor(key);
506       if (descriptor == NULL)
507         return -1;
508       if (VisitCompositeField(descriptor, field, visitor) == -1)
509         return -1;
510     }
511   }
512 
513   return 0;
514 }
515 
516 // ---------------------------------------------------------------------
517 
518 // Constants used for integer type range checking.
519 PyObject* kPythonZero;
520 PyObject* kint32min_py;
521 PyObject* kint32max_py;
522 PyObject* kuint32max_py;
523 PyObject* kint64min_py;
524 PyObject* kint64max_py;
525 PyObject* kuint64max_py;
526 
527 PyObject* EncodeError_class;
528 PyObject* DecodeError_class;
529 PyObject* PickleError_class;
530 
531 // Constant PyString values used for GetAttr/GetItem.
532 static PyObject* k_cdescriptor;
533 static PyObject* kfull_name;
534 
535 /* Is 64bit */
FormatTypeError(PyObject * arg,char * expected_types)536 void FormatTypeError(PyObject* arg, char* expected_types) {
537   PyObject* repr = PyObject_Repr(arg);
538   if (repr) {
539     PyErr_Format(PyExc_TypeError,
540                  "%.100s has type %.100s, but expected one of: %s",
541                  PyString_AsString(repr),
542                  Py_TYPE(arg)->tp_name,
543                  expected_types);
544     Py_DECREF(repr);
545   }
546 }
547 
548 template<class T>
CheckAndGetInteger(PyObject * arg,T * value,PyObject * min,PyObject * max)549 bool CheckAndGetInteger(
550     PyObject* arg, T* value, PyObject* min, PyObject* max) {
551   bool is_long = PyLong_Check(arg);
552 #if PY_MAJOR_VERSION < 3
553   if (!PyInt_Check(arg) && !is_long) {
554     FormatTypeError(arg, "int, long");
555     return false;
556   }
557   if (PyObject_Compare(min, arg) > 0 || PyObject_Compare(max, arg) < 0) {
558 #else
559   if (!is_long) {
560     FormatTypeError(arg, "int");
561     return false;
562   }
563   if (PyObject_RichCompareBool(min, arg, Py_LE) != 1 ||
564       PyObject_RichCompareBool(max, arg, Py_GE) != 1) {
565 #endif
566     if (!PyErr_Occurred()) {
567       PyObject *s = PyObject_Str(arg);
568       if (s) {
569         PyErr_Format(PyExc_ValueError,
570                      "Value out of range: %s",
571                      PyString_AsString(s));
572         Py_DECREF(s);
573       }
574     }
575     return false;
576   }
577 #if PY_MAJOR_VERSION < 3
578   if (!is_long) {
579     *value = static_cast<T>(PyInt_AsLong(arg));
580   } else  // NOLINT
581 #endif
582   {
583     if (min == kPythonZero) {
584       *value = static_cast<T>(PyLong_AsUnsignedLongLong(arg));
585     } else {
586       *value = static_cast<T>(PyLong_AsLongLong(arg));
587     }
588   }
589   return true;
590 }
591 
592 // These are referenced by repeated_scalar_container, and must
593 // be explicitly instantiated.
594 template bool CheckAndGetInteger<int32>(
595     PyObject*, int32*, PyObject*, PyObject*);
596 template bool CheckAndGetInteger<int64>(
597     PyObject*, int64*, PyObject*, PyObject*);
598 template bool CheckAndGetInteger<uint32>(
599     PyObject*, uint32*, PyObject*, PyObject*);
600 template bool CheckAndGetInteger<uint64>(
601     PyObject*, uint64*, PyObject*, PyObject*);
602 
603 bool CheckAndGetDouble(PyObject* arg, double* value) {
604   if (!PyInt_Check(arg) && !PyLong_Check(arg) &&
605       !PyFloat_Check(arg)) {
606     FormatTypeError(arg, "int, long, float");
607     return false;
608   }
609   *value = PyFloat_AsDouble(arg);
610   return true;
611 }
612 
613 bool CheckAndGetFloat(PyObject* arg, float* value) {
614   double double_value;
615   if (!CheckAndGetDouble(arg, &double_value)) {
616     return false;
617   }
618   *value = static_cast<float>(double_value);
619   return true;
620 }
621 
622 bool CheckAndGetBool(PyObject* arg, bool* value) {
623   if (!PyInt_Check(arg) && !PyBool_Check(arg) && !PyLong_Check(arg)) {
624     FormatTypeError(arg, "int, long, bool");
625     return false;
626   }
627   *value = static_cast<bool>(PyInt_AsLong(arg));
628   return true;
629 }
630 
631 // Checks whether the given object (which must be "bytes" or "unicode") contains
632 // valid UTF-8.
633 bool IsValidUTF8(PyObject* obj) {
634   if (PyBytes_Check(obj)) {
635     PyObject* unicode = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
636 
637     // Clear the error indicator; we report our own error when desired.
638     PyErr_Clear();
639 
640     if (unicode) {
641       Py_DECREF(unicode);
642       return true;
643     } else {
644       return false;
645     }
646   } else {
647     // Unicode object, known to be valid UTF-8.
648     return true;
649   }
650 }
651 
652 bool AllowInvalidUTF8(const FieldDescriptor* field) { return false; }
653 
654 PyObject* CheckString(PyObject* arg, const FieldDescriptor* descriptor) {
655   GOOGLE_DCHECK(descriptor->type() == FieldDescriptor::TYPE_STRING ||
656          descriptor->type() == FieldDescriptor::TYPE_BYTES);
657   if (descriptor->type() == FieldDescriptor::TYPE_STRING) {
658     if (!PyBytes_Check(arg) && !PyUnicode_Check(arg)) {
659       FormatTypeError(arg, "bytes, unicode");
660       return NULL;
661     }
662 
663     if (!IsValidUTF8(arg) && !AllowInvalidUTF8(descriptor)) {
664       PyObject* repr = PyObject_Repr(arg);
665       PyErr_Format(PyExc_ValueError,
666                    "%s has type str, but isn't valid UTF-8 "
667                    "encoding. Non-UTF-8 strings must be converted to "
668                    "unicode objects before being added.",
669                    PyString_AsString(repr));
670       Py_DECREF(repr);
671       return NULL;
672     }
673   } else if (!PyBytes_Check(arg)) {
674     FormatTypeError(arg, "bytes");
675     return NULL;
676   }
677 
678   PyObject* encoded_string = NULL;
679   if (descriptor->type() == FieldDescriptor::TYPE_STRING) {
680     if (PyBytes_Check(arg)) {
681       // The bytes were already validated as correctly encoded UTF-8 above.
682       encoded_string = arg;  // Already encoded.
683       Py_INCREF(encoded_string);
684     } else {
685       encoded_string = PyUnicode_AsEncodedObject(arg, "utf-8", NULL);
686     }
687   } else {
688     // In this case field type is "bytes".
689     encoded_string = arg;
690     Py_INCREF(encoded_string);
691   }
692 
693   return encoded_string;
694 }
695 
696 bool CheckAndSetString(
697     PyObject* arg, Message* message,
698     const FieldDescriptor* descriptor,
699     const Reflection* reflection,
700     bool append,
701     int index) {
702   ScopedPyObjectPtr encoded_string(CheckString(arg, descriptor));
703 
704   if (encoded_string.get() == NULL) {
705     return false;
706   }
707 
708   char* value;
709   Py_ssize_t value_len;
710   if (PyBytes_AsStringAndSize(encoded_string.get(), &value, &value_len) < 0) {
711     return false;
712   }
713 
714   string value_string(value, value_len);
715   if (append) {
716     reflection->AddString(message, descriptor, value_string);
717   } else if (index < 0) {
718     reflection->SetString(message, descriptor, value_string);
719   } else {
720     reflection->SetRepeatedString(message, descriptor, index, value_string);
721   }
722   return true;
723 }
724 
725 PyObject* ToStringObject(const FieldDescriptor* descriptor, string value) {
726   if (descriptor->type() != FieldDescriptor::TYPE_STRING) {
727     return PyBytes_FromStringAndSize(value.c_str(), value.length());
728   }
729 
730   PyObject* result = PyUnicode_DecodeUTF8(value.c_str(), value.length(), NULL);
731   // If the string can't be decoded in UTF-8, just return a string object that
732   // contains the raw bytes. This can't happen if the value was assigned using
733   // the members of the Python message object, but can happen if the values were
734   // parsed from the wire (binary).
735   if (result == NULL) {
736     PyErr_Clear();
737     result = PyBytes_FromStringAndSize(value.c_str(), value.length());
738   }
739   return result;
740 }
741 
742 bool CheckFieldBelongsToMessage(const FieldDescriptor* field_descriptor,
743                                 const Message* message) {
744   if (message->GetDescriptor() == field_descriptor->containing_type()) {
745     return true;
746   }
747   PyErr_Format(PyExc_KeyError, "Field '%s' does not belong to message '%s'",
748                field_descriptor->full_name().c_str(),
749                message->GetDescriptor()->full_name().c_str());
750   return false;
751 }
752 
753 namespace cmessage {
754 
755 PyDescriptorPool* GetDescriptorPoolForMessage(CMessage* message) {
756   // No need to check the type: the type of instances of CMessage is always
757   // an instance of CMessageClass. Let's prove it with a debug-only check.
758   GOOGLE_DCHECK(PyObject_TypeCheck(message, &CMessage_Type));
759   return reinterpret_cast<CMessageClass*>(Py_TYPE(message))->py_descriptor_pool;
760 }
761 
762 MessageFactory* GetFactoryForMessage(CMessage* message) {
763   return GetDescriptorPoolForMessage(message)->message_factory;
764 }
765 
766 static int MaybeReleaseOverlappingOneofField(
767     CMessage* cmessage,
768     const FieldDescriptor* field) {
769 #ifdef GOOGLE_PROTOBUF_HAS_ONEOF
770   Message* message = cmessage->message;
771   const Reflection* reflection = message->GetReflection();
772   if (!field->containing_oneof() ||
773       !reflection->HasOneof(*message, field->containing_oneof()) ||
774       reflection->HasField(*message, field)) {
775     // No other field in this oneof, no need to release.
776     return 0;
777   }
778 
779   const OneofDescriptor* oneof = field->containing_oneof();
780   const FieldDescriptor* existing_field =
781       reflection->GetOneofFieldDescriptor(*message, oneof);
782   if (existing_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
783     // Non-message fields don't need to be released.
784     return 0;
785   }
786   const char* field_name = existing_field->name().c_str();
787   PyObject* child_message = cmessage->composite_fields ?
788       PyDict_GetItemString(cmessage->composite_fields, field_name) : NULL;
789   if (child_message == NULL) {
790     // No python reference to this field so no need to release.
791     return 0;
792   }
793 
794   if (InternalReleaseFieldByDescriptor(
795           cmessage, existing_field, child_message) < 0) {
796     return -1;
797   }
798   return PyDict_DelItemString(cmessage->composite_fields, field_name);
799 #else
800   return 0;
801 #endif
802 }
803 
804 // ---------------------------------------------------------------------
805 // Making a message writable
806 
807 static Message* GetMutableMessage(
808     CMessage* parent,
809     const FieldDescriptor* parent_field) {
810   Message* parent_message = parent->message;
811   const Reflection* reflection = parent_message->GetReflection();
812   if (MaybeReleaseOverlappingOneofField(parent, parent_field) < 0) {
813     return NULL;
814   }
815   return reflection->MutableMessage(
816       parent_message, parent_field, GetFactoryForMessage(parent));
817 }
818 
819 struct FixupMessageReference : public ChildVisitor {
820   // message must outlive this object.
821   explicit FixupMessageReference(Message* message) :
822       message_(message) {}
823 
824   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
825     container->message = message_;
826     return 0;
827   }
828 
829   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
830     container->message = message_;
831     return 0;
832   }
833 
834   int VisitMapContainer(MapContainer* container) {
835     container->message = message_;
836     return 0;
837   }
838 
839  private:
840   Message* message_;
841 };
842 
843 int AssureWritable(CMessage* self) {
844   if (self == NULL || !self->read_only) {
845     return 0;
846   }
847 
848   if (self->parent == NULL) {
849     // If parent is NULL but we are trying to modify a read-only message, this
850     // is a reference to a constant default instance that needs to be replaced
851     // with a mutable top-level message.
852     self->message = self->message->New();
853     self->owner.reset(self->message);
854     // Cascade the new owner to eventual children: even if this message is
855     // empty, some submessages or repeated containers might exist already.
856     SetOwner(self, self->owner);
857   } else {
858     // Otherwise, we need a mutable child message.
859     if (AssureWritable(self->parent) == -1)
860       return -1;
861 
862     // Make self->message writable.
863     Message* mutable_message = GetMutableMessage(
864         self->parent,
865         self->parent_field_descriptor);
866     if (mutable_message == NULL) {
867       return -1;
868     }
869     self->message = mutable_message;
870   }
871   self->read_only = false;
872 
873   // When a CMessage is made writable its Message pointer is updated
874   // to point to a new mutable Message.  When that happens we need to
875   // update any references to the old, read-only CMessage.  There are
876   // four places such references occur: RepeatedScalarContainer,
877   // RepeatedCompositeContainer, MapContainer, and ExtensionDict.
878   if (self->extensions != NULL)
879     self->extensions->message = self->message;
880   if (ForEachCompositeField(self, FixupMessageReference(self->message)) == -1)
881     return -1;
882 
883   return 0;
884 }
885 
886 // --- Globals:
887 
888 // Retrieve a C++ FieldDescriptor for a message attribute.
889 // The C++ message must be valid.
890 // TODO(amauryfa): This function should stay internal, because exception
891 // handling is not consistent.
892 static const FieldDescriptor* GetFieldDescriptor(
893     CMessage* self, PyObject* name) {
894   const Descriptor *message_descriptor = self->message->GetDescriptor();
895   char* field_name;
896   Py_ssize_t size;
897   if (PyString_AsStringAndSize(name, &field_name, &size) < 0) {
898     return NULL;
899   }
900   const FieldDescriptor *field_descriptor =
901       message_descriptor->FindFieldByName(string(field_name, size));
902   if (field_descriptor == NULL) {
903     // Note: No exception is set!
904     return NULL;
905   }
906   return field_descriptor;
907 }
908 
909 // Retrieve a C++ FieldDescriptor for an extension handle.
910 const FieldDescriptor* GetExtensionDescriptor(PyObject* extension) {
911   ScopedPyObjectPtr cdescriptor;
912   if (!PyObject_TypeCheck(extension, &PyFieldDescriptor_Type)) {
913     // Most callers consider extensions as a plain dictionary.  We should
914     // allow input which is not a field descriptor, and simply pretend it does
915     // not exist.
916     PyErr_SetObject(PyExc_KeyError, extension);
917     return NULL;
918   }
919   return PyFieldDescriptor_AsDescriptor(extension);
920 }
921 
922 // If value is a string, convert it into an enum value based on the labels in
923 // descriptor, otherwise simply return value.  Always returns a new reference.
924 static PyObject* GetIntegerEnumValue(const FieldDescriptor& descriptor,
925                                      PyObject* value) {
926   if (PyString_Check(value) || PyUnicode_Check(value)) {
927     const EnumDescriptor* enum_descriptor = descriptor.enum_type();
928     if (enum_descriptor == NULL) {
929       PyErr_SetString(PyExc_TypeError, "not an enum field");
930       return NULL;
931     }
932     char* enum_label;
933     Py_ssize_t size;
934     if (PyString_AsStringAndSize(value, &enum_label, &size) < 0) {
935       return NULL;
936     }
937     const EnumValueDescriptor* enum_value_descriptor =
938         enum_descriptor->FindValueByName(string(enum_label, size));
939     if (enum_value_descriptor == NULL) {
940       PyErr_SetString(PyExc_ValueError, "unknown enum label");
941       return NULL;
942     }
943     return PyInt_FromLong(enum_value_descriptor->number());
944   }
945   Py_INCREF(value);
946   return value;
947 }
948 
949 // If cmessage_list is not NULL, this function releases values into the
950 // container CMessages instead of just removing. Repeated composite container
951 // needs to do this to make sure CMessages stay alive if they're still
952 // referenced after deletion. Repeated scalar container doesn't need to worry.
953 int InternalDeleteRepeatedField(
954     CMessage* self,
955     const FieldDescriptor* field_descriptor,
956     PyObject* slice,
957     PyObject* cmessage_list) {
958   Message* message = self->message;
959   Py_ssize_t length, from, to, step, slice_length;
960   const Reflection* reflection = message->GetReflection();
961   int min, max;
962   length = reflection->FieldSize(*message, field_descriptor);
963 
964   if (PyInt_Check(slice) || PyLong_Check(slice)) {
965     from = to = PyLong_AsLong(slice);
966     if (from < 0) {
967       from = to = length + from;
968     }
969     step = 1;
970     min = max = from;
971 
972     // Range check.
973     if (from < 0 || from >= length) {
974       PyErr_Format(PyExc_IndexError, "list assignment index out of range");
975       return -1;
976     }
977   } else if (PySlice_Check(slice)) {
978     from = to = step = slice_length = 0;
979     PySlice_GetIndicesEx(
980 #if PY_MAJOR_VERSION < 3
981         reinterpret_cast<PySliceObject*>(slice),
982 #else
983         slice,
984 #endif
985         length, &from, &to, &step, &slice_length);
986     if (from < to) {
987       min = from;
988       max = to - 1;
989     } else {
990       min = to + 1;
991       max = from;
992     }
993   } else {
994     PyErr_SetString(PyExc_TypeError, "list indices must be integers");
995     return -1;
996   }
997 
998   Py_ssize_t i = from;
999   std::vector<bool> to_delete(length, false);
1000   while (i >= min && i <= max) {
1001     to_delete[i] = true;
1002     i += step;
1003   }
1004 
1005   to = 0;
1006   for (i = 0; i < length; ++i) {
1007     if (!to_delete[i]) {
1008       if (i != to) {
1009         reflection->SwapElements(message, field_descriptor, i, to);
1010         if (cmessage_list != NULL) {
1011           // If a list of cmessages is passed in (i.e. from a repeated
1012           // composite container), swap those as well to correspond to the
1013           // swaps in the underlying message so they're in the right order
1014           // when we start releasing.
1015           PyObject* tmp = PyList_GET_ITEM(cmessage_list, i);
1016           PyList_SET_ITEM(cmessage_list, i,
1017                           PyList_GET_ITEM(cmessage_list, to));
1018           PyList_SET_ITEM(cmessage_list, to, tmp);
1019         }
1020       }
1021       ++to;
1022     }
1023   }
1024 
1025   while (i > to) {
1026     if (cmessage_list == NULL) {
1027       reflection->RemoveLast(message, field_descriptor);
1028     } else {
1029       CMessage* last_cmessage = reinterpret_cast<CMessage*>(
1030           PyList_GET_ITEM(cmessage_list, PyList_GET_SIZE(cmessage_list) - 1));
1031       repeated_composite_container::ReleaseLastTo(
1032           self, field_descriptor, last_cmessage);
1033       if (PySequence_DelItem(cmessage_list, -1) < 0) {
1034         return -1;
1035       }
1036     }
1037     --i;
1038   }
1039 
1040   return 0;
1041 }
1042 
1043 // Initializes fields of a message. Used in constructors.
1044 int InitAttributes(CMessage* self, PyObject* kwargs) {
1045   if (kwargs == NULL) {
1046     return 0;
1047   }
1048 
1049   Py_ssize_t pos = 0;
1050   PyObject* name;
1051   PyObject* value;
1052   while (PyDict_Next(kwargs, &pos, &name, &value)) {
1053     if (!PyString_Check(name)) {
1054       PyErr_SetString(PyExc_ValueError, "Field name must be a string");
1055       return -1;
1056     }
1057     const FieldDescriptor* descriptor = GetFieldDescriptor(self, name);
1058     if (descriptor == NULL) {
1059       PyErr_Format(PyExc_ValueError, "Protocol message %s has no \"%s\" field.",
1060                    self->message->GetDescriptor()->name().c_str(),
1061                    PyString_AsString(name));
1062       return -1;
1063     }
1064     if (value == Py_None) {
1065       // field=None is the same as no field at all.
1066       continue;
1067     }
1068     if (descriptor->is_map()) {
1069       ScopedPyObjectPtr map(GetAttr(self, name));
1070       const FieldDescriptor* value_descriptor =
1071           descriptor->message_type()->FindFieldByName("value");
1072       if (value_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1073         Py_ssize_t map_pos = 0;
1074         PyObject* map_key;
1075         PyObject* map_value;
1076         while (PyDict_Next(value, &map_pos, &map_key, &map_value)) {
1077           ScopedPyObjectPtr function_return;
1078           function_return.reset(PyObject_GetItem(map.get(), map_key));
1079           if (function_return.get() == NULL) {
1080             return -1;
1081           }
1082           ScopedPyObjectPtr ok(PyObject_CallMethod(
1083               function_return.get(), "MergeFrom", "O", map_value));
1084           if (ok.get() == NULL) {
1085             return -1;
1086           }
1087         }
1088       } else {
1089         ScopedPyObjectPtr function_return;
1090         function_return.reset(
1091             PyObject_CallMethod(map.get(), "update", "O", value));
1092         if (function_return.get() == NULL) {
1093           return -1;
1094         }
1095       }
1096     } else if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1097       ScopedPyObjectPtr container(GetAttr(self, name));
1098       if (container == NULL) {
1099         return -1;
1100       }
1101       if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1102         RepeatedCompositeContainer* rc_container =
1103             reinterpret_cast<RepeatedCompositeContainer*>(container.get());
1104         ScopedPyObjectPtr iter(PyObject_GetIter(value));
1105         if (iter == NULL) {
1106           PyErr_SetString(PyExc_TypeError, "Value must be iterable");
1107           return -1;
1108         }
1109         ScopedPyObjectPtr next;
1110         while ((next.reset(PyIter_Next(iter.get()))) != NULL) {
1111           PyObject* kwargs = (PyDict_Check(next.get()) ? next.get() : NULL);
1112           ScopedPyObjectPtr new_msg(
1113               repeated_composite_container::Add(rc_container, NULL, kwargs));
1114           if (new_msg == NULL) {
1115             return -1;
1116           }
1117           if (kwargs == NULL) {
1118             // next was not a dict, it's a message we need to merge
1119             ScopedPyObjectPtr merged(MergeFrom(
1120                 reinterpret_cast<CMessage*>(new_msg.get()), next.get()));
1121             if (merged.get() == NULL) {
1122               return -1;
1123             }
1124           }
1125         }
1126         if (PyErr_Occurred()) {
1127           // Check to see how PyIter_Next() exited.
1128           return -1;
1129         }
1130       } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1131         RepeatedScalarContainer* rs_container =
1132             reinterpret_cast<RepeatedScalarContainer*>(container.get());
1133         ScopedPyObjectPtr iter(PyObject_GetIter(value));
1134         if (iter == NULL) {
1135           PyErr_SetString(PyExc_TypeError, "Value must be iterable");
1136           return -1;
1137         }
1138         ScopedPyObjectPtr next;
1139         while ((next.reset(PyIter_Next(iter.get()))) != NULL) {
1140           ScopedPyObjectPtr enum_value(
1141               GetIntegerEnumValue(*descriptor, next.get()));
1142           if (enum_value == NULL) {
1143             return -1;
1144           }
1145           ScopedPyObjectPtr new_msg(repeated_scalar_container::Append(
1146               rs_container, enum_value.get()));
1147           if (new_msg == NULL) {
1148             return -1;
1149           }
1150         }
1151         if (PyErr_Occurred()) {
1152           // Check to see how PyIter_Next() exited.
1153           return -1;
1154         }
1155       } else {
1156         if (ScopedPyObjectPtr(repeated_scalar_container::Extend(
1157                 reinterpret_cast<RepeatedScalarContainer*>(container.get()),
1158                 value)) ==
1159             NULL) {
1160           return -1;
1161         }
1162       }
1163     } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1164       ScopedPyObjectPtr message(GetAttr(self, name));
1165       if (message == NULL) {
1166         return -1;
1167       }
1168       CMessage* cmessage = reinterpret_cast<CMessage*>(message.get());
1169       if (PyDict_Check(value)) {
1170         if (InitAttributes(cmessage, value) < 0) {
1171           return -1;
1172         }
1173       } else {
1174         ScopedPyObjectPtr merged(MergeFrom(cmessage, value));
1175         if (merged == NULL) {
1176           return -1;
1177         }
1178       }
1179     } else {
1180       ScopedPyObjectPtr new_val;
1181       if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1182         new_val.reset(GetIntegerEnumValue(*descriptor, value));
1183         if (new_val == NULL) {
1184           return -1;
1185         }
1186       }
1187       if (SetAttr(self, name, (new_val.get() == NULL) ? value : new_val.get()) <
1188           0) {
1189         return -1;
1190       }
1191     }
1192   }
1193   return 0;
1194 }
1195 
1196 // Allocates an incomplete Python Message: the caller must fill self->message,
1197 // self->owner and eventually self->parent.
1198 CMessage* NewEmptyMessage(CMessageClass* type) {
1199   CMessage* self = reinterpret_cast<CMessage*>(
1200       PyType_GenericAlloc(&type->super.ht_type, 0));
1201   if (self == NULL) {
1202     return NULL;
1203   }
1204 
1205   self->message = NULL;
1206   self->parent = NULL;
1207   self->parent_field_descriptor = NULL;
1208   self->read_only = false;
1209   self->extensions = NULL;
1210 
1211   self->composite_fields = NULL;
1212 
1213   return self;
1214 }
1215 
1216 // The __new__ method of Message classes.
1217 // Creates a new C++ message and takes ownership.
1218 static PyObject* New(PyTypeObject* cls,
1219                      PyObject* unused_args, PyObject* unused_kwargs) {
1220   CMessageClass* type = CheckMessageClass(cls);
1221   if (type == NULL) {
1222     return NULL;
1223   }
1224   // Retrieve the message descriptor and the default instance (=prototype).
1225   const Descriptor* message_descriptor = type->message_descriptor;
1226   if (message_descriptor == NULL) {
1227     return NULL;
1228   }
1229   const Message* default_message = type->py_descriptor_pool->message_factory
1230                                    ->GetPrototype(message_descriptor);
1231   if (default_message == NULL) {
1232     PyErr_SetString(PyExc_TypeError, message_descriptor->full_name().c_str());
1233     return NULL;
1234   }
1235 
1236   CMessage* self = NewEmptyMessage(type);
1237   if (self == NULL) {
1238     return NULL;
1239   }
1240   self->message = default_message->New();
1241   self->owner.reset(self->message);
1242   return reinterpret_cast<PyObject*>(self);
1243 }
1244 
1245 // The __init__ method of Message classes.
1246 // It initializes fields from keywords passed to the constructor.
1247 static int Init(CMessage* self, PyObject* args, PyObject* kwargs) {
1248   if (PyTuple_Size(args) != 0) {
1249     PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
1250     return -1;
1251   }
1252 
1253   return InitAttributes(self, kwargs);
1254 }
1255 
1256 // ---------------------------------------------------------------------
1257 // Deallocating a CMessage
1258 //
1259 // Deallocating a CMessage requires that we clear any weak references
1260 // from children to the message being deallocated.
1261 
1262 // Clear the weak reference from the child to the parent.
1263 struct ClearWeakReferences : public ChildVisitor {
1264   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1265     container->parent = NULL;
1266     // The elements in the container have the same parent as the
1267     // container itself, so NULL out that pointer as well.
1268     const Py_ssize_t n = PyList_GET_SIZE(container->child_messages);
1269     for (Py_ssize_t i = 0; i < n; ++i) {
1270       CMessage* child_cmessage = reinterpret_cast<CMessage*>(
1271           PyList_GET_ITEM(container->child_messages, i));
1272       child_cmessage->parent = NULL;
1273     }
1274     return 0;
1275   }
1276 
1277   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1278     container->parent = NULL;
1279     return 0;
1280   }
1281 
1282   int VisitMapContainer(MapContainer* container) {
1283     container->parent = NULL;
1284     return 0;
1285   }
1286 
1287   int VisitCMessage(CMessage* cmessage,
1288                     const FieldDescriptor* field_descriptor) {
1289     cmessage->parent = NULL;
1290     return 0;
1291   }
1292 };
1293 
1294 static void Dealloc(CMessage* self) {
1295   // Null out all weak references from children to this message.
1296   GOOGLE_CHECK_EQ(0, ForEachCompositeField(self, ClearWeakReferences()));
1297   if (self->extensions) {
1298     self->extensions->parent = NULL;
1299   }
1300 
1301   Py_CLEAR(self->extensions);
1302   Py_CLEAR(self->composite_fields);
1303   self->owner.reset();
1304   Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
1305 }
1306 
1307 // ---------------------------------------------------------------------
1308 
1309 
1310 PyObject* IsInitialized(CMessage* self, PyObject* args) {
1311   PyObject* errors = NULL;
1312   if (PyArg_ParseTuple(args, "|O", &errors) < 0) {
1313     return NULL;
1314   }
1315   if (self->message->IsInitialized()) {
1316     Py_RETURN_TRUE;
1317   }
1318   if (errors != NULL) {
1319     ScopedPyObjectPtr initialization_errors(
1320         FindInitializationErrors(self));
1321     if (initialization_errors == NULL) {
1322       return NULL;
1323     }
1324     ScopedPyObjectPtr extend_name(PyString_FromString("extend"));
1325     if (extend_name == NULL) {
1326       return NULL;
1327     }
1328     ScopedPyObjectPtr result(PyObject_CallMethodObjArgs(
1329         errors,
1330         extend_name.get(),
1331         initialization_errors.get(),
1332         NULL));
1333     if (result == NULL) {
1334       return NULL;
1335     }
1336   }
1337   Py_RETURN_FALSE;
1338 }
1339 
1340 PyObject* HasFieldByDescriptor(
1341     CMessage* self, const FieldDescriptor* field_descriptor) {
1342   Message* message = self->message;
1343   if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
1344     return NULL;
1345   }
1346   if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1347     PyErr_SetString(PyExc_KeyError,
1348                     "Field is repeated. A singular method is required.");
1349     return NULL;
1350   }
1351   bool has_field =
1352       message->GetReflection()->HasField(*message, field_descriptor);
1353   return PyBool_FromLong(has_field ? 1 : 0);
1354 }
1355 
1356 const FieldDescriptor* FindFieldWithOneofs(
1357     const Message* message, const string& field_name, bool* in_oneof) {
1358   *in_oneof = false;
1359   const Descriptor* descriptor = message->GetDescriptor();
1360   const FieldDescriptor* field_descriptor =
1361       descriptor->FindFieldByName(field_name);
1362   if (field_descriptor != NULL) {
1363     return field_descriptor;
1364   }
1365   const OneofDescriptor* oneof_desc =
1366       descriptor->FindOneofByName(field_name);
1367   if (oneof_desc != NULL) {
1368     *in_oneof = true;
1369     return message->GetReflection()->GetOneofFieldDescriptor(*message,
1370                                                              oneof_desc);
1371   }
1372   return NULL;
1373 }
1374 
1375 bool CheckHasPresence(const FieldDescriptor* field_descriptor, bool in_oneof) {
1376   if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1377     PyErr_Format(PyExc_ValueError,
1378                  "Protocol message has no singular \"%s\" field.",
1379                  field_descriptor->name().c_str());
1380     return false;
1381   }
1382 
1383   if (field_descriptor->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
1384     // HasField() for a oneof *itself* isn't supported.
1385     if (in_oneof) {
1386       PyErr_Format(PyExc_ValueError,
1387                    "Can't test oneof field \"%s\" for presence in proto3, use "
1388                    "WhichOneof instead.",
1389                    field_descriptor->containing_oneof()->name().c_str());
1390       return false;
1391     }
1392 
1393     // ...but HasField() for fields *in* a oneof is supported.
1394     if (field_descriptor->containing_oneof() != NULL) {
1395       return true;
1396     }
1397 
1398     if (field_descriptor->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
1399       PyErr_Format(
1400           PyExc_ValueError,
1401           "Can't test non-submessage field \"%s\" for presence in proto3.",
1402           field_descriptor->name().c_str());
1403       return false;
1404     }
1405   }
1406 
1407   return true;
1408 }
1409 
1410 PyObject* HasField(CMessage* self, PyObject* arg) {
1411   char* field_name;
1412   Py_ssize_t size;
1413 #if PY_MAJOR_VERSION < 3
1414   if (PyString_AsStringAndSize(arg, &field_name, &size) < 0) {
1415     return NULL;
1416   }
1417 #else
1418   field_name = PyUnicode_AsUTF8AndSize(arg, &size);
1419   if (!field_name) {
1420     return NULL;
1421   }
1422 #endif
1423 
1424   Message* message = self->message;
1425   bool is_in_oneof;
1426   const FieldDescriptor* field_descriptor =
1427       FindFieldWithOneofs(message, string(field_name, size), &is_in_oneof);
1428   if (field_descriptor == NULL) {
1429     if (!is_in_oneof) {
1430       PyErr_Format(PyExc_ValueError, "Unknown field %s.", field_name);
1431       return NULL;
1432     } else {
1433       Py_RETURN_FALSE;
1434     }
1435   }
1436 
1437   if (!CheckHasPresence(field_descriptor, is_in_oneof)) {
1438     return NULL;
1439   }
1440 
1441   if (message->GetReflection()->HasField(*message, field_descriptor)) {
1442     Py_RETURN_TRUE;
1443   }
1444   if (!message->GetReflection()->SupportsUnknownEnumValues() &&
1445       field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1446     // Special case: Python HasField() differs in semantics from C++
1447     // slightly: we return HasField('enum_field') == true if there is
1448     // an unknown enum value present. To implement this we have to
1449     // look in the UnknownFieldSet.
1450     const UnknownFieldSet& unknown_field_set =
1451         message->GetReflection()->GetUnknownFields(*message);
1452     for (int i = 0; i < unknown_field_set.field_count(); ++i) {
1453       if (unknown_field_set.field(i).number() == field_descriptor->number()) {
1454         Py_RETURN_TRUE;
1455       }
1456     }
1457   }
1458   Py_RETURN_FALSE;
1459 }
1460 
1461 PyObject* ClearExtension(CMessage* self, PyObject* extension) {
1462   if (self->extensions != NULL) {
1463     return extension_dict::ClearExtension(self->extensions, extension);
1464   } else {
1465     const FieldDescriptor* descriptor = GetExtensionDescriptor(extension);
1466     if (descriptor == NULL) {
1467       return NULL;
1468     }
1469     if (ScopedPyObjectPtr(ClearFieldByDescriptor(self, descriptor)) == NULL) {
1470       return NULL;
1471     }
1472   }
1473   Py_RETURN_NONE;
1474 }
1475 
1476 PyObject* HasExtension(CMessage* self, PyObject* extension) {
1477   const FieldDescriptor* descriptor = GetExtensionDescriptor(extension);
1478   if (descriptor == NULL) {
1479     return NULL;
1480   }
1481   return HasFieldByDescriptor(self, descriptor);
1482 }
1483 
1484 // ---------------------------------------------------------------------
1485 // Releasing messages
1486 //
// The Python API's ClearField() and Clear() methods behave
// differently from their C++ counterparts.  While the C++ versions
// clear the children, the Python versions detach the children
// without touching their content.  This impedance mismatch causes
// some complexity in the implementation, which is captured in this
// section.
1493 //
1494 // When a CMessage field is cleared we need to:
1495 //
1496 // * Release the Message used as the backing store for the CMessage
1497 //   from its parent.
1498 //
1499 // * Change the owner field of the released CMessage and all of its
1500 //   children to point to the newly released Message.
1501 //
1502 // * Clear the weak references from the released CMessage to the
1503 //   parent.
1504 //
1505 // When a RepeatedCompositeContainer field is cleared we need to:
1506 //
// * Release all the Messages used as the backing store for the
//   CMessages stored in the container.
1509 //
1510 // * Change the owner field of all the released CMessage and all of
1511 //   their children to point to the newly released Messages.
1512 //
1513 // * Clear the weak references from the released container to the
1514 //   parent.
1515 
1516 struct SetOwnerVisitor : public ChildVisitor {
1517   // new_owner must outlive this object.
1518   explicit SetOwnerVisitor(const shared_ptr<Message>& new_owner)
1519       : new_owner_(new_owner) {}
1520 
1521   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1522     repeated_composite_container::SetOwner(container, new_owner_);
1523     return 0;
1524   }
1525 
1526   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1527     repeated_scalar_container::SetOwner(container, new_owner_);
1528     return 0;
1529   }
1530 
1531   int VisitMapContainer(MapContainer* container) {
1532     container->SetOwner(new_owner_);
1533     return 0;
1534   }
1535 
1536   int VisitCMessage(CMessage* cmessage,
1537                     const FieldDescriptor* field_descriptor) {
1538     return SetOwner(cmessage, new_owner_);
1539   }
1540 
1541  private:
1542   const shared_ptr<Message>& new_owner_;
1543 };
1544 
1545 // Change the owner of this CMessage and all its children, recursively.
1546 int SetOwner(CMessage* self, const shared_ptr<Message>& new_owner) {
1547   self->owner = new_owner;
1548   if (ForEachCompositeField(self, SetOwnerVisitor(new_owner)) == -1)
1549     return -1;
1550   return 0;
1551 }
1552 
1553 // Releases the message specified by 'field' and returns the
1554 // pointer. If the field does not exist a new message is created using
1555 // 'descriptor'. The caller takes ownership of the returned pointer.
1556 Message* ReleaseMessage(CMessage* self,
1557                         const Descriptor* descriptor,
1558                         const FieldDescriptor* field_descriptor) {
1559   MessageFactory* message_factory = GetFactoryForMessage(self);
1560   Message* released_message = self->message->GetReflection()->ReleaseMessage(
1561       self->message, field_descriptor, message_factory);
1562   // ReleaseMessage will return NULL which differs from
1563   // child_cmessage->message, if the field does not exist.  In this case,
1564   // the latter points to the default instance via a const_cast<>, so we
1565   // have to reset it to a new mutable object since we are taking ownership.
1566   if (released_message == NULL) {
1567     const Message* prototype = message_factory->GetPrototype(descriptor);
1568     GOOGLE_DCHECK(prototype != NULL);
1569     released_message = prototype->New();
1570   }
1571 
1572   return released_message;
1573 }
1574 
1575 int ReleaseSubMessage(CMessage* self,
1576                       const FieldDescriptor* field_descriptor,
1577                       CMessage* child_cmessage) {
1578   // Release the Message
1579   shared_ptr<Message> released_message(ReleaseMessage(
1580       self, child_cmessage->message->GetDescriptor(), field_descriptor));
1581   child_cmessage->message = released_message.get();
1582   child_cmessage->owner.swap(released_message);
1583   child_cmessage->parent = NULL;
1584   child_cmessage->parent_field_descriptor = NULL;
1585   child_cmessage->read_only = false;
1586   return ForEachCompositeField(child_cmessage,
1587                                SetOwnerVisitor(child_cmessage->owner));
1588 }
1589 
1590 struct ReleaseChild : public ChildVisitor {
1591   // message must outlive this object.
1592   explicit ReleaseChild(CMessage* parent) :
1593       parent_(parent) {}
1594 
1595   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1596     return repeated_composite_container::Release(
1597         reinterpret_cast<RepeatedCompositeContainer*>(container));
1598   }
1599 
1600   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1601     return repeated_scalar_container::Release(
1602         reinterpret_cast<RepeatedScalarContainer*>(container));
1603   }
1604 
1605   int VisitMapContainer(MapContainer* container) {
1606     return reinterpret_cast<MapContainer*>(container)->Release();
1607   }
1608 
1609   int VisitCMessage(CMessage* cmessage,
1610                     const FieldDescriptor* field_descriptor) {
1611     return ReleaseSubMessage(parent_, field_descriptor,
1612         reinterpret_cast<CMessage*>(cmessage));
1613   }
1614 
1615   CMessage* parent_;
1616 };
1617 
1618 int InternalReleaseFieldByDescriptor(
1619     CMessage* self,
1620     const FieldDescriptor* field_descriptor,
1621     PyObject* composite_field) {
1622   return VisitCompositeField(
1623       field_descriptor,
1624       composite_field,
1625       ReleaseChild(self));
1626 }
1627 
1628 PyObject* ClearFieldByDescriptor(
1629     CMessage* self,
1630     const FieldDescriptor* descriptor) {
1631   if (!CheckFieldBelongsToMessage(descriptor, self->message)) {
1632     return NULL;
1633   }
1634   AssureWritable(self);
1635   self->message->GetReflection()->ClearField(self->message, descriptor);
1636   Py_RETURN_NONE;
1637 }
1638 
1639 PyObject* ClearField(CMessage* self, PyObject* arg) {
1640   if (!PyString_Check(arg)) {
1641     PyErr_SetString(PyExc_TypeError, "field name must be a string");
1642     return NULL;
1643   }
1644 #if PY_MAJOR_VERSION < 3
1645   const char* field_name = PyString_AS_STRING(arg);
1646   Py_ssize_t size = PyString_GET_SIZE(arg);
1647 #else
1648   Py_ssize_t size;
1649   const char* field_name = PyUnicode_AsUTF8AndSize(arg, &size);
1650 #endif
1651   AssureWritable(self);
1652   Message* message = self->message;
1653   ScopedPyObjectPtr arg_in_oneof;
1654   bool is_in_oneof;
1655   const FieldDescriptor* field_descriptor =
1656       FindFieldWithOneofs(message, string(field_name, size), &is_in_oneof);
1657   if (field_descriptor == NULL) {
1658     if (!is_in_oneof) {
1659       PyErr_Format(PyExc_ValueError,
1660                    "Protocol message has no \"%s\" field.", field_name);
1661       return NULL;
1662     } else {
1663       Py_RETURN_NONE;
1664     }
1665   } else if (is_in_oneof) {
1666     const string& name = field_descriptor->name();
1667     arg_in_oneof.reset(PyString_FromStringAndSize(name.c_str(), name.size()));
1668     arg = arg_in_oneof.get();
1669   }
1670 
1671   PyObject* composite_field = self->composite_fields ?
1672       PyDict_GetItem(self->composite_fields, arg) : NULL;
1673 
1674   // Only release the field if there's a possibility that there are
1675   // references to it.
1676   if (composite_field != NULL) {
1677     if (InternalReleaseFieldByDescriptor(self, field_descriptor,
1678                                          composite_field) < 0) {
1679       return NULL;
1680     }
1681     PyDict_DelItem(self->composite_fields, arg);
1682   }
1683   message->GetReflection()->ClearField(message, field_descriptor);
1684   if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM &&
1685       !message->GetReflection()->SupportsUnknownEnumValues()) {
1686     UnknownFieldSet* unknown_field_set =
1687         message->GetReflection()->MutableUnknownFields(message);
1688     unknown_field_set->DeleteByNumber(field_descriptor->number());
1689   }
1690 
1691   Py_RETURN_NONE;
1692 }
1693 
1694 PyObject* Clear(CMessage* self) {
1695   AssureWritable(self);
1696   if (ForEachCompositeField(self, ReleaseChild(self)) == -1)
1697     return NULL;
1698   Py_CLEAR(self->extensions);
1699   if (self->composite_fields) {
1700     PyDict_Clear(self->composite_fields);
1701   }
1702   self->message->Clear();
1703   Py_RETURN_NONE;
1704 }
1705 
1706 // ---------------------------------------------------------------------
1707 
1708 static string GetMessageName(CMessage* self) {
1709   if (self->parent_field_descriptor != NULL) {
1710     return self->parent_field_descriptor->full_name();
1711   } else {
1712     return self->message->GetDescriptor()->full_name();
1713   }
1714 }
1715 
1716 static PyObject* SerializeToString(CMessage* self, PyObject* args) {
1717   if (!self->message->IsInitialized()) {
1718     ScopedPyObjectPtr errors(FindInitializationErrors(self));
1719     if (errors == NULL) {
1720       return NULL;
1721     }
1722     ScopedPyObjectPtr comma(PyString_FromString(","));
1723     if (comma == NULL) {
1724       return NULL;
1725     }
1726     ScopedPyObjectPtr joined(
1727         PyObject_CallMethod(comma.get(), "join", "O", errors.get()));
1728     if (joined == NULL) {
1729       return NULL;
1730     }
1731 
1732     // TODO(haberman): this is a (hopefully temporary) hack.  The unit testing
1733     // infrastructure reloads all pure-Python modules for every test, but not
1734     // C++ modules (because that's generally impossible:
1735     // http://bugs.python.org/issue1144263).  But if we cache EncodeError, we'll
1736     // return the EncodeError from a previous load of the module, which won't
1737     // match a user's attempt to catch EncodeError.  So we have to look it up
1738     // again every time.
1739     ScopedPyObjectPtr message_module(PyImport_ImportModule(
1740         "google.protobuf.message"));
1741     if (message_module.get() == NULL) {
1742       return NULL;
1743     }
1744 
1745     ScopedPyObjectPtr encode_error(
1746         PyObject_GetAttrString(message_module.get(), "EncodeError"));
1747     if (encode_error.get() == NULL) {
1748       return NULL;
1749     }
1750     PyErr_Format(encode_error.get(),
1751                  "Message %s is missing required fields: %s",
1752                  GetMessageName(self).c_str(), PyString_AsString(joined.get()));
1753     return NULL;
1754   }
1755   int size = self->message->ByteSize();
1756   if (size <= 0) {
1757     return PyBytes_FromString("");
1758   }
1759   PyObject* result = PyBytes_FromStringAndSize(NULL, size);
1760   if (result == NULL) {
1761     return NULL;
1762   }
1763   char* buffer = PyBytes_AS_STRING(result);
1764   self->message->SerializeWithCachedSizesToArray(
1765       reinterpret_cast<uint8*>(buffer));
1766   return result;
1767 }
1768 
1769 static PyObject* SerializePartialToString(CMessage* self) {
1770   string contents;
1771   self->message->SerializePartialToString(&contents);
1772   return PyBytes_FromStringAndSize(contents.c_str(), contents.size());
1773 }
1774 
1775 // Formats proto fields for ascii dumps using python formatting functions where
1776 // appropriate.
1777 class PythonFieldValuePrinter : public TextFormat::FieldValuePrinter {
1778  public:
1779   // Python has some differences from C++ when printing floating point numbers.
1780   //
1781   // 1) Trailing .0 is always printed.
1782   // 2) (Python2) Output is rounded to 12 digits.
1783   // 3) (Python3) The full precision of the double is preserved (and Python uses
1784   //    David M. Gay's dtoa(), when the C++ code uses SimpleDtoa. There are some
1785   //    differences, but they rarely happen)
1786   //
1787   // We override floating point printing with the C-API function for printing
1788   // Python floats to ensure consistency.
1789   string PrintFloat(float value) const { return PrintDouble(value); }
1790   string PrintDouble(double value) const {
1791     // This implementation is not highly optimized (it allocates two temporary
1792     // Python objects) but it is simple and portable.  If this is shown to be a
1793     // performance bottleneck, we can optimize it, but the results will likely
1794     // be more complicated to accommodate the differing behavior of double
1795     // formatting between Python 2 and Python 3.
1796     //
1797     // (Though a valid question is: do we really want to make out output
1798     // dependent on the Python version?)
1799     ScopedPyObjectPtr py_value(PyFloat_FromDouble(value));
1800     if (!py_value.get()) {
1801       return string();
1802     }
1803 
1804     ScopedPyObjectPtr py_str(PyObject_Str(py_value.get()));
1805     if (!py_str.get()) {
1806       return string();
1807     }
1808 
1809     return string(PyString_AsString(py_str.get()));
1810   }
1811 };
1812 
1813 static PyObject* ToStr(CMessage* self) {
1814   TextFormat::Printer printer;
1815   // Passes ownership
1816   printer.SetDefaultFieldValuePrinter(new PythonFieldValuePrinter());
1817   printer.SetHideUnknownFields(true);
1818   string output;
1819   if (!printer.PrintToString(*self->message, &output)) {
1820     PyErr_SetString(PyExc_ValueError, "Unable to convert message to str");
1821     return NULL;
1822   }
1823   return PyString_FromString(output.c_str());
1824 }
1825 
1826 PyObject* MergeFrom(CMessage* self, PyObject* arg) {
1827   CMessage* other_message;
1828   if (!PyObject_TypeCheck(arg, &CMessage_Type)) {
1829     PyErr_Format(PyExc_TypeError,
1830                  "Parameter to MergeFrom() must be instance of same class: "
1831                  "expected %s got %s.",
1832                  self->message->GetDescriptor()->full_name().c_str(),
1833                  Py_TYPE(arg)->tp_name);
1834     return NULL;
1835   }
1836 
1837   other_message = reinterpret_cast<CMessage*>(arg);
1838   if (other_message->message->GetDescriptor() !=
1839       self->message->GetDescriptor()) {
1840     PyErr_Format(PyExc_TypeError,
1841                  "Parameter to MergeFrom() must be instance of same class: "
1842                  "expected %s got %s.",
1843                  self->message->GetDescriptor()->full_name().c_str(),
1844                  other_message->message->GetDescriptor()->full_name().c_str());
1845     return NULL;
1846   }
1847   AssureWritable(self);
1848 
1849   // TODO(tibell): Message::MergeFrom might turn some child Messages
1850   // into mutable messages, invalidating the message field in the
1851   // corresponding CMessages.  We should run a FixupMessageReferences
1852   // pass here.
1853 
1854   self->message->MergeFrom(*other_message->message);
1855   Py_RETURN_NONE;
1856 }
1857 
1858 static PyObject* CopyFrom(CMessage* self, PyObject* arg) {
1859   CMessage* other_message;
1860   if (!PyObject_TypeCheck(arg, &CMessage_Type)) {
1861     PyErr_Format(PyExc_TypeError,
1862                  "Parameter to CopyFrom() must be instance of same class: "
1863                  "expected %s got %s.",
1864                  self->message->GetDescriptor()->full_name().c_str(),
1865                  Py_TYPE(arg)->tp_name);
1866     return NULL;
1867   }
1868 
1869   other_message = reinterpret_cast<CMessage*>(arg);
1870 
1871   if (self == other_message) {
1872     Py_RETURN_NONE;
1873   }
1874 
1875   if (other_message->message->GetDescriptor() !=
1876       self->message->GetDescriptor()) {
1877     PyErr_Format(PyExc_TypeError,
1878                  "Parameter to CopyFrom() must be instance of same class: "
1879                  "expected %s got %s.",
1880                  self->message->GetDescriptor()->full_name().c_str(),
1881                  other_message->message->GetDescriptor()->full_name().c_str());
1882     return NULL;
1883   }
1884 
1885   AssureWritable(self);
1886 
1887   // CopyFrom on the message will not clean up self->composite_fields,
1888   // which can leave us in an inconsistent state, so clear it out here.
1889   (void)ScopedPyObjectPtr(Clear(self));
1890 
1891   self->message->CopyFrom(*other_message->message);
1892 
1893   Py_RETURN_NONE;
1894 }
1895 
// Protobuf has a built-in 64MB limit; setting this flag to true overrides it
// (MergeFromString() raises the stream's total-bytes limit when it is set).
// Please do not enable this unless you fully understand the implications:
// protobufs must all be kept in memory at the same time, so if they grow too
// big you may get OOM errors. The protobuf APIs do not provide any tools for
// processing protobufs in chunks.  If you have protos this big, you should
// break them up if it is at all convenient to do so.
static bool allow_oversize_protos = false;
1903 
1904 // Provide a method in the module to set allow_oversize_protos to a boolean
1905 // value. This method returns the newly value of allow_oversize_protos.
1906 static PyObject* SetAllowOversizeProtos(PyObject* m, PyObject* arg) {
1907   if (!arg || !PyBool_Check(arg)) {
1908     PyErr_SetString(PyExc_TypeError,
1909                     "Argument to SetAllowOversizeProtos must be boolean");
1910     return NULL;
1911   }
1912   allow_oversize_protos = PyObject_IsTrue(arg);
1913   if (allow_oversize_protos) {
1914     Py_RETURN_TRUE;
1915   } else {
1916     Py_RETURN_FALSE;
1917   }
1918 }
1919 
1920 static PyObject* MergeFromString(CMessage* self, PyObject* arg) {
1921   const void* data;
1922   Py_ssize_t data_length;
1923   if (PyObject_AsReadBuffer(arg, &data, &data_length) < 0) {
1924     return NULL;
1925   }
1926 
1927   AssureWritable(self);
1928   io::CodedInputStream input(
1929       reinterpret_cast<const uint8*>(data), data_length);
1930   if (allow_oversize_protos) {
1931     input.SetTotalBytesLimit(INT_MAX, INT_MAX);
1932   }
1933   PyDescriptorPool* pool = GetDescriptorPoolForMessage(self);
1934   input.SetExtensionRegistry(pool->pool, pool->message_factory);
1935   bool success = self->message->MergePartialFromCodedStream(&input);
1936   if (success) {
1937     return PyInt_FromLong(input.CurrentPosition());
1938   } else {
1939     PyErr_Format(DecodeError_class, "Error parsing message");
1940     return NULL;
1941   }
1942 }
1943 
1944 static PyObject* ParseFromString(CMessage* self, PyObject* arg) {
1945   if (ScopedPyObjectPtr(Clear(self)) == NULL) {
1946     return NULL;
1947   }
1948   return MergeFromString(self, arg);
1949 }
1950 
1951 static PyObject* ByteSize(CMessage* self, PyObject* args) {
1952   return PyLong_FromLong(self->message->ByteSize());
1953 }
1954 
1955 static PyObject* RegisterExtension(PyObject* cls,
1956                                    PyObject* extension_handle) {
1957   const FieldDescriptor* descriptor =
1958       GetExtensionDescriptor(extension_handle);
1959   if (descriptor == NULL) {
1960     return NULL;
1961   }
1962 
1963   ScopedPyObjectPtr extensions_by_name(
1964       PyObject_GetAttr(cls, k_extensions_by_name));
1965   if (extensions_by_name == NULL) {
1966     PyErr_SetString(PyExc_TypeError, "no extensions_by_name on class");
1967     return NULL;
1968   }
1969   ScopedPyObjectPtr full_name(PyObject_GetAttr(extension_handle, kfull_name));
1970   if (full_name == NULL) {
1971     return NULL;
1972   }
1973 
1974   // If the extension was already registered, check that it is the same.
1975   PyObject* existing_extension =
1976       PyDict_GetItem(extensions_by_name.get(), full_name.get());
1977   if (existing_extension != NULL) {
1978     const FieldDescriptor* existing_extension_descriptor =
1979         GetExtensionDescriptor(existing_extension);
1980     if (existing_extension_descriptor != descriptor) {
1981       PyErr_SetString(PyExc_ValueError, "Double registration of Extensions");
1982       return NULL;
1983     }
1984     // Nothing else to do.
1985     Py_RETURN_NONE;
1986   }
1987 
1988   if (PyDict_SetItem(extensions_by_name.get(), full_name.get(),
1989                      extension_handle) < 0) {
1990     return NULL;
1991   }
1992 
1993   // Also store a mapping from extension number to implementing class.
1994   ScopedPyObjectPtr extensions_by_number(
1995       PyObject_GetAttr(cls, k_extensions_by_number));
1996   if (extensions_by_number == NULL) {
1997     PyErr_SetString(PyExc_TypeError, "no extensions_by_number on class");
1998     return NULL;
1999   }
2000 
2001   ScopedPyObjectPtr number(PyObject_GetAttrString(extension_handle, "number"));
2002   if (number == NULL) {
2003     return NULL;
2004   }
2005 
2006   // If the extension was already registered by number, check that it is the
2007   // same.
2008   existing_extension = PyDict_GetItem(extensions_by_number.get(), number.get());
2009   if (existing_extension != NULL) {
2010     const FieldDescriptor* existing_extension_descriptor =
2011         GetExtensionDescriptor(existing_extension);
2012     if (existing_extension_descriptor != descriptor) {
2013       const Descriptor* msg_desc = GetMessageDescriptor(
2014           reinterpret_cast<PyTypeObject*>(cls));
2015       PyErr_Format(
2016           PyExc_ValueError,
2017           "Extensions \"%s\" and \"%s\" both try to extend message type "
2018           "\"%s\" with field number %ld.",
2019           existing_extension_descriptor->full_name().c_str(),
2020           descriptor->full_name().c_str(),
2021           msg_desc->full_name().c_str(),
2022           PyInt_AsLong(number.get()));
2023       return NULL;
2024     }
2025     // Nothing else to do.
2026     Py_RETURN_NONE;
2027   }
2028   if (PyDict_SetItem(extensions_by_number.get(), number.get(),
2029                      extension_handle) < 0) {
2030     return NULL;
2031   }
2032 
2033   // Check if it's a message set
2034   if (descriptor->is_extension() &&
2035       descriptor->containing_type()->options().message_set_wire_format() &&
2036       descriptor->type() == FieldDescriptor::TYPE_MESSAGE &&
2037       descriptor->label() == FieldDescriptor::LABEL_OPTIONAL) {
2038     ScopedPyObjectPtr message_name(PyString_FromStringAndSize(
2039         descriptor->message_type()->full_name().c_str(),
2040         descriptor->message_type()->full_name().size()));
2041     if (message_name == NULL) {
2042       return NULL;
2043     }
2044     PyDict_SetItem(extensions_by_name.get(), message_name.get(),
2045                    extension_handle);
2046   }
2047 
2048   Py_RETURN_NONE;
2049 }
2050 
2051 static PyObject* SetInParent(CMessage* self, PyObject* args) {
2052   AssureWritable(self);
2053   Py_RETURN_NONE;
2054 }
2055 
2056 static PyObject* WhichOneof(CMessage* self, PyObject* arg) {
2057   Py_ssize_t name_size;
2058   char *name_data;
2059   if (PyString_AsStringAndSize(arg, &name_data, &name_size) < 0)
2060     return NULL;
2061   string oneof_name = string(name_data, name_size);
2062   const OneofDescriptor* oneof_desc =
2063       self->message->GetDescriptor()->FindOneofByName(oneof_name);
2064   if (oneof_desc == NULL) {
2065     PyErr_Format(PyExc_ValueError,
2066                  "Protocol message has no oneof \"%s\" field.",
2067                  oneof_name.c_str());
2068     return NULL;
2069   }
2070   const FieldDescriptor* field_in_oneof =
2071       self->message->GetReflection()->GetOneofFieldDescriptor(
2072           *self->message, oneof_desc);
2073   if (field_in_oneof == NULL) {
2074     Py_RETURN_NONE;
2075   } else {
2076     const string& name = field_in_oneof->name();
2077     return PyString_FromStringAndSize(name.c_str(), name.size());
2078   }
2079 }
2080 
// Forward declaration: ListFields() below calls this function, whose
// definition appears later in this file.
static PyObject* GetExtensionDict(CMessage* self, void *closure);
2082 
2083 static PyObject* ListFields(CMessage* self) {
2084   vector<const FieldDescriptor*> fields;
2085   self->message->GetReflection()->ListFields(*self->message, &fields);
2086 
2087   // Normally, the list will be exactly the size of the fields.
2088   ScopedPyObjectPtr all_fields(PyList_New(fields.size()));
2089   if (all_fields == NULL) {
2090     return NULL;
2091   }
2092 
2093   // When there are unknown extensions, the py list will *not* contain
2094   // the field information.  Thus the actual size of the py list will be
2095   // smaller than the size of fields.  Set the actual size at the end.
2096   Py_ssize_t actual_size = 0;
2097   for (size_t i = 0; i < fields.size(); ++i) {
2098     ScopedPyObjectPtr t(PyTuple_New(2));
2099     if (t == NULL) {
2100       return NULL;
2101     }
2102 
2103     if (fields[i]->is_extension()) {
2104       ScopedPyObjectPtr extension_field(
2105           PyFieldDescriptor_FromDescriptor(fields[i]));
2106       if (extension_field == NULL) {
2107         return NULL;
2108       }
2109       // With C++ descriptors, the field can always be retrieved, but for
2110       // unknown extensions which have not been imported in Python code, there
2111       // is no message class and we cannot retrieve the value.
2112       // TODO(amauryfa): consider building the class on the fly!
2113       if (fields[i]->message_type() != NULL &&
2114           cdescriptor_pool::GetMessageClass(
2115               GetDescriptorPoolForMessage(self),
2116               fields[i]->message_type()) == NULL) {
2117         PyErr_Clear();
2118         continue;
2119       }
2120       ScopedPyObjectPtr extensions(GetExtensionDict(self, NULL));
2121       if (extensions == NULL) {
2122         return NULL;
2123       }
2124       // 'extension' reference later stolen by PyTuple_SET_ITEM.
2125       PyObject* extension = PyObject_GetItem(
2126           extensions.get(), extension_field.get());
2127       if (extension == NULL) {
2128         return NULL;
2129       }
2130       PyTuple_SET_ITEM(t.get(), 0, extension_field.release());
2131       // Steals reference to 'extension'
2132       PyTuple_SET_ITEM(t.get(), 1, extension);
2133     } else {
2134       // Normal field
2135       const string& field_name = fields[i]->name();
2136       ScopedPyObjectPtr py_field_name(PyString_FromStringAndSize(
2137           field_name.c_str(), field_name.length()));
2138       if (py_field_name == NULL) {
2139         PyErr_SetString(PyExc_ValueError, "bad string");
2140         return NULL;
2141       }
2142       ScopedPyObjectPtr field_descriptor(
2143           PyFieldDescriptor_FromDescriptor(fields[i]));
2144       if (field_descriptor == NULL) {
2145         return NULL;
2146       }
2147 
2148       PyObject* field_value = GetAttr(self, py_field_name.get());
2149       if (field_value == NULL) {
2150         PyErr_SetObject(PyExc_ValueError, py_field_name.get());
2151         return NULL;
2152       }
2153       PyTuple_SET_ITEM(t.get(), 0, field_descriptor.release());
2154       PyTuple_SET_ITEM(t.get(), 1, field_value);
2155     }
2156     PyList_SET_ITEM(all_fields.get(), actual_size, t.release());
2157     ++actual_size;
2158   }
2159   if (static_cast<size_t>(actual_size) != fields.size() &&
2160       (PyList_SetSlice(all_fields.get(), actual_size, fields.size(), NULL) <
2161        0)) {
2162     return NULL;
2163   }
2164   return all_fields.release();
2165 }
2166 
2167 static PyObject* DiscardUnknownFields(CMessage* self) {
2168   AssureWritable(self);
2169   self->message->DiscardUnknownFields();
2170   Py_RETURN_NONE;
2171 }
2172 
2173 PyObject* FindInitializationErrors(CMessage* self) {
2174   Message* message = self->message;
2175   vector<string> errors;
2176   message->FindInitializationErrors(&errors);
2177 
2178   PyObject* error_list = PyList_New(errors.size());
2179   if (error_list == NULL) {
2180     return NULL;
2181   }
2182   for (size_t i = 0; i < errors.size(); ++i) {
2183     const string& error = errors[i];
2184     PyObject* error_string = PyString_FromStringAndSize(
2185         error.c_str(), error.length());
2186     if (error_string == NULL) {
2187       Py_DECREF(error_list);
2188       return NULL;
2189     }
2190     PyList_SET_ITEM(error_list, i, error_string);
2191   }
2192   return error_list;
2193 }
2194 
2195 static PyObject* RichCompare(CMessage* self, PyObject* other, int opid) {
2196   // Only equality comparisons are implemented.
2197   if (opid != Py_EQ && opid != Py_NE) {
2198     Py_INCREF(Py_NotImplemented);
2199     return Py_NotImplemented;
2200   }
2201   bool equals = true;
2202   // If other is not a message, it cannot be equal.
2203   if (!PyObject_TypeCheck(other, &CMessage_Type)) {
2204     equals = false;
2205   }
2206   const google::protobuf::Message* other_message =
2207       reinterpret_cast<CMessage*>(other)->message;
2208   // If messages don't have the same descriptors, they are not equal.
2209   if (equals &&
2210       self->message->GetDescriptor() != other_message->GetDescriptor()) {
2211     equals = false;
2212   }
2213   // Check the message contents.
2214   if (equals && !google::protobuf::util::MessageDifferencer::Equals(
2215           *self->message,
2216           *reinterpret_cast<CMessage*>(other)->message)) {
2217     equals = false;
2218   }
2219   if (equals ^ (opid == Py_EQ)) {
2220     Py_RETURN_FALSE;
2221   } else {
2222     Py_RETURN_TRUE;
2223   }
2224 }
2225 
2226 PyObject* InternalGetScalar(const Message* message,
2227                             const FieldDescriptor* field_descriptor) {
2228   const Reflection* reflection = message->GetReflection();
2229 
2230   if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
2231     return NULL;
2232   }
2233 
2234   PyObject* result = NULL;
2235   switch (field_descriptor->cpp_type()) {
2236     case FieldDescriptor::CPPTYPE_INT32: {
2237       int32 value = reflection->GetInt32(*message, field_descriptor);
2238       result = PyInt_FromLong(value);
2239       break;
2240     }
2241     case FieldDescriptor::CPPTYPE_INT64: {
2242       int64 value = reflection->GetInt64(*message, field_descriptor);
2243       result = PyLong_FromLongLong(value);
2244       break;
2245     }
2246     case FieldDescriptor::CPPTYPE_UINT32: {
2247       uint32 value = reflection->GetUInt32(*message, field_descriptor);
2248       result = PyInt_FromSize_t(value);
2249       break;
2250     }
2251     case FieldDescriptor::CPPTYPE_UINT64: {
2252       uint64 value = reflection->GetUInt64(*message, field_descriptor);
2253       result = PyLong_FromUnsignedLongLong(value);
2254       break;
2255     }
2256     case FieldDescriptor::CPPTYPE_FLOAT: {
2257       float value = reflection->GetFloat(*message, field_descriptor);
2258       result = PyFloat_FromDouble(value);
2259       break;
2260     }
2261     case FieldDescriptor::CPPTYPE_DOUBLE: {
2262       double value = reflection->GetDouble(*message, field_descriptor);
2263       result = PyFloat_FromDouble(value);
2264       break;
2265     }
2266     case FieldDescriptor::CPPTYPE_BOOL: {
2267       bool value = reflection->GetBool(*message, field_descriptor);
2268       result = PyBool_FromLong(value);
2269       break;
2270     }
2271     case FieldDescriptor::CPPTYPE_STRING: {
2272       string value = reflection->GetString(*message, field_descriptor);
2273       result = ToStringObject(field_descriptor, value);
2274       break;
2275     }
2276     case FieldDescriptor::CPPTYPE_ENUM: {
2277       if (!message->GetReflection()->SupportsUnknownEnumValues() &&
2278           !message->GetReflection()->HasField(*message, field_descriptor)) {
2279         // Look for the value in the unknown fields.
2280         const UnknownFieldSet& unknown_field_set =
2281             message->GetReflection()->GetUnknownFields(*message);
2282         for (int i = 0; i < unknown_field_set.field_count(); ++i) {
2283           if (unknown_field_set.field(i).number() ==
2284               field_descriptor->number() &&
2285               unknown_field_set.field(i).type() ==
2286               google::protobuf::UnknownField::TYPE_VARINT) {
2287             result = PyInt_FromLong(unknown_field_set.field(i).varint());
2288             break;
2289           }
2290         }
2291       }
2292 
2293       if (result == NULL) {
2294         const EnumValueDescriptor* enum_value =
2295             message->GetReflection()->GetEnum(*message, field_descriptor);
2296         result = PyInt_FromLong(enum_value->number());
2297       }
2298       break;
2299     }
2300     default:
2301       PyErr_Format(
2302           PyExc_SystemError, "Getting a value from a field of unknown type %d",
2303           field_descriptor->cpp_type());
2304   }
2305 
2306   return result;
2307 }
2308 
2309 PyObject* InternalGetSubMessage(
2310     CMessage* self, const FieldDescriptor* field_descriptor) {
2311   const Reflection* reflection = self->message->GetReflection();
2312   PyDescriptorPool* pool = GetDescriptorPoolForMessage(self);
2313   const Message& sub_message = reflection->GetMessage(
2314       *self->message, field_descriptor, pool->message_factory);
2315 
2316   CMessageClass* message_class = cdescriptor_pool::GetMessageClass(
2317       pool, field_descriptor->message_type());
2318   if (message_class == NULL) {
2319     return NULL;
2320   }
2321 
2322   CMessage* cmsg = cmessage::NewEmptyMessage(message_class);
2323   if (cmsg == NULL) {
2324     return NULL;
2325   }
2326 
2327   cmsg->owner = self->owner;
2328   cmsg->parent = self;
2329   cmsg->parent_field_descriptor = field_descriptor;
2330   cmsg->read_only = !reflection->HasField(*self->message, field_descriptor);
2331   cmsg->message = const_cast<Message*>(&sub_message);
2332 
2333   return reinterpret_cast<PyObject*>(cmsg);
2334 }
2335 
2336 int InternalSetNonOneofScalar(
2337     Message* message,
2338     const FieldDescriptor* field_descriptor,
2339     PyObject* arg) {
2340   const Reflection* reflection = message->GetReflection();
2341 
2342   if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
2343     return -1;
2344   }
2345 
2346   switch (field_descriptor->cpp_type()) {
2347     case FieldDescriptor::CPPTYPE_INT32: {
2348       GOOGLE_CHECK_GET_INT32(arg, value, -1);
2349       reflection->SetInt32(message, field_descriptor, value);
2350       break;
2351     }
2352     case FieldDescriptor::CPPTYPE_INT64: {
2353       GOOGLE_CHECK_GET_INT64(arg, value, -1);
2354       reflection->SetInt64(message, field_descriptor, value);
2355       break;
2356     }
2357     case FieldDescriptor::CPPTYPE_UINT32: {
2358       GOOGLE_CHECK_GET_UINT32(arg, value, -1);
2359       reflection->SetUInt32(message, field_descriptor, value);
2360       break;
2361     }
2362     case FieldDescriptor::CPPTYPE_UINT64: {
2363       GOOGLE_CHECK_GET_UINT64(arg, value, -1);
2364       reflection->SetUInt64(message, field_descriptor, value);
2365       break;
2366     }
2367     case FieldDescriptor::CPPTYPE_FLOAT: {
2368       GOOGLE_CHECK_GET_FLOAT(arg, value, -1);
2369       reflection->SetFloat(message, field_descriptor, value);
2370       break;
2371     }
2372     case FieldDescriptor::CPPTYPE_DOUBLE: {
2373       GOOGLE_CHECK_GET_DOUBLE(arg, value, -1);
2374       reflection->SetDouble(message, field_descriptor, value);
2375       break;
2376     }
2377     case FieldDescriptor::CPPTYPE_BOOL: {
2378       GOOGLE_CHECK_GET_BOOL(arg, value, -1);
2379       reflection->SetBool(message, field_descriptor, value);
2380       break;
2381     }
2382     case FieldDescriptor::CPPTYPE_STRING: {
2383       if (!CheckAndSetString(
2384           arg, message, field_descriptor, reflection, false, -1)) {
2385         return -1;
2386       }
2387       break;
2388     }
2389     case FieldDescriptor::CPPTYPE_ENUM: {
2390       GOOGLE_CHECK_GET_INT32(arg, value, -1);
2391       if (reflection->SupportsUnknownEnumValues()) {
2392         reflection->SetEnumValue(message, field_descriptor, value);
2393       } else {
2394         const EnumDescriptor* enum_descriptor = field_descriptor->enum_type();
2395         const EnumValueDescriptor* enum_value =
2396             enum_descriptor->FindValueByNumber(value);
2397         if (enum_value != NULL) {
2398           reflection->SetEnum(message, field_descriptor, enum_value);
2399         } else {
2400           PyErr_Format(PyExc_ValueError, "Unknown enum value: %d", value);
2401           return -1;
2402         }
2403       }
2404       break;
2405     }
2406     default:
2407       PyErr_Format(
2408           PyExc_SystemError, "Setting value to a field of unknown type %d",
2409           field_descriptor->cpp_type());
2410       return -1;
2411   }
2412 
2413   return 0;
2414 }
2415 
2416 int InternalSetScalar(
2417     CMessage* self,
2418     const FieldDescriptor* field_descriptor,
2419     PyObject* arg) {
2420   if (!CheckFieldBelongsToMessage(field_descriptor, self->message)) {
2421     return -1;
2422   }
2423 
2424   if (MaybeReleaseOverlappingOneofField(self, field_descriptor) < 0) {
2425     return -1;
2426   }
2427 
2428   return InternalSetNonOneofScalar(self->message, field_descriptor, arg);
2429 }
2430 
2431 PyObject* FromString(PyTypeObject* cls, PyObject* serialized) {
2432   PyObject* py_cmsg = PyObject_CallObject(
2433       reinterpret_cast<PyObject*>(cls), NULL);
2434   if (py_cmsg == NULL) {
2435     return NULL;
2436   }
2437   CMessage* cmsg = reinterpret_cast<CMessage*>(py_cmsg);
2438 
2439   ScopedPyObjectPtr py_length(MergeFromString(cmsg, serialized));
2440   if (py_length == NULL) {
2441     Py_DECREF(py_cmsg);
2442     return NULL;
2443   }
2444 
2445   return py_cmsg;
2446 }
2447 
2448 PyObject* DeepCopy(CMessage* self, PyObject* arg) {
2449   PyObject* clone = PyObject_CallObject(
2450       reinterpret_cast<PyObject*>(Py_TYPE(self)), NULL);
2451   if (clone == NULL) {
2452     return NULL;
2453   }
2454   if (!PyObject_TypeCheck(clone, &CMessage_Type)) {
2455     Py_DECREF(clone);
2456     return NULL;
2457   }
2458   if (ScopedPyObjectPtr(MergeFrom(
2459           reinterpret_cast<CMessage*>(clone),
2460           reinterpret_cast<PyObject*>(self))) == NULL) {
2461     Py_DECREF(clone);
2462     return NULL;
2463   }
2464   return clone;
2465 }
2466 
2467 PyObject* ToUnicode(CMessage* self) {
2468   // Lazy import to prevent circular dependencies
2469   ScopedPyObjectPtr text_format(
2470       PyImport_ImportModule("google.protobuf.text_format"));
2471   if (text_format == NULL) {
2472     return NULL;
2473   }
2474   ScopedPyObjectPtr method_name(PyString_FromString("MessageToString"));
2475   if (method_name == NULL) {
2476     return NULL;
2477   }
2478   Py_INCREF(Py_True);
2479   ScopedPyObjectPtr encoded(PyObject_CallMethodObjArgs(
2480       text_format.get(), method_name.get(), self, Py_True, NULL));
2481   Py_DECREF(Py_True);
2482   if (encoded == NULL) {
2483     return NULL;
2484   }
2485 #if PY_MAJOR_VERSION < 3
2486   PyObject* decoded = PyString_AsDecodedObject(encoded.get(), "utf-8", NULL);
2487 #else
2488   PyObject* decoded = PyUnicode_FromEncodedObject(encoded.get(), "utf-8", NULL);
2489 #endif
2490   if (decoded == NULL) {
2491     return NULL;
2492   }
2493   return decoded;
2494 }
2495 
2496 PyObject* Reduce(CMessage* self) {
2497   ScopedPyObjectPtr constructor(reinterpret_cast<PyObject*>(Py_TYPE(self)));
2498   constructor.inc();
2499   ScopedPyObjectPtr args(PyTuple_New(0));
2500   if (args == NULL) {
2501     return NULL;
2502   }
2503   ScopedPyObjectPtr state(PyDict_New());
2504   if (state == NULL) {
2505     return  NULL;
2506   }
2507   ScopedPyObjectPtr serialized(SerializePartialToString(self));
2508   if (serialized == NULL) {
2509     return NULL;
2510   }
2511   if (PyDict_SetItemString(state.get(), "serialized", serialized.get()) < 0) {
2512     return NULL;
2513   }
2514   return Py_BuildValue("OOO", constructor.get(), args.get(), state.get());
2515 }
2516 
2517 PyObject* SetState(CMessage* self, PyObject* state) {
2518   if (!PyDict_Check(state)) {
2519     PyErr_SetString(PyExc_TypeError, "state not a dict");
2520     return NULL;
2521   }
2522   PyObject* serialized = PyDict_GetItemString(state, "serialized");
2523   if (serialized == NULL) {
2524     return NULL;
2525   }
2526   if (ScopedPyObjectPtr(ParseFromString(self, serialized)) == NULL) {
2527     return NULL;
2528   }
2529   Py_RETURN_NONE;
2530 }
2531 
2532 // CMessage static methods:
2533 PyObject* _CheckCalledFromGeneratedFile(PyObject* unused,
2534                                         PyObject* unused_arg) {
2535   if (!_CalledFromGeneratedFile(1)) {
2536     PyErr_SetString(PyExc_TypeError,
2537                     "Descriptors should not be created directly, "
2538                     "but only retrieved from their parent.");
2539     return NULL;
2540   }
2541   Py_RETURN_NONE;
2542 }
2543 
2544 static PyObject* GetExtensionDict(CMessage* self, void *closure) {
2545   if (self->extensions)  {
2546     Py_INCREF(self->extensions);
2547     return reinterpret_cast<PyObject*>(self->extensions);
2548   }
2549 
2550   // If there are extension_ranges, the message is "extendable". Allocate a
2551   // dictionary to store the extension fields.
2552   const Descriptor* descriptor = GetMessageDescriptor(Py_TYPE(self));
2553   if (descriptor->extension_range_count() > 0) {
2554     ExtensionDict* extension_dict = extension_dict::NewExtensionDict(self);
2555     if (extension_dict == NULL) {
2556       return NULL;
2557     }
2558     self->extensions = extension_dict;
2559     Py_INCREF(self->extensions);
2560     return reinterpret_cast<PyObject*>(self->extensions);
2561   }
2562 
2563   PyErr_SetNone(PyExc_AttributeError);
2564   return NULL;
2565 }
2566 
2567 static PyGetSetDef Getters[] = {
2568   {"Extensions", (getter)GetExtensionDict, NULL, "Extension dict"},
2569   {NULL}
2570 };
2571 
2572 static PyMethodDef Methods[] = {
2573   { "__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
2574     "Makes a deep copy of the class." },
2575   { "__reduce__", (PyCFunction)Reduce, METH_NOARGS,
2576     "Outputs picklable representation of the message." },
2577   { "__setstate__", (PyCFunction)SetState, METH_O,
2578     "Inputs picklable representation of the message." },
2579   { "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
2580     "Outputs a unicode representation of the message." },
2581   { "ByteSize", (PyCFunction)ByteSize, METH_NOARGS,
2582     "Returns the size of the message in bytes." },
2583   { "Clear", (PyCFunction)Clear, METH_NOARGS,
2584     "Clears the message." },
2585   { "ClearExtension", (PyCFunction)ClearExtension, METH_O,
2586     "Clears a message field." },
2587   { "ClearField", (PyCFunction)ClearField, METH_O,
2588     "Clears a message field." },
2589   { "CopyFrom", (PyCFunction)CopyFrom, METH_O,
2590     "Copies a protocol message into the current message." },
2591   { "DiscardUnknownFields", (PyCFunction)DiscardUnknownFields, METH_NOARGS,
2592     "Discards the unknown fields." },
2593   { "FindInitializationErrors", (PyCFunction)FindInitializationErrors,
2594     METH_NOARGS,
2595     "Finds unset required fields." },
2596   { "FromString", (PyCFunction)FromString, METH_O | METH_CLASS,
2597     "Creates new method instance from given serialized data." },
2598   { "HasExtension", (PyCFunction)HasExtension, METH_O,
2599     "Checks if a message field is set." },
2600   { "HasField", (PyCFunction)HasField, METH_O,
2601     "Checks if a message field is set." },
2602   { "IsInitialized", (PyCFunction)IsInitialized, METH_VARARGS,
2603     "Checks if all required fields of a protocol message are set." },
2604   { "ListFields", (PyCFunction)ListFields, METH_NOARGS,
2605     "Lists all set fields of a message." },
2606   { "MergeFrom", (PyCFunction)MergeFrom, METH_O,
2607     "Merges a protocol message into the current message." },
2608   { "MergeFromString", (PyCFunction)MergeFromString, METH_O,
2609     "Merges a serialized message into the current message." },
2610   { "ParseFromString", (PyCFunction)ParseFromString, METH_O,
2611     "Parses a serialized message into the current message." },
2612   { "RegisterExtension", (PyCFunction)RegisterExtension, METH_O | METH_CLASS,
2613     "Registers an extension with the current message." },
2614   { "SerializePartialToString", (PyCFunction)SerializePartialToString,
2615     METH_NOARGS,
2616     "Serializes the message to a string, even if it isn't initialized." },
2617   { "SerializeToString", (PyCFunction)SerializeToString, METH_NOARGS,
2618     "Serializes the message to a string, only for initialized messages." },
2619   { "SetInParent", (PyCFunction)SetInParent, METH_NOARGS,
2620     "Sets the has bit of the given field in its parent message." },
2621   { "WhichOneof", (PyCFunction)WhichOneof, METH_O,
2622     "Returns the name of the field set inside a oneof, "
2623     "or None if no field is set." },
2624 
2625   // Static Methods.
2626   { "_CheckCalledFromGeneratedFile", (PyCFunction)_CheckCalledFromGeneratedFile,
2627     METH_NOARGS | METH_STATIC,
2628     "Raises TypeError if the caller is not in a _pb2.py file."},
2629   { NULL, NULL}
2630 };
2631 
2632 static bool SetCompositeField(
2633     CMessage* self, PyObject* name, PyObject* value) {
2634   if (self->composite_fields == NULL) {
2635     self->composite_fields = PyDict_New();
2636     if (self->composite_fields == NULL) {
2637       return false;
2638     }
2639   }
2640   return PyDict_SetItem(self->composite_fields, name, value) == 0;
2641 }
2642 
2643 PyObject* GetAttr(CMessage* self, PyObject* name) {
2644   PyObject* value = self->composite_fields ?
2645       PyDict_GetItem(self->composite_fields, name) : NULL;
2646   if (value != NULL) {
2647     Py_INCREF(value);
2648     return value;
2649   }
2650 
2651   const FieldDescriptor* field_descriptor = GetFieldDescriptor(self, name);
2652   if (field_descriptor == NULL) {
2653     return CMessage_Type.tp_base->tp_getattro(
2654         reinterpret_cast<PyObject*>(self), name);
2655   }
2656 
2657   if (field_descriptor->is_map()) {
2658     PyObject* py_container = NULL;
2659     const Descriptor* entry_type = field_descriptor->message_type();
2660     const FieldDescriptor* value_type = entry_type->FindFieldByName("value");
2661     if (value_type->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2662       CMessageClass* value_class = cdescriptor_pool::GetMessageClass(
2663           GetDescriptorPoolForMessage(self), value_type->message_type());
2664       if (value_class == NULL) {
2665         return NULL;
2666       }
2667       py_container =
2668           NewMessageMapContainer(self, field_descriptor, value_class);
2669     } else {
2670       py_container = NewScalarMapContainer(self, field_descriptor);
2671     }
2672     if (py_container == NULL) {
2673       return NULL;
2674     }
2675     if (!SetCompositeField(self, name, py_container)) {
2676       Py_DECREF(py_container);
2677       return NULL;
2678     }
2679     return py_container;
2680   }
2681 
2682   if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
2683     PyObject* py_container = NULL;
2684     if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2685       CMessageClass* message_class = cdescriptor_pool::GetMessageClass(
2686           GetDescriptorPoolForMessage(self), field_descriptor->message_type());
2687       if (message_class == NULL) {
2688         return NULL;
2689       }
2690       py_container = repeated_composite_container::NewContainer(
2691           self, field_descriptor, message_class);
2692     } else {
2693       py_container = repeated_scalar_container::NewContainer(
2694           self, field_descriptor);
2695     }
2696     if (py_container == NULL) {
2697       return NULL;
2698     }
2699     if (!SetCompositeField(self, name, py_container)) {
2700       Py_DECREF(py_container);
2701       return NULL;
2702     }
2703     return py_container;
2704   }
2705 
2706   if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2707     PyObject* sub_message = InternalGetSubMessage(self, field_descriptor);
2708     if (sub_message == NULL) {
2709       return NULL;
2710     }
2711     if (!SetCompositeField(self, name, sub_message)) {
2712       Py_DECREF(sub_message);
2713       return NULL;
2714     }
2715     return sub_message;
2716   }
2717 
2718   return InternalGetScalar(self->message, field_descriptor);
2719 }
2720 
2721 int SetAttr(CMessage* self, PyObject* name, PyObject* value) {
2722   if (self->composite_fields && PyDict_Contains(self->composite_fields, name)) {
2723     PyErr_SetString(PyExc_TypeError, "Can't set composite field");
2724     return -1;
2725   }
2726 
2727   const FieldDescriptor* field_descriptor = GetFieldDescriptor(self, name);
2728   if (field_descriptor != NULL) {
2729     AssureWritable(self);
2730     if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
2731       PyErr_Format(PyExc_AttributeError, "Assignment not allowed to repeated "
2732                    "field \"%s\" in protocol message object.",
2733                    field_descriptor->name().c_str());
2734       return -1;
2735     } else {
2736       if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2737         PyErr_Format(PyExc_AttributeError, "Assignment not allowed to "
2738                      "field \"%s\" in protocol message object.",
2739                      field_descriptor->name().c_str());
2740         return -1;
2741       } else {
2742         return InternalSetScalar(self, field_descriptor, value);
2743       }
2744     }
2745   }
2746 
2747   PyErr_Format(PyExc_AttributeError,
2748                "Assignment not allowed "
2749                "(no field \"%s\" in protocol message object).",
2750                PyString_AsString(name));
2751   return -1;
2752 }
2753 
2754 }  // namespace cmessage
2755 
2756 PyTypeObject CMessage_Type = {
2757   PyVarObject_HEAD_INIT(&CMessageClass_Type, 0)
2758   FULL_MODULE_NAME ".CMessage",        // tp_name
2759   sizeof(CMessage),                    // tp_basicsize
2760   0,                                   //  tp_itemsize
2761   (destructor)cmessage::Dealloc,       //  tp_dealloc
2762   0,                                   //  tp_print
2763   0,                                   //  tp_getattr
2764   0,                                   //  tp_setattr
2765   0,                                   //  tp_compare
2766   (reprfunc)cmessage::ToStr,           //  tp_repr
2767   0,                                   //  tp_as_number
2768   0,                                   //  tp_as_sequence
2769   0,                                   //  tp_as_mapping
2770   PyObject_HashNotImplemented,         //  tp_hash
2771   0,                                   //  tp_call
2772   (reprfunc)cmessage::ToStr,           //  tp_str
2773   (getattrofunc)cmessage::GetAttr,     //  tp_getattro
2774   (setattrofunc)cmessage::SetAttr,     //  tp_setattro
2775   0,                                   //  tp_as_buffer
2776   Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  //  tp_flags
2777   "A ProtocolMessage",                 //  tp_doc
2778   0,                                   //  tp_traverse
2779   0,                                   //  tp_clear
2780   (richcmpfunc)cmessage::RichCompare,  //  tp_richcompare
2781   0,                                   //  tp_weaklistoffset
2782   0,                                   //  tp_iter
2783   0,                                   //  tp_iternext
2784   cmessage::Methods,                   //  tp_methods
2785   0,                                   //  tp_members
2786   cmessage::Getters,                   //  tp_getset
2787   0,                                   //  tp_base
2788   0,                                   //  tp_dict
2789   0,                                   //  tp_descr_get
2790   0,                                   //  tp_descr_set
2791   0,                                   //  tp_dictoffset
2792   (initproc)cmessage::Init,            //  tp_init
2793   0,                                   //  tp_alloc
2794   cmessage::New,                       //  tp_new
2795 };
2796 
2797 // --- Exposing the C proto living inside Python proto to C code:
2798 
// Hooks through which C++ code reaches the C++ message wrapped by a Python
// message object. InitProto2MessageModule() points them at
// GetCProtoInsidePyProtoImpl and MutableCProtoInsidePyProtoImpl.
const Message* (*GetCProtoInsidePyProtoPtr)(PyObject* msg);
Message* (*MutableCProtoInsidePyProtoPtr)(PyObject* msg);
2801 
2802 static const Message* GetCProtoInsidePyProtoImpl(PyObject* msg) {
2803   if (!PyObject_TypeCheck(msg, &CMessage_Type)) {
2804     return NULL;
2805   }
2806   CMessage* cmsg = reinterpret_cast<CMessage*>(msg);
2807   return cmsg->message;
2808 }
2809 
2810 static Message* MutableCProtoInsidePyProtoImpl(PyObject* msg) {
2811   if (!PyObject_TypeCheck(msg, &CMessage_Type)) {
2812     return NULL;
2813   }
2814   CMessage* cmsg = reinterpret_cast<CMessage*>(msg);
2815   if ((cmsg->composite_fields && PyDict_Size(cmsg->composite_fields) != 0) ||
2816       (cmsg->extensions != NULL &&
2817        PyDict_Size(cmsg->extensions->values) != 0)) {
2818     // There is currently no way of accurately syncing arbitrary changes to
2819     // the underlying C++ message back to the CMessage (e.g. removed repeated
2820     // composite containers). We only allow direct mutation of the underlying
2821     // C++ message if there is no child data in the CMessage.
2822     return NULL;
2823   }
2824   cmessage::AssureWritable(cmsg);
2825   return cmsg->message;
2826 }
2827 
// Docstring attached to the extension module when it is created (passed as
// the module doc to PyModule_Create / Py_InitModule3 below).
static const char module_docstring[] =
"python-proto2 is a module that can be used to enhance proto2 Python API\n"
"performance.\n"
"\n"
"It provides access to the protocol buffers C++ reflection API that\n"
"implements the basic protocol buffer functions.";
2834 
2835 void InitGlobals() {
2836   // TODO(gps): Check all return values in this function for NULL and propagate
2837   // the error (MemoryError) on up to result in an import failure.  These should
2838   // also be freed and reset to NULL during finalization.
2839   kPythonZero = PyInt_FromLong(0);
2840   kint32min_py = PyInt_FromLong(kint32min);
2841   kint32max_py = PyInt_FromLong(kint32max);
2842   kuint32max_py = PyLong_FromLongLong(kuint32max);
2843   kint64min_py = PyLong_FromLongLong(kint64min);
2844   kint64max_py = PyLong_FromLongLong(kint64max);
2845   kuint64max_py = PyLong_FromUnsignedLongLong(kuint64max);
2846 
2847   kDESCRIPTOR = PyString_FromString("DESCRIPTOR");
2848   k_cdescriptor = PyString_FromString("_cdescriptor");
2849   kfull_name = PyString_FromString("full_name");
2850   k_extensions_by_name = PyString_FromString("_extensions_by_name");
2851   k_extensions_by_number = PyString_FromString("_extensions_by_number");
2852 
2853   PyObject *dummy_obj = PySet_New(NULL);
2854   kEmptyWeakref = PyWeakref_NewRef(dummy_obj, NULL);
2855   Py_DECREF(dummy_obj);
2856 }
2857 
2858 bool InitProto2MessageModule(PyObject *m) {
2859   // Initialize types and globals in descriptor.cc
2860   if (!InitDescriptor()) {
2861     return false;
2862   }
2863 
2864   // Initialize types and globals in descriptor_pool.cc
2865   if (!InitDescriptorPool()) {
2866     return false;
2867   }
2868 
2869   // Initialize constants defined in this file.
2870   InitGlobals();
2871 
2872   CMessageClass_Type.tp_base = &PyType_Type;
2873   if (PyType_Ready(&CMessageClass_Type) < 0) {
2874     return false;
2875   }
2876   PyModule_AddObject(m, "MessageMeta",
2877                      reinterpret_cast<PyObject*>(&CMessageClass_Type));
2878 
2879   if (PyType_Ready(&CMessage_Type) < 0) {
2880     return false;
2881   }
2882 
2883   // DESCRIPTOR is set on each protocol buffer message class elsewhere, but set
2884   // it here as well to document that subclasses need to set it.
2885   PyDict_SetItem(CMessage_Type.tp_dict, kDESCRIPTOR, Py_None);
2886   // Subclasses with message extensions will override _extensions_by_name and
2887   // _extensions_by_number with fresh mutable dictionaries in AddDescriptors.
2888   // All other classes can share this same immutable mapping.
2889   ScopedPyObjectPtr empty_dict(PyDict_New());
2890   if (empty_dict == NULL) {
2891     return false;
2892   }
2893   ScopedPyObjectPtr immutable_dict(PyDictProxy_New(empty_dict.get()));
2894   if (immutable_dict == NULL) {
2895     return false;
2896   }
2897   if (PyDict_SetItem(CMessage_Type.tp_dict,
2898                      k_extensions_by_name, immutable_dict.get()) < 0) {
2899     return false;
2900   }
2901   if (PyDict_SetItem(CMessage_Type.tp_dict,
2902                      k_extensions_by_number, immutable_dict.get()) < 0) {
2903     return false;
2904   }
2905 
2906   PyModule_AddObject(m, "Message", reinterpret_cast<PyObject*>(&CMessage_Type));
2907 
2908   // Initialize Repeated container types.
2909   {
2910     if (PyType_Ready(&RepeatedScalarContainer_Type) < 0) {
2911       return false;
2912     }
2913 
2914     PyModule_AddObject(m, "RepeatedScalarContainer",
2915                        reinterpret_cast<PyObject*>(
2916                            &RepeatedScalarContainer_Type));
2917 
2918     if (PyType_Ready(&RepeatedCompositeContainer_Type) < 0) {
2919       return false;
2920     }
2921 
2922     PyModule_AddObject(
2923         m, "RepeatedCompositeContainer",
2924         reinterpret_cast<PyObject*>(
2925             &RepeatedCompositeContainer_Type));
2926 
2927     // Register them as collections.Sequence
2928     ScopedPyObjectPtr collections(PyImport_ImportModule("collections"));
2929     if (collections == NULL) {
2930       return false;
2931     }
2932     ScopedPyObjectPtr mutable_sequence(
2933         PyObject_GetAttrString(collections.get(), "MutableSequence"));
2934     if (mutable_sequence == NULL) {
2935       return false;
2936     }
2937     if (ScopedPyObjectPtr(
2938             PyObject_CallMethod(mutable_sequence.get(), "register", "O",
2939                                 &RepeatedScalarContainer_Type)) == NULL) {
2940       return false;
2941     }
2942     if (ScopedPyObjectPtr(
2943             PyObject_CallMethod(mutable_sequence.get(), "register", "O",
2944                                 &RepeatedCompositeContainer_Type)) == NULL) {
2945       return false;
2946     }
2947   }
2948 
2949   // Initialize Map container types.
2950   {
2951     // ScalarMapContainer_Type derives from our MutableMapping type.
2952     ScopedPyObjectPtr containers(PyImport_ImportModule(
2953         "google.protobuf.internal.containers"));
2954     if (containers == NULL) {
2955       return false;
2956     }
2957 
2958     ScopedPyObjectPtr mutable_mapping(
2959         PyObject_GetAttrString(containers.get(), "MutableMapping"));
2960     if (mutable_mapping == NULL) {
2961       return false;
2962     }
2963 
2964     if (!PyObject_TypeCheck(mutable_mapping.get(), &PyType_Type)) {
2965       return false;
2966     }
2967 
2968     Py_INCREF(mutable_mapping.get());
2969 #if PY_MAJOR_VERSION >= 3
2970     PyObject* bases = PyTuple_New(1);
2971     PyTuple_SET_ITEM(bases, 0, mutable_mapping.get());
2972 
2973     ScalarMapContainer_Type =
2974         PyType_FromSpecWithBases(&ScalarMapContainer_Type_spec, bases);
2975     PyModule_AddObject(m, "ScalarMapContainer", ScalarMapContainer_Type);
2976 #else
2977     ScalarMapContainer_Type.tp_base =
2978         reinterpret_cast<PyTypeObject*>(mutable_mapping.get());
2979 
2980     if (PyType_Ready(&ScalarMapContainer_Type) < 0) {
2981       return false;
2982     }
2983 
2984     PyModule_AddObject(m, "ScalarMapContainer",
2985                        reinterpret_cast<PyObject*>(&ScalarMapContainer_Type));
2986 #endif
2987 
2988     if (PyType_Ready(&MapIterator_Type) < 0) {
2989       return false;
2990     }
2991 
2992     PyModule_AddObject(m, "MapIterator",
2993                        reinterpret_cast<PyObject*>(&MapIterator_Type));
2994 
2995 
2996 #if PY_MAJOR_VERSION >= 3
2997     MessageMapContainer_Type =
2998         PyType_FromSpecWithBases(&MessageMapContainer_Type_spec, bases);
2999     PyModule_AddObject(m, "MessageMapContainer", MessageMapContainer_Type);
3000 #else
3001     Py_INCREF(mutable_mapping.get());
3002     MessageMapContainer_Type.tp_base =
3003         reinterpret_cast<PyTypeObject*>(mutable_mapping.get());
3004 
3005     if (PyType_Ready(&MessageMapContainer_Type) < 0) {
3006       return false;
3007     }
3008 
3009     PyModule_AddObject(m, "MessageMapContainer",
3010                        reinterpret_cast<PyObject*>(&MessageMapContainer_Type));
3011 #endif
3012   }
3013 
3014   if (PyType_Ready(&ExtensionDict_Type) < 0) {
3015     return false;
3016   }
3017   PyModule_AddObject(
3018       m, "ExtensionDict",
3019       reinterpret_cast<PyObject*>(&ExtensionDict_Type));
3020 
3021   // Expose the DescriptorPool used to hold all descriptors added from generated
3022   // pb2.py files.
3023   // PyModule_AddObject steals a reference.
3024   Py_INCREF(GetDefaultDescriptorPool());
3025   PyModule_AddObject(m, "default_pool",
3026                      reinterpret_cast<PyObject*>(GetDefaultDescriptorPool()));
3027 
3028   PyModule_AddObject(m, "DescriptorPool", reinterpret_cast<PyObject*>(
3029       &PyDescriptorPool_Type));
3030 
3031   // This implementation provides full Descriptor types, we advertise it so that
3032   // descriptor.py can use them in replacement of the Python classes.
3033   PyModule_AddIntConstant(m, "_USE_C_DESCRIPTORS", 1);
3034 
3035   PyModule_AddObject(m, "Descriptor", reinterpret_cast<PyObject*>(
3036       &PyMessageDescriptor_Type));
3037   PyModule_AddObject(m, "FieldDescriptor", reinterpret_cast<PyObject*>(
3038       &PyFieldDescriptor_Type));
3039   PyModule_AddObject(m, "EnumDescriptor", reinterpret_cast<PyObject*>(
3040       &PyEnumDescriptor_Type));
3041   PyModule_AddObject(m, "EnumValueDescriptor", reinterpret_cast<PyObject*>(
3042       &PyEnumValueDescriptor_Type));
3043   PyModule_AddObject(m, "FileDescriptor", reinterpret_cast<PyObject*>(
3044       &PyFileDescriptor_Type));
3045   PyModule_AddObject(m, "OneofDescriptor", reinterpret_cast<PyObject*>(
3046       &PyOneofDescriptor_Type));
3047 
3048   PyObject* enum_type_wrapper = PyImport_ImportModule(
3049       "google.protobuf.internal.enum_type_wrapper");
3050   if (enum_type_wrapper == NULL) {
3051     return false;
3052   }
3053   EnumTypeWrapper_class =
3054       PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
3055   Py_DECREF(enum_type_wrapper);
3056 
3057   PyObject* message_module = PyImport_ImportModule(
3058       "google.protobuf.message");
3059   if (message_module == NULL) {
3060     return false;
3061   }
3062   EncodeError_class = PyObject_GetAttrString(message_module, "EncodeError");
3063   DecodeError_class = PyObject_GetAttrString(message_module, "DecodeError");
3064   PythonMessage_class = PyObject_GetAttrString(message_module, "Message");
3065   Py_DECREF(message_module);
3066 
3067   PyObject* pickle_module = PyImport_ImportModule("pickle");
3068   if (pickle_module == NULL) {
3069     return false;
3070   }
3071   PickleError_class = PyObject_GetAttrString(pickle_module, "PickleError");
3072   Py_DECREF(pickle_module);
3073 
3074   // Override {Get,Mutable}CProtoInsidePyProto.
3075   GetCProtoInsidePyProtoPtr = GetCProtoInsidePyProtoImpl;
3076   MutableCProtoInsidePyProtoPtr = MutableCProtoInsidePyProtoImpl;
3077 
3078   return true;
3079 }
3080 
3081 }  // namespace python
3082 }  // namespace protobuf
3083 
3084 static PyMethodDef ModuleMethods[] = {
3085   {"SetAllowOversizeProtos",
3086     (PyCFunction)google::protobuf::python::cmessage::SetAllowOversizeProtos,
3087     METH_O, "Enable/disable oversize proto parsing."},
3088   { NULL, NULL}
3089 };
3090 
// Python 3 and Python 2 differ in the name and return type of the module
// init function. INITFUNC is the version-specific function name and
// INITFUNC_ERRORVAL is what that function returns on failure (NULL on
// Python 3, nothing on Python 2).
#if PY_MAJOR_VERSION >= 3
// Module definition passed to PyModule_Create() on Python 3.
static struct PyModuleDef _module = {
  PyModuleDef_HEAD_INIT,
  "_message",  /* m_name */
  google::protobuf::python::module_docstring,  /* m_doc */
  -1,  /* m_size */
  ModuleMethods,  /* m_methods */
  /* The remaining optional slots are unused. */
  NULL,
  NULL,
  NULL,
  NULL
};
#define INITFUNC PyInit__message
#define INITFUNC_ERRORVAL NULL
#else  // Python 2
#define INITFUNC init_message
#define INITFUNC_ERRORVAL
#endif
3109 
3110 extern "C" {
INITFUNC(void)3111   PyMODINIT_FUNC INITFUNC(void) {
3112     PyObject* m;
3113 #if PY_MAJOR_VERSION >= 3
3114     m = PyModule_Create(&_module);
3115 #else
3116     m = Py_InitModule3("_message", ModuleMethods,
3117                        google::protobuf::python::module_docstring);
3118 #endif
3119     if (m == NULL) {
3120       return INITFUNC_ERRORVAL;
3121     }
3122 
3123     if (!google::protobuf::python::InitProto2MessageModule(m)) {
3124       Py_DECREF(m);
3125       return INITFUNC_ERRORVAL;
3126     }
3127 
3128 #if PY_MAJOR_VERSION >= 3
3129     return m;
3130 #endif
3131   }
3132 }
3133 }  // namespace google
3134