1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2014 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #include "protobuf.h"
32 
33 // -----------------------------------------------------------------------------
34 // Class/module creation from msgdefs and enumdefs, respectively.
35 // -----------------------------------------------------------------------------
36 
Message_data(void * msg)37 void* Message_data(void* msg) {
38   return ((uint8_t *)msg) + sizeof(MessageHeader);
39 }
40 
Message_mark(void * _self)41 void Message_mark(void* _self) {
42   MessageHeader* self = (MessageHeader *)_self;
43   layout_mark(self->descriptor->layout, Message_data(self));
44 }
45 
// Free callback listed in Message_type: hands the message allocation back via
// xfree().
void Message_free(void* self) {
  xfree(self);
}
49 
50 rb_data_type_t Message_type = {
51   "Message",
52   { Message_mark, Message_free, NULL },
53 };
54 
// Allocation function for message classes. Reads the descriptor stored on
// |klass|, allocates a MessageHeader followed by layout->size bytes of field
// storage, zeroes that storage, wraps the allocation in a typed-data object
// and finally lets layout_init() set up the fields.
VALUE Message_alloc(VALUE klass) {
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  Descriptor* desc = ruby_to_Descriptor(descriptor);
  size_t data_size = desc->layout->size;
  MessageHeader* msg =
      (MessageHeader*)ALLOC_N(uint8_t, sizeof(MessageHeader) + data_size);
  void* data = Message_data(msg);
  VALUE ret;

  memset(data, 0, data_size);

  // Wrap before initializing the fields: if a collection runs while
  // layout_init() is creating objects, everything already stored in the
  // message is reachable through the wrapper.
  ret = TypedData_Wrap_Struct(klass, &Message_type, msg);
  msg->descriptor = desc;
  rb_ivar_set(ret, descriptor_instancevar_interned, descriptor);

  layout_init(desc->layout, data);

  return ret;
}
74 
which_oneof_field(MessageHeader * self,const upb_oneofdef * o)75 static VALUE which_oneof_field(MessageHeader* self, const upb_oneofdef* o) {
76   upb_oneof_iter it;
77   size_t case_ofs;
78   uint32_t oneof_case;
79   const upb_fielddef* first_field;
80   const upb_fielddef* f;
81 
82   // If no fields in the oneof, always nil.
83   if (upb_oneofdef_numfields(o) == 0) {
84     return Qnil;
85   }
86   // Grab the first field in the oneof so we can get its layout info to find the
87   // oneof_case field.
88   upb_oneof_begin(&it, o);
89   assert(!upb_oneof_done(&it));
90   first_field = upb_oneof_iter_field(&it);
91   assert(upb_fielddef_containingoneof(first_field) != NULL);
92 
93   case_ofs =
94       self->descriptor->layout->
95       fields[upb_fielddef_index(first_field)].case_offset;
96   oneof_case = *((uint32_t*)((char*)Message_data(self) + case_ofs));
97 
98   if (oneof_case == ONEOF_CASE_NONE) {
99     return Qnil;
100   }
101 
102   // oneof_case is a field index, so find that field.
103   f = upb_oneofdef_itof(o, oneof_case);
104   assert(f != NULL);
105 
106   return ID2SYM(rb_intern(upb_fielddef_name(f)));
107 }
108 
109 /*
110  * call-seq:
111  *     Message.method_missing(*args)
112  *
113  * Provides accessors and setters for message fields according to their field
114  * names. For any field whose name does not conflict with a built-in method, an
115  * accessor is provided with the same name as the field, and a setter is
116  * provided with the name of the field plus the '=' suffix. Thus, given a
117  * message instance 'msg' with field 'foo', the following code is valid:
118  *
119  *     msg.foo = 42
120  *     puts msg.foo
121  *
122  * This method also provides read-only accessors for oneofs. If a oneof exists
123  * with name 'my_oneof', then msg.my_oneof will return a Ruby symbol equal to
124  * the name of the field in that oneof that is currently set, or nil if none.
125  */
Message_method_missing(int argc,VALUE * argv,VALUE _self)126 VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) {
127   MessageHeader* self;
128   VALUE method_name, method_str;
129   char* name;
130   size_t name_len;
131   bool setter;
132   const upb_oneofdef* o;
133   const upb_fielddef* f;
134 
135   TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);
136   if (argc < 1) {
137     rb_raise(rb_eArgError, "Expected method name as first argument.");
138   }
139   method_name = argv[0];
140   if (!SYMBOL_P(method_name)) {
141     rb_raise(rb_eArgError, "Expected symbol as method name.");
142   }
143   method_str = rb_id2str(SYM2ID(method_name));
144   name = RSTRING_PTR(method_str);
145   name_len = RSTRING_LEN(method_str);
146   setter = false;
147 
148   // Setters have names that end in '='.
149   if (name[name_len - 1] == '=') {
150     setter = true;
151     name_len--;
152   }
153 
154   // See if this name corresponds to either a oneof or field in this message.
155   if (!upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len, &f,
156                              &o)) {
157     return rb_call_super(argc, argv);
158   }
159 
160   if (o != NULL) {
161     // This is a oneof -- return which field inside the oneof is set.
162     if (setter) {
163       rb_raise(rb_eRuntimeError, "Oneof accessors are read-only.");
164     }
165     return which_oneof_field(self, o);
166   } else {
167     // This is a field -- get or set the field's value.
168     assert(f);
169     if (setter) {
170       if (argc < 2) {
171         rb_raise(rb_eArgError, "No value provided to setter.");
172       }
173       layout_set(self->descriptor->layout, Message_data(self), f, argv[1]);
174       return Qnil;
175     } else {
176       return layout_get(self->descriptor->layout, Message_data(self), f);
177     }
178   }
179 }
180 
// rb_hash_foreach() callback used by Message_initialize: applies one
// key/value pair of the initialization hash to the message |_self|.
//
// |key| must be a symbol naming a field of the message. Map fields take a
// Hash that is merged into the field's map, repeated fields take an Array
// whose elements are pushed one by one, and every other field is assigned
// through layout_set(). Raises ArgumentError for non-symbol keys, unknown
// field names, and values of the wrong container type. Always returns 0.
int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) {
  MessageHeader* self;
  VALUE method_str;
  char* name;
  const upb_fielddef* f;
  TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);

  if (!SYMBOL_P(key)) {
    rb_raise(rb_eArgError,
             "Expected symbols as hash keys in initialization map.");
  }

  method_str = rb_id2str(SYM2ID(key));
  name = RSTRING_PTR(method_str);
  f = upb_msgdef_ntofz(self->descriptor->msgdef, name);
  if (f == NULL) {
    rb_raise(rb_eArgError,
             "Unknown field name '%s' in initialization map entry.", name);
  }

  // The map check has to come before the repeated-label check.
  if (is_map_field(f)) {
    VALUE map;

    if (TYPE(val) != T_HASH) {
      rb_raise(rb_eArgError,
               "Expected Hash object as initializer value for map field '%s'.", name);
    }
    map = layout_get(self->descriptor->layout, Message_data(self), f);
    Map_merge_into_self(map, val);
  } else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
    VALUE ary;

    if (TYPE(val) != T_ARRAY) {
      rb_raise(rb_eArgError,
               "Expected array as initializer value for repeated field '%s'.", name);
    }
    ary = layout_get(self->descriptor->layout, Message_data(self), f);
    // RARRAY_LEN() yields a long, so the index is a long as well; an int
    // index could overflow before reaching the end of a very large array.
    for (long i = 0; i < RARRAY_LEN(val); i++) {
      RepeatedField_push(ary, rb_ary_entry(val, i));
    }
  } else {
    layout_set(self->descriptor->layout, Message_data(self), f, val);
  }
  return 0;
}
226 
227 /*
228  * call-seq:
229  *     Message.new(kwargs) => new_message
230  *
231  * Creates a new instance of the given message class. Keyword arguments may be
232  * provided with keywords corresponding to field names.
233  *
234  * Note that no literal Message class exists. Only concrete classes per message
235  * type exist, as provided by the #msgclass method on Descriptors after they
236  * have been added to a pool. The method definitions described here on the
237  * Message class are provided on each concrete message class.
238  */
Message_initialize(int argc,VALUE * argv,VALUE _self)239 VALUE Message_initialize(int argc, VALUE* argv, VALUE _self) {
240   VALUE hash_args;
241 
242   if (argc == 0) {
243     return Qnil;
244   }
245   if (argc != 1) {
246     rb_raise(rb_eArgError, "Expected 0 or 1 arguments.");
247   }
248   hash_args = argv[0];
249   if (TYPE(hash_args) != T_HASH) {
250     rb_raise(rb_eArgError, "Expected hash arguments.");
251   }
252 
253   rb_hash_foreach(hash_args, Message_initialize_kwarg, _self);
254   return Qnil;
255 }
256 
257 /*
258  * call-seq:
259  *     Message.dup => new_message
260  *
261  * Performs a shallow copy of this message and returns the new copy.
262  */
Message_dup(VALUE _self)263 VALUE Message_dup(VALUE _self) {
264   MessageHeader* self;
265   VALUE new_msg;
266   MessageHeader* new_msg_self;
267   TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);
268 
269   new_msg = rb_class_new_instance(0, NULL, CLASS_OF(_self));
270   TypedData_Get_Struct(new_msg, MessageHeader, &Message_type, new_msg_self);
271 
272   layout_dup(self->descriptor->layout,
273              Message_data(new_msg_self),
274              Message_data(self));
275 
276   return new_msg;
277 }
278 
279 // Internal only; used by Google::Protobuf.deep_copy.
Message_deep_copy(VALUE _self)280 VALUE Message_deep_copy(VALUE _self) {
281   MessageHeader* self;
282   MessageHeader* new_msg_self;
283   VALUE new_msg;
284   TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);
285 
286   new_msg = rb_class_new_instance(0, NULL, CLASS_OF(_self));
287   TypedData_Get_Struct(new_msg, MessageHeader, &Message_type, new_msg_self);
288 
289   layout_deep_copy(self->descriptor->layout,
290                    Message_data(new_msg_self),
291                    Message_data(self));
292 
293   return new_msg;
294 }
295 
296 /*
297  * call-seq:
298  *     Message.==(other) => boolean
299  *
300  * Performs a deep comparison of this message with another. Messages are equal
301  * if they have the same type and if each field is equal according to the :==
302  * method's semantics (a more efficient comparison may actually be done if the
303  * field is of a primitive type).
304  */
Message_eq(VALUE _self,VALUE _other)305 VALUE Message_eq(VALUE _self, VALUE _other) {
306   MessageHeader* self;
307   MessageHeader* other;
308   TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);
309   TypedData_Get_Struct(_other, MessageHeader, &Message_type, other);
310 
311   if (self->descriptor != other->descriptor) {
312     return Qfalse;
313   }
314 
315   return layout_eq(self->descriptor->layout,
316                    Message_data(self),
317                    Message_data(other));
318 }
319 
320 /*
321  * call-seq:
322  *     Message.hash => hash_value
323  *
324  * Returns a hash value that represents this message's field values.
325  */
Message_hash(VALUE _self)326 VALUE Message_hash(VALUE _self) {
327   MessageHeader* self;
328   TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);
329 
330   return layout_hash(self->descriptor->layout, Message_data(self));
331 }
332 
333 /*
334  * call-seq:
335  *     Message.inspect => string
336  *
337  * Returns a human-readable string representing this message. It will be
338  * formatted as "<MessageType: field1: value1, field2: value2, ...>". Each
339  * field's value is represented according to its own #inspect method.
340  */
Message_inspect(VALUE _self)341 VALUE Message_inspect(VALUE _self) {
342   MessageHeader* self;
343   VALUE str;
344   TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);
345 
346   str = rb_str_new2("<");
347   str = rb_str_append(str, rb_str_new2(rb_class2name(CLASS_OF(_self))));
348   str = rb_str_cat2(str, ": ");
349   str = rb_str_append(str, layout_inspect(
350       self->descriptor->layout, Message_data(self)));
351   str = rb_str_cat2(str, ">");
352   return str;
353 }
354 
355 
/*
 * call-seq:
 *     Message.to_h => {}
 *
 * Returns the message as a Ruby Hash object, with one entry per field of the
 * message and the field names, as symbols, for keys. Repeated fields are
 * converted to arrays.
 */
VALUE Message_to_h(VALUE _self) {
  MessageHeader* self;
  VALUE hash;
  upb_msg_field_iter it;
  TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);

  hash = rb_hash_new();

  for (upb_msg_field_begin(&it, self->descriptor->msgdef);
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* field = upb_msg_iter_field(&it);
    VALUE msg_value = layout_get(self->descriptor->layout, Message_data(self),
                                 field);
    VALUE msg_key   = ID2SYM(rb_intern(upb_fielddef_name(field)));
    // Map fields must be told apart before looking at the repeated label
    // (Message_initialize_kwarg orders its checks the same way); their value
    // is not a RepeatedField, so it is stored unchanged rather than being
    // passed to RepeatedField_to_ary().
    // NOTE(review): convert the map to a plain Hash here if a suitable helper
    // is available -- confirm against the Map implementation.
    if (!is_map_field(field) &&
        upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
      msg_value = RepeatedField_to_ary(msg_value);
    }
    rb_hash_aset(hash, msg_key, msg_value);
  }
  return hash;
}
378 
379 
380 
381 /*
382  * call-seq:
383  *     Message.[](index) => value
384  *
385  * Accesses a field's value by field name. The provided field name should be a
386  * string.
387  */
Message_index(VALUE _self,VALUE field_name)388 VALUE Message_index(VALUE _self, VALUE field_name) {
389   MessageHeader* self;
390   const upb_fielddef* field;
391   TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);
392   Check_Type(field_name, T_STRING);
393   field = upb_msgdef_ntofz(self->descriptor->msgdef, RSTRING_PTR(field_name));
394   if (field == NULL) {
395     return Qnil;
396   }
397   return layout_get(self->descriptor->layout, Message_data(self), field);
398 }
399 
400 /*
401  * call-seq:
402  *     Message.[]=(index, value)
403  *
404  * Sets a field's value by field name. The provided field name should be a
405  * string.
406  */
Message_index_set(VALUE _self,VALUE field_name,VALUE value)407 VALUE Message_index_set(VALUE _self, VALUE field_name, VALUE value) {
408   MessageHeader* self;
409   const upb_fielddef* field;
410   TypedData_Get_Struct(_self, MessageHeader, &Message_type, self);
411   Check_Type(field_name, T_STRING);
412   field = upb_msgdef_ntofz(self->descriptor->msgdef, RSTRING_PTR(field_name));
413   if (field == NULL) {
414     rb_raise(rb_eArgError, "Unknown field: %s", RSTRING_PTR(field_name));
415   }
416   layout_set(self->descriptor->layout, Message_data(self), field, value);
417   return Qnil;
418 }
419 
420 /*
421  * call-seq:
422  *     Message.descriptor => descriptor
423  *
424  * Class method that returns the Descriptor instance corresponding to this
425  * message class's type.
426  */
Message_descriptor(VALUE klass)427 VALUE Message_descriptor(VALUE klass) {
428   return rb_ivar_get(klass, descriptor_instancevar_interned);
429 }
430 
build_class_from_descriptor(Descriptor * desc)431 VALUE build_class_from_descriptor(Descriptor* desc) {
432   const char *name;
433   VALUE klass;
434 
435   if (desc->layout == NULL) {
436     desc->layout = create_layout(desc->msgdef);
437   }
438   if (desc->fill_method == NULL) {
439     desc->fill_method = new_fillmsg_decodermethod(desc, &desc->fill_method);
440   }
441 
442   name = upb_msgdef_fullname(desc->msgdef);
443   if (name == NULL) {
444     rb_raise(rb_eRuntimeError, "Descriptor does not have assigned name.");
445   }
446 
447   klass = rb_define_class_id(
448       // Docs say this parameter is ignored. User will assign return value to
449       // their own toplevel constant class name.
450       rb_intern("Message"),
451       rb_cObject);
452   rb_ivar_set(klass, descriptor_instancevar_interned,
453               get_def_obj(desc->msgdef));
454   rb_define_alloc_func(klass, Message_alloc);
455   rb_require("google/protobuf/message_exts");
456   rb_include_module(klass, rb_eval_string("Google::Protobuf::MessageExts"));
457   rb_extend_object(
458       klass, rb_eval_string("Google::Protobuf::MessageExts::ClassMethods"));
459 
460   rb_define_method(klass, "method_missing",
461                    Message_method_missing, -1);
462   rb_define_method(klass, "initialize", Message_initialize, -1);
463   rb_define_method(klass, "dup", Message_dup, 0);
464   // Also define #clone so that we don't inherit Object#clone.
465   rb_define_method(klass, "clone", Message_dup, 0);
466   rb_define_method(klass, "==", Message_eq, 1);
467   rb_define_method(klass, "hash", Message_hash, 0);
468   rb_define_method(klass, "to_h", Message_to_h, 0);
469   rb_define_method(klass, "to_hash", Message_to_h, 0);
470   rb_define_method(klass, "inspect", Message_inspect, 0);
471   rb_define_method(klass, "[]", Message_index, 1);
472   rb_define_method(klass, "[]=", Message_index_set, 2);
473   rb_define_singleton_method(klass, "decode", Message_decode, 1);
474   rb_define_singleton_method(klass, "encode", Message_encode, 1);
475   rb_define_singleton_method(klass, "decode_json", Message_decode_json, 1);
476   rb_define_singleton_method(klass, "encode_json", Message_encode_json, -1);
477   rb_define_singleton_method(klass, "descriptor", Message_descriptor, 0);
478 
479   return klass;
480 }
481 
482 /*
483  * call-seq:
484  *     Enum.lookup(number) => name
485  *
486  * This module method, provided on each generated enum module, looks up an enum
487  * value by number and returns its name as a Ruby symbol, or nil if not found.
488  */
enum_lookup(VALUE self,VALUE number)489 VALUE enum_lookup(VALUE self, VALUE number) {
490   int32_t num = NUM2INT(number);
491   VALUE desc = rb_ivar_get(self, descriptor_instancevar_interned);
492   EnumDescriptor* enumdesc = ruby_to_EnumDescriptor(desc);
493 
494   const char* name = upb_enumdef_iton(enumdesc->enumdef, num);
495   if (name == NULL) {
496     return Qnil;
497   } else {
498     return ID2SYM(rb_intern(name));
499   }
500 }
501 
502 /*
503  * call-seq:
504  *     Enum.resolve(name) => number
505  *
506  * This module method, provided on each generated enum module, looks up an enum
507  * value by name (as a Ruby symbol) and returns its name, or nil if not found.
508  */
enum_resolve(VALUE self,VALUE sym)509 VALUE enum_resolve(VALUE self, VALUE sym) {
510   const char* name = rb_id2name(SYM2ID(sym));
511   VALUE desc = rb_ivar_get(self, descriptor_instancevar_interned);
512   EnumDescriptor* enumdesc = ruby_to_EnumDescriptor(desc);
513 
514   int32_t num = 0;
515   bool found = upb_enumdef_ntoiz(enumdesc->enumdef, name, &num);
516   if (!found) {
517     return Qnil;
518   } else {
519     return INT2NUM(num);
520   }
521 }
522 
523 /*
524  * call-seq:
525  *     Enum.descriptor
526  *
527  * This module method, provided on each generated enum module, returns the
528  * EnumDescriptor corresponding to this enum type.
529  */
enum_descriptor(VALUE self)530 VALUE enum_descriptor(VALUE self) {
531   return rb_ivar_get(self, descriptor_instancevar_interned);
532 }
533 
build_module_from_enumdesc(EnumDescriptor * enumdesc)534 VALUE build_module_from_enumdesc(EnumDescriptor* enumdesc) {
535   VALUE mod = rb_define_module_id(
536       rb_intern(upb_enumdef_fullname(enumdesc->enumdef)));
537 
538   upb_enum_iter it;
539   for (upb_enum_begin(&it, enumdesc->enumdef);
540        !upb_enum_done(&it);
541        upb_enum_next(&it)) {
542     const char* name = upb_enum_iter_name(&it);
543     int32_t value = upb_enum_iter_number(&it);
544     if (name[0] < 'A' || name[0] > 'Z') {
545       rb_raise(rb_eTypeError,
546                "Enum value '%s' does not start with an uppercase letter "
547                "as is required for Ruby constants.",
548                name);
549     }
550     rb_define_const(mod, name, INT2NUM(value));
551   }
552 
553   rb_define_singleton_method(mod, "lookup", enum_lookup, 1);
554   rb_define_singleton_method(mod, "resolve", enum_resolve, 1);
555   rb_define_singleton_method(mod, "descriptor", enum_descriptor, 0);
556   rb_ivar_set(mod, descriptor_instancevar_interned,
557               get_def_obj(enumdesc->enumdef));
558 
559   return mod;
560 }
561 
562 /*
563  * call-seq:
564  *     Google::Protobuf.deep_copy(obj) => copy_of_obj
565  *
566  * Performs a deep copy of a RepeatedField instance, a Map instance, or a
567  * message object, recursively copying its members.
568  */
Google_Protobuf_deep_copy(VALUE self,VALUE obj)569 VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) {
570   VALUE klass = CLASS_OF(obj);
571   if (klass == cRepeatedField) {
572     return RepeatedField_deep_copy(obj);
573   } else if (klass == cMap) {
574     return Map_deep_copy(obj);
575   } else {
576     return Message_deep_copy(obj);
577   }
578 }
579