1 #region Copyright notice and license
2 // Protocol Buffers - Google's data interchange format
3 // Copyright 2015 Google Inc.  All rights reserved.
4 // https://developers.google.com/protocol-buffers/
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #endregion
32 
33 using Google.Protobuf.Reflection;
34 using Google.Protobuf.WellKnownTypes;
35 using System;
36 using System.Collections;
37 using System.Collections.Generic;
38 using System.Globalization;
39 using System.IO;
40 using System.Text;
41 using System.Text.RegularExpressions;
42 
43 namespace Google.Protobuf
44 {
45     /// <summary>
46     /// Reflection-based converter from JSON to messages.
47     /// </summary>
48     /// <remarks>
49     /// <para>
50     /// Instances of this class are thread-safe, with no mutable state.
51     /// </para>
52     /// <para>
53     /// This is a simple start to get JSON parsing working. As it's reflection-based,
54     /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
55     /// (This code is generally not heavily optimized.)
56     /// </para>
57     /// </remarks>
58     public sealed class JsonParser
59     {
        // Note: using 0-9 instead of \d to ensure no non-ASCII digits.
        // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest.
        // Named groups: "datetime" (date and time to whole seconds), optional "subseconds" (a '.' followed by
        // 1-9 digits) and "offset" (either 'Z' or a signed hh:mm offset).
        private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable);
        // Named groups: optional "sign" ('-'), "int" (1-12 digits of whole seconds) and optional "subseconds"
        // (a '.' followed by 1-9 digits); the text must end with a literal 's'.
        private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable);
        // NOTE(review): presumably indexed by the length of the matched "subseconds" text (which includes the
        // leading '.') to scale the parsed digits up to nanoseconds - the usage is outside this view; confirm.
        private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 };
        // NOTE(review): presumably the separator used to split the paths of a FieldMask string - usage not visible here; confirm.
        private static readonly char[] FieldMaskPathSeparators = new[] { ',' };

        // Shared parser built from Settings.Default; exposed through the Default property.
        private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
68 
69         // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
70         // and the signatures of various methods.
71         private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
72             WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
73         {
74             { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
75             { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
76             { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
77             { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
78                 parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
79             { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
80             { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) },
81             { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
82             { Int32Value.Descriptor.FullName, MergeWrapperField },
83             { Int64Value.Descriptor.FullName, MergeWrapperField },
84             { UInt32Value.Descriptor.FullName, MergeWrapperField },
85             { UInt64Value.Descriptor.FullName, MergeWrapperField },
86             { FloatValue.Descriptor.FullName, MergeWrapperField },
87             { DoubleValue.Descriptor.FullName, MergeWrapperField },
88             { BytesValue.Descriptor.FullName, MergeWrapperField },
89             { StringValue.Descriptor.FullName, MergeWrapperField }
90         };
91 
92         // Convenience method to avoid having to repeat the same code multiple times in the above
93         // dictionary initialization.
MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)94         private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
95         {
96             parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer);
97         }
98 
99         /// <summary>
        /// Returns a parser using the default settings.
101         /// </summary>
102         public static JsonParser Default { get { return defaultInstance; } }
103 
        // Configuration captured by the constructor; this class reads settings.RecursionLimit and
        // settings.TypeRegistry from it while parsing.
        private readonly Settings settings;
105 
106         /// <summary>
        /// Creates a new parser with the given settings.
108         /// </summary>
109         /// <param name="settings">The settings.</param>
JsonParser(Settings settings)110         public JsonParser(Settings settings)
111         {
112             this.settings = settings;
113         }
114 
115         /// <summary>
116         /// Parses <paramref name="json"/> and merges the information into the given message.
117         /// </summary>
118         /// <param name="message">The message to merge the JSON information into.</param>
119         /// <param name="json">The JSON to parse.</param>
Merge(IMessage message, string json)120         internal void Merge(IMessage message, string json)
121         {
122             Merge(message, new StringReader(json));
123         }
124 
125         /// <summary>
126         /// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
127         /// </summary>
128         /// <param name="message">The message to merge the JSON information into.</param>
129         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
Merge(IMessage message, TextReader jsonReader)130         internal void Merge(IMessage message, TextReader jsonReader)
131         {
132             var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
133             Merge(message, tokenizer);
134             var lastToken = tokenizer.Next();
135             if (lastToken != JsonToken.EndDocument)
136             {
137                 throw new InvalidProtocolBufferException("Expected end of JSON after object");
138             }
139         }
140 
141         /// <summary>
142         /// Merges the given message using data from the given tokenizer. In most cases, the next
143         /// token should be a "start object" token, but wrapper types and nullity can invalidate
144         /// that assumption. This is implemented as an LL(1) recursive descent parser over the stream
145         /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the
146         /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
147         /// </summary>
Merge(IMessage message, JsonTokenizer tokenizer)148         private void Merge(IMessage message, JsonTokenizer tokenizer)
149         {
150             if (tokenizer.ObjectDepth > settings.RecursionLimit)
151             {
152                 throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
153             }
154             if (message.Descriptor.IsWellKnownType)
155             {
156                 Action<JsonParser, IMessage, JsonTokenizer> handler;
157                 if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler))
158                 {
159                     handler(this, message, tokenizer);
160                     return;
161                 }
162                 // Well-known types with no special handling continue in the normal way.
163             }
164             var token = tokenizer.Next();
165             if (token.Type != JsonToken.TokenType.StartObject)
166             {
167                 throw new InvalidProtocolBufferException("Expected an object");
168             }
169             var descriptor = message.Descriptor;
170             var jsonFieldMap = descriptor.Fields.ByJsonName();
171             // All the oneof fields we've already accounted for - we can only see each of them once.
172             // The set is created lazily to avoid the overhead of creating a set for every message
173             // we parsed, when oneofs are relatively rare.
174             HashSet<OneofDescriptor> seenOneofs = null;
175             while (true)
176             {
177                 token = tokenizer.Next();
178                 if (token.Type == JsonToken.TokenType.EndObject)
179                 {
180                     return;
181                 }
182                 if (token.Type != JsonToken.TokenType.Name)
183                 {
184                     throw new InvalidOperationException("Unexpected token type " + token.Type);
185                 }
186                 string name = token.StringValue;
187                 FieldDescriptor field;
188                 if (jsonFieldMap.TryGetValue(name, out field))
189                 {
190                     if (field.ContainingOneof != null)
191                     {
192                         if (seenOneofs == null)
193                         {
194                             seenOneofs = new HashSet<OneofDescriptor>();
195                         }
196                         if (!seenOneofs.Add(field.ContainingOneof))
197                         {
198                             throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
199                         }
200                     }
201                     MergeField(message, field, tokenizer);
202                 }
203                 else
204                 {
205                     // TODO: Is this what we want to do? If not, we'll need to skip the value,
206                     // which may be an object or array. (We might want to put code in the tokenizer
207                     // to do that.)
208                     throw new InvalidProtocolBufferException("Unknown field: " + name);
209                 }
210             }
211         }
212 
MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)213         private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
214         {
215             var token = tokenizer.Next();
216             if (token.Type == JsonToken.TokenType.Null)
217             {
218                 // Clear the field if we see a null token, unless it's for a singular field of type
219                 // google.protobuf.Value.
220                 // Note: different from Java API, which just ignores it.
221                 // TODO: Bring it more in line? Discuss...
222                 if (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field))
223                 {
224                     field.Accessor.Clear(message);
225                     return;
226                 }
227             }
228             tokenizer.PushBack(token);
229 
230             if (field.IsMap)
231             {
232                 MergeMapField(message, field, tokenizer);
233             }
234             else if (field.IsRepeated)
235             {
236                 MergeRepeatedField(message, field, tokenizer);
237             }
238             else
239             {
240                 var value = ParseSingleValue(field, tokenizer);
241                 field.Accessor.SetValue(message, value);
242             }
243         }
244 
MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)245         private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
246         {
247             var token = tokenizer.Next();
248             if (token.Type != JsonToken.TokenType.StartArray)
249             {
250                 throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type);
251             }
252 
253             IList list = (IList) field.Accessor.GetValue(message);
254             while (true)
255             {
256                 token = tokenizer.Next();
257                 if (token.Type == JsonToken.TokenType.EndArray)
258                 {
259                     return;
260                 }
261                 tokenizer.PushBack(token);
262                 if (token.Type == JsonToken.TokenType.Null)
263                 {
264                     throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
265                 }
266                 list.Add(ParseSingleValue(field, tokenizer));
267             }
268         }
269 
MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)270         private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
271         {
272             // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
273             var token = tokenizer.Next();
274             if (token.Type != JsonToken.TokenType.StartObject)
275             {
276                 throw new InvalidProtocolBufferException("Expected an object to populate a map");
277             }
278 
279             var type = field.MessageType;
280             var keyField = type.FindFieldByNumber(1);
281             var valueField = type.FindFieldByNumber(2);
282             if (keyField == null || valueField == null)
283             {
284                 throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
285             }
286             IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message);
287 
288             while (true)
289             {
290                 token = tokenizer.Next();
291                 if (token.Type == JsonToken.TokenType.EndObject)
292                 {
293                     return;
294                 }
295                 object key = ParseMapKey(keyField, token.StringValue);
296                 object value = ParseSingleValue(valueField, tokenizer);
297                 if (value == null)
298                 {
299                     throw new InvalidProtocolBufferException("Map values must not be null");
300                 }
301                 dictionary[key] = value;
302             }
303         }
304 
IsGoogleProtobufValueField(FieldDescriptor field)305         private static bool IsGoogleProtobufValueField(FieldDescriptor field)
306         {
307             return field.FieldType == FieldType.Message &&
308                 field.MessageType.FullName == Value.Descriptor.FullName;
309         }
310 
ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)311         private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
312         {
313             var token = tokenizer.Next();
314             if (token.Type == JsonToken.TokenType.Null)
315             {
316                 // TODO: In order to support dynamic messages, we should really build this up
317                 // dynamically.
318                 if (IsGoogleProtobufValueField(field))
319                 {
320                     return Value.ForNull();
321                 }
322                 return null;
323             }
324 
325             var fieldType = field.FieldType;
326             if (fieldType == FieldType.Message)
327             {
328                 // Parse wrapper types as their constituent types.
329                 // TODO: What does this mean for null?
330                 if (field.MessageType.IsWrapperType)
331                 {
332                     field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
333                     fieldType = field.FieldType;
334                 }
335                 else
336                 {
337                     // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
338                     tokenizer.PushBack(token);
339                     IMessage subMessage = NewMessageForField(field);
340                     Merge(subMessage, tokenizer);
341                     return subMessage;
342                 }
343             }
344 
345             switch (token.Type)
346             {
347                 case JsonToken.TokenType.True:
348                 case JsonToken.TokenType.False:
349                     if (fieldType == FieldType.Bool)
350                     {
351                         return token.Type == JsonToken.TokenType.True;
352                     }
353                     // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
354                     // case instead, but this way we'd only need to change one place.
355                     goto default;
356                 case JsonToken.TokenType.StringValue:
357                     return ParseSingleStringValue(field, token.StringValue);
358                 // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
359                 case JsonToken.TokenType.Number:
360                     return ParseSingleNumberValue(field, token);
361                 case JsonToken.TokenType.Null:
362                     throw new NotImplementedException("Haven't worked out what to do for null yet");
363                 default:
364                     throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
365             }
366         }
367 
368         /// <summary>
369         /// Parses <paramref name="json"/> into a new message.
370         /// </summary>
371         /// <typeparam name="T">The type of message to create.</typeparam>
372         /// <param name="json">The JSON to parse.</param>
373         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
374         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
375         public T Parse<T>(string json) where T : IMessage, new()
376         {
377             ProtoPreconditions.CheckNotNull(json, nameof(json));
378             return Parse<T>(new StringReader(json));
379         }
380 
381         /// <summary>
382         /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
383         /// </summary>
384         /// <typeparam name="T">The type of message to create.</typeparam>
385         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
386         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
387         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
388         public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
389         {
390             ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
391             T message = new T();
392             Merge(message, jsonReader);
393             return message;
394         }
395 
396         /// <summary>
397         /// Parses <paramref name="json"/> into a new message.
398         /// </summary>
399         /// <param name="json">The JSON to parse.</param>
400         /// <param name="descriptor">Descriptor of message type to parse.</param>
401         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
402         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
Parse(string json, MessageDescriptor descriptor)403         public IMessage Parse(string json, MessageDescriptor descriptor)
404         {
405             ProtoPreconditions.CheckNotNull(json, nameof(json));
406             ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
407             return Parse(new StringReader(json), descriptor);
408         }
409 
410         /// <summary>
411         /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
412         /// </summary>
413         /// <param name="jsonReader">Reader providing the JSON to parse.</param>
414         /// <param name="descriptor">Descriptor of message type to parse.</param>
415         /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
416         /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
Parse(TextReader jsonReader, MessageDescriptor descriptor)417         public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
418         {
419             ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
420             ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
421             IMessage message = descriptor.Parser.CreateTemplate();
422             Merge(message, jsonReader);
423             return message;
424         }
425 
MergeStructValue(IMessage message, JsonTokenizer tokenizer)426         private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
427         {
428             var firstToken = tokenizer.Next();
429             var fields = message.Descriptor.Fields;
430             switch (firstToken.Type)
431             {
432                 case JsonToken.TokenType.Null:
433                     fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
434                     return;
435                 case JsonToken.TokenType.StringValue:
436                     fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue);
437                     return;
438                 case JsonToken.TokenType.Number:
439                     fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue);
440                     return;
441                 case JsonToken.TokenType.False:
442                 case JsonToken.TokenType.True:
443                     fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True);
444                     return;
445                 case JsonToken.TokenType.StartObject:
446                     {
447                         var field = fields[Value.StructValueFieldNumber];
448                         var structMessage = NewMessageForField(field);
449                         tokenizer.PushBack(firstToken);
450                         Merge(structMessage, tokenizer);
451                         field.Accessor.SetValue(message, structMessage);
452                         return;
453                     }
454                 case JsonToken.TokenType.StartArray:
455                     {
456                         var field = fields[Value.ListValueFieldNumber];
457                         var list = NewMessageForField(field);
458                         tokenizer.PushBack(firstToken);
459                         Merge(list, tokenizer);
460                         field.Accessor.SetValue(message, list);
461                         return;
462                     }
463                 default:
464                     throw new InvalidOperationException("Unexpected token type: " + firstToken.Type);
465             }
466         }
467 
MergeStruct(IMessage message, JsonTokenizer tokenizer)468         private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
469         {
470             var token = tokenizer.Next();
471             if (token.Type != JsonToken.TokenType.StartObject)
472             {
473                 throw new InvalidProtocolBufferException("Expected object value for Struct");
474             }
475             tokenizer.PushBack(token);
476 
477             var field = message.Descriptor.Fields[Struct.FieldsFieldNumber];
478             MergeMapField(message, field, tokenizer);
479         }
480 
        // Merges a JSON object into a google.protobuf.Any message.
        //
        // The object's tokens are recorded until its own "@type" property is found; the type URL is then
        // resolved through settings.TypeRegistry, the recorded tokens (plus whatever is left of the object)
        // are replayed into a new message of that type, and the serialized result is stored in the Any
        // together with the type URL.
        //
        // Throws InvalidProtocolBufferException when the value is not an object, has no "@type" property at
        // its own depth, or the "@type" value is not a string; throws InvalidOperationException when the
        // type registry has no descriptor for the type name.
        private void MergeAny(IMessage message, JsonTokenizer tokenizer)
        {
            // Record the token stream until we see the @type property. At that point, we can take the value, consult
            // the type registry for the relevant message, and replay the stream, omitting the @type property.
            var tokens = new List<JsonToken>();

            var token = tokenizer.Next();
            if (token.Type != JsonToken.TokenType.StartObject)
            {
                throw new InvalidProtocolBufferException("Expected object value for Any");
            }
            // Depth of the tokenizer just after the Any's opening brace has been read.
            int typeUrlObjectDepth = tokenizer.ObjectDepth;

            // The check for the property depth protects us from nested Any values which occur before the type URL
            // for *this* Any.
            while (token.Type != JsonToken.TokenType.Name ||
                token.StringValue != JsonFormatter.AnyTypeUrlField ||
                tokenizer.ObjectDepth != typeUrlObjectDepth)
            {
                // The first token recorded is the start-object token itself, so the replay begins with it.
                tokens.Add(token);
                token = tokenizer.Next();

                // Dropping below the starting depth means the Any's object ended before @type was seen.
                if (tokenizer.ObjectDepth < typeUrlObjectDepth)
                {
                    throw new InvalidProtocolBufferException("Any message with no @type");
                }
            }

            // Don't add the @type property or its value to the recorded token list
            token = tokenizer.Next();
            if (token.Type != JsonToken.TokenType.StringValue)
            {
                throw new InvalidProtocolBufferException("Expected string value for Any.@type");
            }
            string typeUrl = token.StringValue;
            string typeName = Any.GetTypeName(typeUrl);

            MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
            if (descriptor == null)
            {
                throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
            }

            // Now replay the token stream we've already read and anything that remains of the object, just parsing it
            // as normal. Our original tokenizer should end up at the end of the object.
            var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
            var body = descriptor.Parser.CreateTemplate();
            // Well-known types are parsed from the object's "value" property rather than from the object as a whole.
            if (descriptor.IsWellKnownType)
            {
                MergeWellKnownTypeAnyBody(body, replay);
            }
            else
            {
                Merge(body, replay);
            }
            var data = body.ToByteString();

            // Now that we have the message data, we can pack it into an Any (the message received as a parameter).
            message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
            message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
        }
542 
543         // Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
544         // in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
545         // itself, and then end-object.
MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)546         private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
547         {
548             var token = tokenizer.Next(); // Definitely start-object; checked in previous method
549             token = tokenizer.Next();
550             // TODO: What about an absent Int32Value, for example?
551             if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField)
552             {
553                 throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
554             }
555             Merge(body, tokenizer);
556             token = tokenizer.Next();
557             if (token.Type != JsonToken.TokenType.EndObject)
558             {
559                 throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
560             }
561         }
562 
563         #region Utility methods which don't depend on the state (or settings) of the parser.
ParseMapKey(FieldDescriptor field, string keyText)564         private static object ParseMapKey(FieldDescriptor field, string keyText)
565         {
566             switch (field.FieldType)
567             {
568                 case FieldType.Bool:
569                     if (keyText == "true")
570                     {
571                         return true;
572                     }
573                     if (keyText == "false")
574                     {
575                         return false;
576                     }
577                     throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
578                 case FieldType.String:
579                     return keyText;
580                 case FieldType.Int32:
581                 case FieldType.SInt32:
582                 case FieldType.SFixed32:
583                     return ParseNumericString(keyText, int.Parse);
584                 case FieldType.UInt32:
585                 case FieldType.Fixed32:
586                     return ParseNumericString(keyText, uint.Parse);
587                 case FieldType.Int64:
588                 case FieldType.SInt64:
589                 case FieldType.SFixed64:
590                     return ParseNumericString(keyText, long.Parse);
591                 case FieldType.UInt64:
592                 case FieldType.Fixed64:
593                     return ParseNumericString(keyText, ulong.Parse);
594                 default:
595                     throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
596             }
597         }
598 
599         private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
600         {
601             double value = token.NumberValue;
602             checked
603             {
604                 try
605                 {
606                     switch (field.FieldType)
607                     {
608                         case FieldType.Int32:
609                         case FieldType.SInt32:
610                         case FieldType.SFixed32:
611                             CheckInteger(value);
612                             return (int) value;
613                         case FieldType.UInt32:
614                         case FieldType.Fixed32:
615                             CheckInteger(value);
616                             return (uint) value;
617                         case FieldType.Int64:
618                         case FieldType.SInt64:
619                         case FieldType.SFixed64:
620                             CheckInteger(value);
621                             return (long) value;
622                         case FieldType.UInt64:
623                         case FieldType.Fixed64:
624                             CheckInteger(value);
625                             return (ulong) value;
626                         case FieldType.Double:
627                             return value;
628                         case FieldType.Float:
629                             if (double.IsNaN(value))
630                             {
631                                 return float.NaN;
632                             }
633                             if (value > float.MaxValue || value < float.MinValue)
634                             {
635                                 if (double.IsPositiveInfinity(value))
636                                 {
637                                     return float.PositiveInfinity;
638                                 }
639                                 if (double.IsNegativeInfinity(value))
640                                 {
641                                     return float.NegativeInfinity;
642                                 }
643                                 throw new InvalidProtocolBufferException($"Value out of range: {value}");
644                             }
645                             return (float) value;
646                         case FieldType.Enum:
647                             CheckInteger(value);
648                             // Just return it as an int, and let the CLR convert it.
649                             // Note that we deliberately don't check that it's a known value.
650                             return (int) value;
651                         default:
652                             throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
653                     }
654                 }
655                 catch (OverflowException)
656                 {
657                     throw new InvalidProtocolBufferException($"Value out of range: {value}");
658                 }
659             }
660         }
661 
662         private static void CheckInteger(double value)
663         {
664             if (double.IsInfinity(value) || double.IsNaN(value))
665             {
666                 throw new InvalidProtocolBufferException($"Value not an integer: {value}");
667             }
668             if (value != Math.Floor(value))
669             {
670                 throw new InvalidProtocolBufferException($"Value not an integer: {value}");
671             }
672         }
673 
674         private static object ParseSingleStringValue(FieldDescriptor field, string text)
675         {
676             switch (field.FieldType)
677             {
678                 case FieldType.String:
679                     return text;
680                 case FieldType.Bytes:
681                     try
682                     {
683                         return ByteString.FromBase64(text);
684                     }
685                     catch (FormatException e)
686                     {
687                         throw InvalidProtocolBufferException.InvalidBase64(e);
688                     }
689                 case FieldType.Int32:
690                 case FieldType.SInt32:
691                 case FieldType.SFixed32:
692                     return ParseNumericString(text, int.Parse);
693                 case FieldType.UInt32:
694                 case FieldType.Fixed32:
695                     return ParseNumericString(text, uint.Parse);
696                 case FieldType.Int64:
697                 case FieldType.SInt64:
698                 case FieldType.SFixed64:
699                     return ParseNumericString(text, long.Parse);
700                 case FieldType.UInt64:
701                 case FieldType.Fixed64:
702                     return ParseNumericString(text, ulong.Parse);
703                 case FieldType.Double:
704                     double d = ParseNumericString(text, double.Parse);
705                     ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
706                     return d;
707                 case FieldType.Float:
708                     float f = ParseNumericString(text, float.Parse);
709                     ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
710                     return f;
711                 case FieldType.Enum:
712                     var enumValue = field.EnumType.FindValueByName(text);
713                     if (enumValue == null)
714                     {
715                         throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
716                     }
717                     // Just return it as an int, and let the CLR convert it.
718                     return enumValue.Number;
719                 default:
720                     throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
721             }
722         }
723 
724         /// <summary>
725         /// Creates a new instance of the message type for the given field.
726         /// </summary>
727         private static IMessage NewMessageForField(FieldDescriptor field)
728         {
729             return field.MessageType.Parser.CreateTemplate();
730         }
731 
732         private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
733         {
734             // Can't prohibit this with NumberStyles.
735             if (text.StartsWith("+"))
736             {
737                 throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
738             }
739             if (text.StartsWith("0") && text.Length > 1)
740             {
741                 if (text[1] >= '0' && text[1] <= '9')
742                 {
743                     throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
744                 }
745             }
746             else if (text.StartsWith("-0") && text.Length > 2)
747             {
748                 if (text[2] >= '0' && text[2] <= '9')
749                 {
750                     throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
751                 }
752             }
753             try
754             {
755                 return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
756             }
757             catch (FormatException)
758             {
759                 throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
760             }
761             catch (OverflowException)
762             {
763                 throw new InvalidProtocolBufferException($"Value out of range: {text}");
764             }
765         }
766 
767         /// <summary>
768         /// Checks that any infinite/NaN values originated from the correct text.
769         /// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
770         /// way that Mono parses out-of-range values as infinity.
771         /// </summary>
ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)772         private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
773         {
774             if ((isPositiveInfinity && text != "Infinity") ||
775                 (isNegativeInfinity && text != "-Infinity") ||
776                 (isNaN && text != "NaN"))
777             {
778                 throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
779             }
780         }
781 
MergeTimestamp(IMessage message, JsonToken token)782         private static void MergeTimestamp(IMessage message, JsonToken token)
783         {
784             if (token.Type != JsonToken.TokenType.StringValue)
785             {
786                 throw new InvalidProtocolBufferException("Expected string value for Timestamp");
787             }
788             var match = TimestampRegex.Match(token.StringValue);
789             if (!match.Success)
790             {
791                 throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
792             }
793             var dateTime = match.Groups["datetime"].Value;
794             var subseconds = match.Groups["subseconds"].Value;
795             var offset = match.Groups["offset"].Value;
796 
797             try
798             {
799                 DateTime parsed = DateTime.ParseExact(
800                     dateTime,
801                     "yyyy-MM-dd'T'HH:mm:ss",
802                     CultureInfo.InvariantCulture,
803                     DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
804                 // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
805                 Timestamp timestamp = Timestamp.FromDateTime(parsed);
806                 int nanosToAdd = 0;
807                 if (subseconds != "")
808                 {
809                     // This should always work, as we've got 1-9 digits.
810                     int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
811                     nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
812                 }
813                 int secondsToAdd = 0;
814                 if (offset != "Z")
815                 {
816                     // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
817                     int sign = offset[0] == '-' ? 1 : -1;
818                     int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
819                     int minutes = int.Parse(offset.Substring(4, 2));
820                     int totalMinutes = hours * 60 + minutes;
821                     if (totalMinutes > 18 * 60)
822                     {
823                         throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
824                     }
825                     if (totalMinutes == 0 && sign == 1)
826                     {
827                         // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
828                         throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
829                     }
830                     // We need to *subtract* the offset from local time to get UTC.
831                     secondsToAdd = sign * totalMinutes * 60;
832                 }
833                 // Ensure we've got the right signs. Currently unnecessary, but easy to do.
834                 if (secondsToAdd < 0 && nanosToAdd > 0)
835                 {
836                     secondsToAdd++;
837                     nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
838                 }
839                 if (secondsToAdd != 0 || nanosToAdd != 0)
840                 {
841                     timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
842                     // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
843                     // anywhere, but we shouldn't parse it.
844                     if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
845                     {
846                         throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
847                     }
848                 }
849                 message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
850                 message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
851             }
852             catch (FormatException)
853             {
854                 throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
855             }
856         }
857 
MergeDuration(IMessage message, JsonToken token)858         private static void MergeDuration(IMessage message, JsonToken token)
859         {
860             if (token.Type != JsonToken.TokenType.StringValue)
861             {
862                 throw new InvalidProtocolBufferException("Expected string value for Duration");
863             }
864             var match = DurationRegex.Match(token.StringValue);
865             if (!match.Success)
866             {
867                 throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
868             }
869             var sign = match.Groups["sign"].Value;
870             var secondsText = match.Groups["int"].Value;
871             // Prohibit leading insignficant zeroes
872             if (secondsText[0] == '0' && secondsText.Length > 1)
873             {
874                 throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
875             }
876             var subseconds = match.Groups["subseconds"].Value;
877             var multiplier = sign == "-" ? -1 : 1;
878 
879             try
880             {
881                 long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
882                 int nanos = 0;
883                 if (subseconds != "")
884                 {
885                     // This should always work, as we've got 1-9 digits.
886                     int parsedFraction = int.Parse(subseconds.Substring(1));
887                     nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
888                 }
889                 if (!Duration.IsNormalized(seconds, nanos))
890                 {
891                     throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
892                 }
893                 message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
894                 message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
895             }
896             catch (FormatException)
897             {
898                 throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
899             }
900         }
901 
MergeFieldMask(IMessage message, JsonToken token)902         private static void MergeFieldMask(IMessage message, JsonToken token)
903         {
904             if (token.Type != JsonToken.TokenType.StringValue)
905             {
906                 throw new InvalidProtocolBufferException("Expected string value for FieldMask");
907             }
908             // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
909             string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);
910             IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message);
911             foreach (var path in jsonPaths)
912             {
913                 messagePaths.Add(ToSnakeCase(path));
914             }
915         }
916 
917         // Ported from src/google/protobuf/util/internal/utility.cc
ToSnakeCase(string text)918         private static string ToSnakeCase(string text)
919         {
920             var builder = new StringBuilder(text.Length * 2);
921             // Note: this is probably unnecessary now, but currently retained to be as close as possible to the
922             // C++, whilst still throwing an exception on underscores.
923             bool wasNotUnderscore = false;  // Initialize to false for case 1 (below)
924             bool wasNotCap = false;
925 
926             for (int i = 0; i < text.Length; i++)
927             {
928                 char c = text[i];
929                 if (c >= 'A' && c <= 'Z') // ascii_isupper
930                 {
931                     // Consider when the current character B is capitalized:
932                     // 1) At beginning of input:   "B..." => "b..."
933                     //    (e.g. "Biscuit" => "biscuit")
934                     // 2) Following a lowercase:   "...aB..." => "...a_b..."
935                     //    (e.g. "gBike" => "g_bike")
936                     // 3) At the end of input:     "...AB" => "...ab"
937                     //    (e.g. "GoogleLAB" => "google_lab")
938                     // 4) Followed by a lowercase: "...ABc..." => "...a_bc..."
939                     //    (e.g. "GBike" => "g_bike")
940                     if (wasNotUnderscore &&               //            case 1 out
941                         (wasNotCap ||                     // case 2 in, case 3 out
942                          (i + 1 < text.Length &&         //            case 3 out
943                           (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1])
944                     {  // case 4 in
945                        // We add an underscore for case 2 and case 4.
946                         builder.Append('_');
947                     }
948                     // ascii_tolower, but we already know that c *is* an upper case ASCII character...
949                     builder.Append((char) (c + 'a' - 'A'));
950                     wasNotUnderscore = true;
951                     wasNotCap = false;
952                 }
953                 else
954                 {
955                     builder.Append(c);
956                     if (c == '_')
957                     {
958                         throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
959                     }
960                     wasNotUnderscore = true;
961                     wasNotCap = true;
962                 }
963             }
964             return builder.ToString();
965         }
966         #endregion
967 
        /// <summary>
        /// Settings controlling JSON parsing: a recursion limit and a type registry.
        /// Both properties are get-only and assigned in the constructor.
        /// </summary>
        public sealed class Settings
        {
            /// <summary>
            /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default
            /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry.
            /// </summary>
            public static Settings Default { get; }

            // Workaround for the Mono compiler complaining about XML comments not being on
            // valid language elements.
            // Default is therefore assigned here in the static constructor, not with a property initializer.
            static Settings()
            {
                Default = new Settings(CodedInputStream.DefaultRecursionLimit);
            }

            /// <summary>
            /// The maximum depth of messages to parse. Note that this limit only applies to parsing
            /// messages, not collections - so a message within a collection within a message only counts as
            /// depth 2, not 3.
            /// </summary>
            public int RecursionLimit { get; }

            /// <summary>
            /// The type registry used to parse <see cref="Any"/> messages.
            /// </summary>
            public TypeRegistry TypeRegistry { get; }

            /// <summary>
            /// Creates a new <see cref="Settings"/> object with the specified recursion limit and
            /// <see cref="Google.Protobuf.Reflection.TypeRegistry.Empty"/> as the type registry.
            /// </summary>
            /// <param name="recursionLimit">The maximum depth of messages to parse</param>
            public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
            {
            }

            /// <summary>
            /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
            /// </summary>
            /// <param name="recursionLimit">The maximum depth of messages to parse</param>
            /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param>
            public Settings(int recursionLimit, TypeRegistry typeRegistry)
            {
                // The recursion limit is stored as given; no range check is applied here.
                RecursionLimit = recursionLimit;
                // The registry is passed through ProtoPreconditions.CheckNotNull before being stored
                // (presumably rejecting null - confirm against ProtoPreconditions).
                TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
            }
        }
1017     }
1018 }
1019