1 #region Copyright notice and license
2 // Protocol Buffers - Google's data interchange format
3 // Copyright 2008 Google Inc.  All rights reserved.
4 // https://developers.google.com/protocol-buffers/
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #endregion
32 using NUnit.Framework;
33 using System;
34 using System.IO;
35 
36 namespace Google.Protobuf
37 {
38     public class JsonTokenizerTest
39     {
40         [Test]
EmptyObjectValue()41         public void EmptyObjectValue()
42         {
43             AssertTokens("{}", JsonToken.StartObject, JsonToken.EndObject);
44         }
45 
46         [Test]
EmptyArrayValue()47         public void EmptyArrayValue()
48         {
49             AssertTokens("[]", JsonToken.StartArray, JsonToken.EndArray);
50         }
51 
52         [Test]
53         [TestCase("foo", "foo")]
54         [TestCase("tab\\t", "tab\t")]
55         [TestCase("line\\nfeed", "line\nfeed")]
56         [TestCase("carriage\\rreturn", "carriage\rreturn")]
57         [TestCase("back\\bspace", "back\bspace")]
58         [TestCase("form\\ffeed", "form\ffeed")]
59         [TestCase("escaped\\/slash", "escaped/slash")]
60         [TestCase("escaped\\\\backslash", "escaped\\backslash")]
61         [TestCase("escaped\\\"quote", "escaped\"quote")]
62         [TestCase("foo {}[] bar", "foo {}[] bar")]
63         [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
64         [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
65         [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
StringValue(string json, string expectedValue)66         public void StringValue(string json, string expectedValue)
67         {
68             AssertTokensNoReplacement("\"" + json + "\"", JsonToken.Value(expectedValue));
69         }
70 
71         // Valid surrogate pairs, with mixed escaping. These test cases can't be expressed
72         // using TestCase as they have no valid UTF-8 representation.
73         // It's unclear exactly how we should handle a mixture of escaped or not: that can't
74         // come from UTF-8 text, but could come from a .NET string. For the moment,
75         // treat it as valid in the obvious way.
76         [Test]
MixedSurrogatePairs()77         public void MixedSurrogatePairs()
78         {
79             string expected = "\ud800\udc00";
80             AssertTokens("'\\ud800\udc00'", JsonToken.Value(expected));
81             AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected));
82         }
83 
84         [Test]
ObjectDepth()85         public void ObjectDepth()
86         {
87             string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
88             var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
89             // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it.
90             Assert.AreEqual(0, tokenizer.ObjectDepth);
91             Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
92             Assert.AreEqual(1, tokenizer.ObjectDepth);
93             Assert.AreEqual(JsonToken.Name("foo"), tokenizer.Next());
94             Assert.AreEqual(1, tokenizer.ObjectDepth);
95             Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
96             Assert.AreEqual(2, tokenizer.ObjectDepth);
97             Assert.AreEqual(JsonToken.Name("x"), tokenizer.Next());
98             Assert.AreEqual(2, tokenizer.ObjectDepth);
99             Assert.AreEqual(JsonToken.Value(1), tokenizer.Next());
100             Assert.AreEqual(2, tokenizer.ObjectDepth);
101             Assert.AreEqual(JsonToken.Name("y"), tokenizer.Next());
102             Assert.AreEqual(2, tokenizer.ObjectDepth);
103             Assert.AreEqual(JsonToken.StartArray, tokenizer.Next());
104             Assert.AreEqual(2, tokenizer.ObjectDepth); // Depth hasn't changed in array
105             Assert.AreEqual(JsonToken.Value(0), tokenizer.Next());
106             Assert.AreEqual(2, tokenizer.ObjectDepth);
107             Assert.AreEqual(JsonToken.EndArray, tokenizer.Next());
108             Assert.AreEqual(2, tokenizer.ObjectDepth);
109             Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
110             Assert.AreEqual(1, tokenizer.ObjectDepth);
111             Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
112             Assert.AreEqual(0, tokenizer.ObjectDepth);
113             Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
114             Assert.AreEqual(0, tokenizer.ObjectDepth);
115         }
116 
117         [Test]
ObjectDepth_WithPushBack()118         public void ObjectDepth_WithPushBack()
119         {
120             string json = "{}";
121             var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
122             Assert.AreEqual(0, tokenizer.ObjectDepth);
123             var token = tokenizer.Next();
124             Assert.AreEqual(1, tokenizer.ObjectDepth);
125             // When we push back a "start object", we should effectively be back to the previous depth.
126             tokenizer.PushBack(token);
127             Assert.AreEqual(0, tokenizer.ObjectDepth);
128             // Read the same token again, and get back to depth 1
129             token = tokenizer.Next();
130             Assert.AreEqual(1, tokenizer.ObjectDepth);
131 
132             // Now the same in reverse, with EndObject
133             token = tokenizer.Next();
134             Assert.AreEqual(0, tokenizer.ObjectDepth);
135             tokenizer.PushBack(token);
136             Assert.AreEqual(1, tokenizer.ObjectDepth);
137             tokenizer.Next();
138             Assert.AreEqual(0, tokenizer.ObjectDepth);
139         }
140 
141         [Test]
142         [TestCase("embedded tab\t")]
143         [TestCase("embedded CR\r")]
144         [TestCase("embedded LF\n")]
145         [TestCase("embedded bell\u0007")]
146         [TestCase("bad escape\\a")]
147         [TestCase("incomplete escape\\")]
148         [TestCase("incomplete Unicode escape\\u000")]
149         [TestCase("invalid Unicode escape\\u000H")]
150         // Surrogate pair handling, both in raw .NET strings and escaped. We only need
151         // to detect this in strings, as non-ASCII characters anywhere other than in strings
152         // will already lead to parsing errors.
153         [TestCase("\\ud800")]
154         [TestCase("\\udc00")]
155         [TestCase("\\ud800x")]
156         [TestCase("\\udc00x")]
157         [TestCase("\\udc00\\ud800y")]
InvalidStringValue(string json)158         public void InvalidStringValue(string json)
159         {
160             AssertThrowsAfter("\"" + json + "\"");
161         }
162 
163         // Tests for invalid strings that can't be expressed in attributes,
164         // as the constants can't be expressed as UTF-8 strings.
165         [Test]
InvalidSurrogatePairs()166         public void InvalidSurrogatePairs()
167         {
168             AssertThrowsAfter("\"\ud800x\"");
169             AssertThrowsAfter("\"\udc00y\"");
170             AssertThrowsAfter("\"\udc00\ud800y\"");
171         }
172 
173         [Test]
174         [TestCase("0", 0)]
175         [TestCase("-0", 0)] // We don't distinguish between positive and negative 0
176         [TestCase("1", 1)]
177         [TestCase("-1", -1)]
178         // From here on, assume leading sign is okay...
179         [TestCase("1.125", 1.125)]
180         [TestCase("1.0", 1)]
181         [TestCase("1e5", 100000)]
182         [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec
183         [TestCase("1E5", 100000)]
184         [TestCase("1e+5", 100000)]
185         [TestCase("1E-5", 0.00001)]
186         [TestCase("123E-2", 1.23)]
187         [TestCase("123.45E3", 123450)]
188         [TestCase("   1   ", 1)]
NumberValue(string json, double expectedValue)189         public void NumberValue(string json, double expectedValue)
190         {
191             AssertTokens(json, JsonToken.Value(expectedValue));
192         }
193 
194         [Test]
195         [TestCase("00")]
196         [TestCase(".5")]
197         [TestCase("1.")]
198         [TestCase("1e")]
199         [TestCase("1e-")]
200         [TestCase("--")]
201         [TestCase("--1")]
202         [TestCase("-1.7977e308")]
203         [TestCase("1.7977e308")]
InvalidNumberValue(string json)204         public void InvalidNumberValue(string json)
205         {
206             AssertThrowsAfter(json);
207         }
208 
209         [Test]
210         [TestCase("nul")]
211         [TestCase("nothing")]
212         [TestCase("truth")]
213         [TestCase("fALSEhood")]
InvalidLiterals(string json)214         public void InvalidLiterals(string json)
215         {
216             AssertThrowsAfter(json);
217         }
218 
219         [Test]
NullValue()220         public void NullValue()
221         {
222             AssertTokens("null", JsonToken.Null);
223         }
224 
225         [Test]
TrueValue()226         public void TrueValue()
227         {
228             AssertTokens("true", JsonToken.True);
229         }
230 
231         [Test]
FalseValue()232         public void FalseValue()
233         {
234             AssertTokens("false", JsonToken.False);
235         }
236 
237         [Test]
SimpleObject()238         public void SimpleObject()
239         {
240             AssertTokens("{'x': 'y'}",
241                 JsonToken.StartObject, JsonToken.Name("x"), JsonToken.Value("y"), JsonToken.EndObject);
242         }
243 
244         [Test]
245         [TestCase("[10, 20", 3)]
246         [TestCase("[10,", 2)]
247         [TestCase("[10:20]", 2)]
248         [TestCase("[", 1)]
249         [TestCase("[,", 1)]
250         [TestCase("{", 1)]
251         [TestCase("{,", 1)]
252         [TestCase("{[", 1)]
253         [TestCase("{{", 1)]
254         [TestCase("{0", 1)]
255         [TestCase("{null", 1)]
256         [TestCase("{false", 1)]
257         [TestCase("{true", 1)]
258         [TestCase("}", 0)]
259         [TestCase("]", 0)]
260         [TestCase(",", 0)]
261         [TestCase("'foo' 'bar'", 1)]
262         [TestCase(":", 0)]
263         [TestCase("'foo", 0)] // Incomplete string
264         [TestCase("{ 'foo' }", 2)]
265         [TestCase("{ x:1", 1)] // Property names must be quoted
266         [TestCase("{]", 1)]
267         [TestCase("[}", 1)]
268         [TestCase("[1,", 2)]
269         [TestCase("{'x':0]", 3)]
270         [TestCase("{ 'foo': }", 2)]
271         [TestCase("{ 'foo':'bar', }", 3)]
InvalidStructure(string json, int expectedValidTokens)272         public void InvalidStructure(string json, int expectedValidTokens)
273         {
274             // Note: we don't test that the earlier tokens are exactly as expected,
275             // partly because that's hard to parameterize.
276             var reader = new StringReader(json.Replace('\'', '"'));
277             var tokenizer = JsonTokenizer.FromTextReader(reader);
278             for (int i = 0; i < expectedValidTokens; i++)
279             {
280                 Assert.IsNotNull(tokenizer.Next());
281             }
282             Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
283         }
284 
285         [Test]
ArrayMixedType()286         public void ArrayMixedType()
287         {
288             AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]",
289                 JsonToken.StartArray,
290                 JsonToken.Value(1),
291                 JsonToken.Value("foo"),
292                 JsonToken.Null,
293                 JsonToken.False,
294                 JsonToken.True,
295                 JsonToken.StartArray,
296                 JsonToken.Value(2),
297                 JsonToken.EndArray,
298                 JsonToken.StartObject,
299                 JsonToken.Name("x"),
300                 JsonToken.Value("y"),
301                 JsonToken.EndObject,
302                 JsonToken.EndArray);
303         }
304 
305         [Test]
ObjectMixedType()306         public void ObjectMixedType()
307         {
308             AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true,
309                            'f': [2], 'g': {'x':'y' }}",
310                 JsonToken.StartObject,
311                 JsonToken.Name("a"),
312                 JsonToken.Value(1),
313                 JsonToken.Name("b"),
314                 JsonToken.Value("bar"),
315                 JsonToken.Name("c"),
316                 JsonToken.Null,
317                 JsonToken.Name("d"),
318                 JsonToken.False,
319                 JsonToken.Name("e"),
320                 JsonToken.True,
321                 JsonToken.Name("f"),
322                 JsonToken.StartArray,
323                 JsonToken.Value(2),
324                 JsonToken.EndArray,
325                 JsonToken.Name("g"),
326                 JsonToken.StartObject,
327                 JsonToken.Name("x"),
328                 JsonToken.Value("y"),
329                 JsonToken.EndObject,
330                 JsonToken.EndObject);
331         }
332 
333         [Test]
NextAfterEndDocumentThrows()334         public void NextAfterEndDocumentThrows()
335         {
336             var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
337             Assert.AreEqual(JsonToken.Null, tokenizer.Next());
338             Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
339             Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
340         }
341 
342         [Test]
CanPushBackEndDocument()343         public void CanPushBackEndDocument()
344         {
345             var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
346             Assert.AreEqual(JsonToken.Null, tokenizer.Next());
347             Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
348             tokenizer.PushBack(JsonToken.EndDocument);
349             Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
350             Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
351         }
352 
353         /// <summary>
354         /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
355         /// All apostrophes are first converted to double quotes, allowing any tests
356         /// that don't need to check actual apostrophe handling to use apostrophes in the JSON, avoiding
357         /// messy string literal escaping. The "end document" token is not specified in the list of
358         /// expected tokens, but is implicit.
359         /// </summary>
AssertTokens(string json, params JsonToken[] expectedTokens)360         private static void AssertTokens(string json, params JsonToken[] expectedTokens)
361         {
362             AssertTokensNoReplacement(json.Replace('\'', '"'), expectedTokens);
363         }
364 
365         /// <summary>
366         /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
367         /// Unlike <see cref="AssertTokens(string, JsonToken[])"/>, this does not perform any character
368         /// replacement on the specified JSON, and should be used when the text contains apostrophes which
369         /// are expected to be used *as* apostrophes. The "end document" token is not specified in the list of
370         /// expected tokens, but is implicit.
371         /// </summary>
AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)372         private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
373         {
374             var reader = new StringReader(json);
375             var tokenizer = JsonTokenizer.FromTextReader(reader);
376             for (int i = 0; i < expectedTokens.Length; i++)
377             {
378                 var actualToken = tokenizer.Next();
379                 if (actualToken == JsonToken.EndDocument)
380                 {
381                     Assert.Fail("Expected {0} but reached end of token stream", expectedTokens[i]);
382                 }
383                 Assert.AreEqual(expectedTokens[i], actualToken);
384             }
385             var finalToken = tokenizer.Next();
386             if (finalToken != JsonToken.EndDocument)
387             {
388                 Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken);
389             }
390         }
391 
AssertThrowsAfter(string json, params JsonToken[] expectedTokens)392         private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
393         {
394             var reader = new StringReader(json);
395             var tokenizer = JsonTokenizer.FromTextReader(reader);
396             for (int i = 0; i < expectedTokens.Length; i++)
397             {
398                 var actualToken = tokenizer.Next();
399                 if (actualToken == JsonToken.EndDocument)
400                 {
401                     Assert.Fail("Expected {0} but reached end of document", expectedTokens[i]);
402                 }
403                 Assert.AreEqual(expectedTokens[i], actualToken);
404             }
405             Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
406         }
407     }
408 }
409