1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 
33 #include <google/protobuf/stubs/strutil.h>
34 
35 #include <locale.h>
36 
37 #include <google/protobuf/stubs/stl_util.h>
38 #include <google/protobuf/testing/googletest.h>
39 #include <gtest/gtest.h>
40 
41 #ifdef _WIN32
42 #define snprintf _snprintf
43 #endif
44 
45 namespace google {
46 namespace protobuf {
47 namespace {
48 
49 // TODO(kenton):  Copy strutil tests from google3?
50 
// Verifies that SimpleDtoa() and SimpleFtoa() format 1.5 as "1.5" both under
// the "C" locale and under a Spanish locale (if one is installed), i.e. that
// their output does not depend on the process-wide LC_NUMERIC setting.
TEST(StringUtilityTest, ImmuneToLocales) {
  // Remember the old locale.  The pointer returned by setlocale() may be
  // invalidated by later setlocale() calls, so keep a copy of the name.
  const char* old_locale_cstr = setlocale(LC_NUMERIC, NULL);
  ASSERT_TRUE(old_locale_cstr != NULL);
  const string old_locale = old_locale_cstr;

  // Set the locale to "C".
  ASSERT_TRUE(setlocale(LC_NUMERIC, "C") != NULL);

  EXPECT_EQ("1.5", SimpleDtoa(1.5));
  EXPECT_EQ("1.5", SimpleFtoa(1.5f));

  // The same locale is spelled differently across platforms; try the common
  // spellings in turn and stop at the first one that is accepted.
  const bool have_spanish_locale =
      setlocale(LC_NUMERIC, "es_ES") != NULL ||
      setlocale(LC_NUMERIC, "es_ES.utf8") != NULL ||
      setlocale(LC_NUMERIC, "es_ES.UTF-8") != NULL;

  if (have_spanish_locale) {
    EXPECT_EQ("1.5", SimpleDtoa(1.5));
    EXPECT_EQ("1.5", SimpleFtoa(1.5f));
  } else {
    // Some systems may not have the desired locale available.
    GOOGLE_LOG(WARNING)
      << "Couldn't set locale to es_ES.  Skipping this test.";
  }

  // Return to original locale, and report it if that fails so that a leaked
  // locale does not silently influence later tests.
  EXPECT_TRUE(setlocale(LC_NUMERIC, old_locale.c_str()) != NULL);
}
76 
// Compares the first `len` elements of the indexable sequences `x` and `y`
// with EXPECT_EQ, one element at a time.  A mismatch reports the spelling of
// both expressions, the offending index and the caller-supplied `msg`.
// All arguments are parenthesized so that compound expressions (for example
// a conditional expression) can be passed safely, and `len` is converted to
// int so that the loop comparison is not a signed/unsigned mix.  Note that
// `len`, `x` and `y` are re-evaluated on every iteration.
#define EXPECT_EQ_ARRAY(len, x, y, msg)                          \
  for (int j = 0; j < static_cast<int>(len); ++j) {              \
    EXPECT_EQ((x)[j], (y)[j]) << "" # x << " != " # y            \
                              << " byte " << j << ": " << (msg); \
  }
82 
// Short Base64 test vectors.  Each entry holds `plain_length` raw bytes in
// `plaintext` (the length is stored explicitly because the data may contain
// embedded NUL bytes) and the expected standard, '='-padded Base64 encoding
// in `cyphertext`.  The table is read-only test data, hence const.
static const struct {
  int plain_length;
  const char* plaintext;
  const char* cyphertext;
} base64_tests[] = {
  // Empty string.
  { 0, "", ""},

  // Basic bit patterns;
  // values obtained with "echo -n '...' | uuencode -m test"

  { 1, "\000", "AA==" },
  { 1, "\001", "AQ==" },
  { 1, "\002", "Ag==" },
  { 1, "\004", "BA==" },
  { 1, "\010", "CA==" },
  { 1, "\020", "EA==" },
  { 1, "\040", "IA==" },
  { 1, "\100", "QA==" },
  { 1, "\200", "gA==" },

  { 1, "\377", "/w==" },
  { 1, "\376", "/g==" },
  { 1, "\375", "/Q==" },
  { 1, "\373", "+w==" },
  { 1, "\367", "9w==" },
  { 1, "\357", "7w==" },
  { 1, "\337", "3w==" },
  { 1, "\277", "vw==" },
  { 1, "\177", "fw==" },
  { 2, "\000\000", "AAA=" },
  { 2, "\000\001", "AAE=" },
  { 2, "\000\002", "AAI=" },
  { 2, "\000\004", "AAQ=" },
  { 2, "\000\010", "AAg=" },
  { 2, "\000\020", "ABA=" },
  { 2, "\000\040", "ACA=" },
  { 2, "\000\100", "AEA=" },
  { 2, "\000\200", "AIA=" },
  { 2, "\001\000", "AQA=" },
  { 2, "\002\000", "AgA=" },
  { 2, "\004\000", "BAA=" },
  { 2, "\010\000", "CAA=" },
  { 2, "\020\000", "EAA=" },
  { 2, "\040\000", "IAA=" },
  { 2, "\100\000", "QAA=" },
  { 2, "\200\000", "gAA=" },

  { 2, "\377\377", "//8=" },
  { 2, "\377\376", "//4=" },
  { 2, "\377\375", "//0=" },
  { 2, "\377\373", "//s=" },
  { 2, "\377\367", "//c=" },
  { 2, "\377\357", "/+8=" },
  { 2, "\377\337", "/98=" },
  { 2, "\377\277", "/78=" },
  { 2, "\377\177", "/38=" },
  { 2, "\376\377", "/v8=" },
  { 2, "\375\377", "/f8=" },
  { 2, "\373\377", "+/8=" },
  { 2, "\367\377", "9/8=" },
  { 2, "\357\377", "7/8=" },
  { 2, "\337\377", "3/8=" },
  { 2, "\277\377", "v/8=" },
  { 2, "\177\377", "f/8=" },

  { 3, "\000\000\000", "AAAA" },
  { 3, "\000\000\001", "AAAB" },
  { 3, "\000\000\002", "AAAC" },
  { 3, "\000\000\004", "AAAE" },
  { 3, "\000\000\010", "AAAI" },
  { 3, "\000\000\020", "AAAQ" },
  { 3, "\000\000\040", "AAAg" },
  { 3, "\000\000\100", "AABA" },
  { 3, "\000\000\200", "AACA" },
  { 3, "\000\001\000", "AAEA" },
  { 3, "\000\002\000", "AAIA" },
  { 3, "\000\004\000", "AAQA" },
  { 3, "\000\010\000", "AAgA" },
  { 3, "\000\020\000", "ABAA" },
  { 3, "\000\040\000", "ACAA" },
  { 3, "\000\100\000", "AEAA" },
  { 3, "\000\200\000", "AIAA" },
  { 3, "\001\000\000", "AQAA" },
  { 3, "\002\000\000", "AgAA" },
  { 3, "\004\000\000", "BAAA" },
  { 3, "\010\000\000", "CAAA" },
  { 3, "\020\000\000", "EAAA" },
  { 3, "\040\000\000", "IAAA" },
  { 3, "\100\000\000", "QAAA" },
  { 3, "\200\000\000", "gAAA" },

  { 3, "\377\377\377", "////" },
  { 3, "\377\377\376", "///+" },
  { 3, "\377\377\375", "///9" },
  { 3, "\377\377\373", "///7" },
  { 3, "\377\377\367", "///3" },
  { 3, "\377\377\357", "///v" },
  { 3, "\377\377\337", "///f" },
  { 3, "\377\377\277", "//+/" },
  { 3, "\377\377\177", "//9/" },
  { 3, "\377\376\377", "//7/" },
  { 3, "\377\375\377", "//3/" },
  { 3, "\377\373\377", "//v/" },
  { 3, "\377\367\377", "//f/" },
  { 3, "\377\357\377", "/+//" },
  { 3, "\377\337\377", "/9//" },
  { 3, "\377\277\377", "/7//" },
  { 3, "\377\177\377", "/3//" },
  { 3, "\376\377\377", "/v//" },
  { 3, "\375\377\377", "/f//" },
  { 3, "\373\377\377", "+///" },
  { 3, "\367\377\377", "9///" },
  { 3, "\357\377\377", "7///" },
  { 3, "\337\377\377", "3///" },
  { 3, "\277\377\377", "v///" },
  { 3, "\177\377\377", "f///" },

  // Random numbers: values obtained with
  //
  //  #! /bin/bash
  //  dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
  //  od -N $1 -t o1 /tmp/bar.random
  //  uuencode -m test < /tmp/bar.random
  //
  // where $1 is the number of bytes (2, 3)

  { 2, "\243\361", "o/E=" },
  { 2, "\024\167", "FHc=" },
  { 2, "\313\252", "y6o=" },
  { 2, "\046\041", "JiE=" },
  { 2, "\145\236", "ZZ4=" },
  { 2, "\254\325", "rNU=" },
  { 2, "\061\330", "Mdg=" },
  { 2, "\245\032", "pRo=" },
  { 2, "\006\000", "BgA=" },
  { 2, "\375\131", "/Vk=" },
  { 2, "\303\210", "w4g=" },
  { 2, "\040\037", "IB8=" },
  { 2, "\261\372", "sfo=" },
  { 2, "\335\014", "3Qw=" },
  { 2, "\233\217", "m48=" },
  { 2, "\373\056", "+y4=" },
  { 2, "\247\232", "p5o=" },
  { 2, "\107\053", "Rys=" },
  { 2, "\204\077", "hD8=" },
  { 2, "\276\211", "vok=" },
  { 2, "\313\110", "y0g=" },
  { 2, "\363\376", "8/4=" },
  { 2, "\251\234", "qZw=" },
  { 2, "\103\262", "Q7I=" },
  { 2, "\142\312", "Yso=" },
  { 2, "\067\211", "N4k=" },
  { 2, "\220\001", "kAE=" },
  { 2, "\152\240", "aqA=" },
  { 2, "\367\061", "9zE=" },
  { 2, "\133\255", "W60=" },
  { 2, "\176\035", "fh0=" },
  { 2, "\032\231", "Gpk=" },

  { 3, "\013\007\144", "Cwdk" },
  { 3, "\030\112\106", "GEpG" },
  { 3, "\047\325\046", "J9Um" },
  { 3, "\310\160\022", "yHAS" },
  { 3, "\131\100\237", "WUCf" },
  { 3, "\064\342\134", "NOJc" },
  { 3, "\010\177\004", "CH8E" },
  { 3, "\345\147\205", "5WeF" },
  { 3, "\300\343\360", "wOPw" },
  { 3, "\061\240\201", "MaCB" },
  { 3, "\225\333\044", "ldsk" },
  { 3, "\215\137\352", "jV/q" },
  { 3, "\371\147\160", "+Wdw" },
  { 3, "\030\320\051", "GNAp" },
  { 3, "\044\174\241", "JHyh" },
  { 3, "\260\127\037", "sFcf" },
  { 3, "\111\045\033", "SSUb" },
  { 3, "\202\114\107", "gkxH" },
  { 3, "\057\371\042", "L/ki" },
  { 3, "\223\247\244", "k6ek" },
  { 3, "\047\216\144", "J45k" },
  { 3, "\203\070\327", "gzjX" },
  { 3, "\247\140\072", "p2A6" },
  { 3, "\124\115\116", "VE1O" },
  { 3, "\157\162\050", "b3Io" },
  { 3, "\357\223\004", "75ME" },
  { 3, "\052\117\156", "Kk9u" },
  { 3, "\347\154\000", "52wA" },
  { 3, "\303\012\142", "wwpi" },
  { 3, "\060\035\362", "MB3y" },
  { 3, "\130\226\361", "WJbx" },
  { 3, "\173\013\071", "ews5" },
  { 3, "\336\004\027", "3gQX" },
  { 3, "\357\366\234", "7/ac" },
  { 3, "\353\304\111", "68RJ" },
  { 3, "\024\264\131", "FLRZ" },
  { 3, "\075\114\251", "PUyp" },
  { 3, "\315\031\225", "zRmV" },
  { 3, "\154\201\276", "bIG+" },
  { 3, "\200\066\072", "gDY6" },
  { 3, "\142\350\267", "Yui3" },
  { 3, "\033\000\166", "GwB2" },
  { 3, "\210\055\077", "iC0/" },
  { 3, "\341\037\124", "4R9U" },
  { 3, "\161\103\152", "cUNq" },
  { 3, "\270\142\131", "uGJZ" },
  { 3, "\337\076\074", "3z48" },
  { 3, "\375\106\362", "/Uby" },
  { 3, "\227\301\127", "l8FX" },
  { 3, "\340\002\234", "4AKc" },
  { 3, "\121\064\033", "UTQb" },
  { 3, "\157\134\143", "b1xj" },
  { 3, "\247\055\327", "py3X" },
  { 3, "\340\142\005", "4GIF" },
  { 3, "\060\260\143", "MLBj" },
  { 3, "\075\203\170", "PYN4" },
  { 3, "\143\160\016", "Y3AO" },
  { 3, "\313\013\063", "ywsz" },
  { 3, "\174\236\135", "fJ5d" },
  { 3, "\103\047\026", "QycW" },
  { 3, "\365\005\343", "9QXj" },
  { 3, "\271\160\223", "uXCT" },
  { 3, "\362\255\172", "8q16" },
  { 3, "\113\012\015", "SwoN" },

  // various lengths, generated by this python script:
  //
  // from string import lowercase as lc
  // for i in range(27):
  //   print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
  //                                     lc[:i].encode('base64').strip())

  {  0, "",                           "" },
  {  1, "a",                          "YQ==" },
  {  2, "ab",                         "YWI=" },
  {  3, "abc",                        "YWJj" },
  {  4, "abcd",                       "YWJjZA==" },
  {  5, "abcde",                      "YWJjZGU=" },
  {  6, "abcdef",                     "YWJjZGVm" },
  {  7, "abcdefg",                    "YWJjZGVmZw==" },
  {  8, "abcdefgh",                   "YWJjZGVmZ2g=" },
  {  9, "abcdefghi",                  "YWJjZGVmZ2hp" },
  { 10, "abcdefghij",                 "YWJjZGVmZ2hpag==" },
  { 11, "abcdefghijk",                "YWJjZGVmZ2hpams=" },
  { 12, "abcdefghijkl",               "YWJjZGVmZ2hpamts" },
  { 13, "abcdefghijklm",              "YWJjZGVmZ2hpamtsbQ==" },
  { 14, "abcdefghijklmn",             "YWJjZGVmZ2hpamtsbW4=" },
  { 15, "abcdefghijklmno",            "YWJjZGVmZ2hpamtsbW5v" },
  { 16, "abcdefghijklmnop",           "YWJjZGVmZ2hpamtsbW5vcA==" },
  { 17, "abcdefghijklmnopq",          "YWJjZGVmZ2hpamtsbW5vcHE=" },
  { 18, "abcdefghijklmnopqr",         "YWJjZGVmZ2hpamtsbW5vcHFy" },
  { 19, "abcdefghijklmnopqrs",        "YWJjZGVmZ2hpamtsbW5vcHFycw==" },
  { 20, "abcdefghijklmnopqrst",       "YWJjZGVmZ2hpamtsbW5vcHFyc3Q=" },
  { 21, "abcdefghijklmnopqrstu",      "YWJjZGVmZ2hpamtsbW5vcHFyc3R1" },
  { 22, "abcdefghijklmnopqrstuv",     "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg==" },
  { 23, "abcdefghijklmnopqrstuvw",    "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc=" },
  { 24, "abcdefghijklmnopqrstuvwx",   "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4" },
  { 25, "abcdefghijklmnopqrstuvwxy",  "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ==" },
  { 26, "abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=" },
};
343 
// Long Base64 test vectors.  `plaintext` is a NUL-terminated message and
// `cyphertext` is its expected encoding in the web-safe alphabet ('-' and
// '_' instead of '+' and '/') without trailing '=' padding.  The table is
// read-only test data, hence const.
static const struct {
  const char* plaintext;
  const char* cyphertext;
} base64_strings[] = {
  // Some google quotes
  // Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
  // (Note that we're testing the websafe encoding, though, so if
  // you add messages, be sure to run "tr -- '+/' '-_'" on the output)
  { "I was always good at math and science, and I never realized "
    "that was unusual or somehow undesirable. So one of the things "
    "I care a lot about is helping to remove that stigma, "
    "to show girls that you can be feminine, you can like the things "
    "that girls like, but you can also be really good at technology. "
    "You can be really good at building things."
    " - Marissa Meyer, Newsweek, 2010-12-22" "\n",

    "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
    "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
    "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
    "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
    "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
    "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
    "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
    "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
    "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },

  { "Typical first year for a new cluster: "
    "~0.5 overheating "
    "~1 PDU failure "
    "~1 rack-move "
    "~1 network rewiring "
    "~20 rack failures "
    "~5 racks go wonky "
    "~8 network maintenances "
    "~12 router reloads "
    "~3 router failures "
    "~dozens of minor 30-second blips for dns "
    "~1000 individual machine failures "
    "~thousands of hard drive failures "
    "slow disks, bad memory, misconfigured machines, flaky machines, etc."
    " - Jeff Dean, The Joys of Real Hardware" "\n",

    "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
    "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
    "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
    "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
    "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
    "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
    "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
    "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
    "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
    "ZWFsIEhhcmR3YXJlCg" },

  { "I'm the head of the webspam team at Google.  "
    "That means that if you type your name into Google and get porn back, "
    "it's my fault. Unless you're a porn star, in which case porn is a "
    "completely reasonable response."
    " - Matt Cutts, Google Plus" "\n",

    "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
    "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
    "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
    "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
    "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
    "IEdvb2dsZSBQbHVzCg" },

  { "It will still be a long time before machines approach human intelligence. "
    "But luckily, machines don't actually have to be intelligent; "
    "they just have to fake it. Access to a wealth of information, "
    "combined with a rudimentary decision-making capacity, "
    "can often be almost as useful. Of course, the results are better yet "
    "when coupled with intelligence. A reference librarian with access to "
    "a good search engine is a formidable tool."
    " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" "\n",

    "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
    "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
    "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
    "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
    "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
    "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
    "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
    "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
    "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
    "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
    "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
    "NAo" },

  // Degenerate edge case
  { "",
    "" },
};
436 
TEST(Base64,EscapeAndUnescape)437 TEST(Base64, EscapeAndUnescape) {
438   // Check the short strings; this tests the math (and boundaries)
439   for (int i = 0; i < sizeof(base64_tests) / sizeof(base64_tests[0]); ++i) {
440     char encode_buffer[100];
441     int encode_length;
442     char decode_buffer[100];
443     int decode_length;
444     int cypher_length;
445     string decode_str;
446 
447     const unsigned char* unsigned_plaintext =
448       reinterpret_cast<const unsigned char*>(base64_tests[i].plaintext);
449 
450     StringPiece plaintext(base64_tests[i].plaintext,
451                           base64_tests[i].plain_length);
452 
453     cypher_length = strlen(base64_tests[i].cyphertext);
454 
455     // The basic escape function:
456     memset(encode_buffer, 0, sizeof(encode_buffer));
457     encode_length = Base64Escape(unsigned_plaintext,
458                                  base64_tests[i].plain_length,
459                                  encode_buffer,
460                                  sizeof(encode_buffer));
461     //    Is it of the expected length?
462     EXPECT_EQ(encode_length, cypher_length);
463     // Would it have been okay to allocate only CalculateBase64EscapeLen()?
464     EXPECT_EQ(CalculateBase64EscapedLen(base64_tests[i].plain_length),
465               encode_length);
466 
467     //    Is it the expected encoded value?
468     ASSERT_STREQ(encode_buffer, base64_tests[i].cyphertext);
469 
470     // If we encode it into a buffer of exactly the right length...
471     memset(encode_buffer, 0, sizeof(encode_buffer));
472     encode_length = Base64Escape(unsigned_plaintext,
473                                           base64_tests[i].plain_length,
474                                           encode_buffer,
475                                           cypher_length);
476     //    Is it still of the expected length?
477     EXPECT_EQ(encode_length, cypher_length);
478 
479     //    And is the value still correct?  (i.e., not losing the last byte)
480     EXPECT_STREQ(encode_buffer, base64_tests[i].cyphertext);
481 
482     // If we decode it back:
483     decode_str.clear();
484     EXPECT_TRUE(Base64Unescape(
485         StringPiece(encode_buffer, cypher_length), &decode_str));
486 
487     //    Is it of the expected length?
488     EXPECT_EQ(base64_tests[i].plain_length, decode_str.length());
489 
490     //    Is it the expected decoded value?
491     EXPECT_EQ(plaintext, decode_str);
492 
493     // Let's try with a pre-populated string.
494     string encoded("this junk should be ignored");
495     Base64Escape(string(base64_tests[i].plaintext,
496                         base64_tests[i].plain_length),
497                  &encoded);
498     EXPECT_EQ(encoded, string(encode_buffer, cypher_length));
499 
500     string decoded("this junk should be ignored");
501     EXPECT_TRUE(Base64Unescape(
502         StringPiece(encode_buffer, cypher_length), &decoded));
503     EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
504     EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);
505 
506     // Our decoder treats the padding '=' characters at the end as
507     // optional (but if there are any, there must be the correct
508     // number of them.)  If encode_buffer has any, run some additional
509     // tests that fiddle with them.
510     char* first_equals = strchr(encode_buffer, '=');
511     if (first_equals) {
512       // How many equals signs does the string start with?
513       int equals = (*(first_equals+1) == '=') ? 2 : 1;
514 
515       // Try chopping off the equals sign(s) entirely.  The decoder
516       // should still be okay with this.
517       string decoded2("this junk should also be ignored");
518       *first_equals = '\0';
519       EXPECT_TRUE(Base64Unescape(
520           StringPiece(encode_buffer, first_equals - encode_buffer), &decoded2));
521       EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
522       EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);
523 
524       // Now test chopping off the equals sign(s) and adding
525       // whitespace.  Our decoder should still accept this.
526       decoded2.assign("this junk should be ignored");
527       *first_equals = ' ';
528       *(first_equals+1) = '\0';
529       EXPECT_TRUE(Base64Unescape(
530           StringPiece(encode_buffer, first_equals - encode_buffer + 1),
531           &decoded2));
532       EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
533       EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);
534 
535       // Now stick a bad character at the end of the string.  The decoder
536       // should refuse this string.
537       decoded2.assign("this junk should be ignored");
538       *first_equals = '?';
539       *(first_equals+1) = '\0';
540       EXPECT_TRUE(
541           !Base64Unescape(
542               StringPiece(encode_buffer, first_equals - encode_buffer + 1),
543               &decoded2));
544 
545       int len;
546 
547       // Test whitespace mixed with the padding.  (eg "AA = = ")  The
548       // decoder should accept this.
549       if (equals == 2) {
550         snprintf(first_equals, 6, " = = ");
551         len = first_equals - encode_buffer + 5;
552       } else {
553         snprintf(first_equals, 6, " = ");
554         len = first_equals - encode_buffer + 3;
555       }
556       decoded2.assign("this junk should be ignored");
557       EXPECT_TRUE(
558           Base64Unescape(StringPiece(encode_buffer, len), &decoded2));
559       EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
560       EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);
561 
562       // Test whitespace mixed with the padding, but with the wrong
563       // number of equals signs (eg "AA = ").  The decoder should
564       // refuse these strings.
565       if (equals == 1) {
566         snprintf(first_equals, 6, " = = ");
567         len = first_equals - encode_buffer + 5;
568       } else {
569         snprintf(first_equals, 6, " = ");
570         len = first_equals - encode_buffer + 3;
571       }
572       EXPECT_TRUE(
573           !Base64Unescape(StringPiece(encode_buffer, len), &decoded2));
574     }
575 
576     // Cool! the basic Base64 encoder/decoder works.
577     // Let's try the alternate alphabet: tr -- '+/' '-_'
578 
579     char websafe[100];
580     memset(websafe, 0, sizeof(websafe));
581     strncpy(websafe, base64_tests[i].cyphertext, cypher_length);
582     for (int c = 0; c < sizeof(websafe); ++c) {
583       if ('+' == websafe[c]) { websafe[c] = '-'; }
584       if ('/' == websafe[c]) { websafe[c] = '_'; }
585     }
586 
587     // The websafe escape function:
588     memset(encode_buffer, 0, sizeof(encode_buffer));
589     encode_length = WebSafeBase64Escape(unsigned_plaintext,
590                                                  base64_tests[i].plain_length,
591                                                  encode_buffer,
592                                                  sizeof(encode_buffer),
593                                                  true);
594     //    Is it of the expected length?
595     EXPECT_EQ(encode_length, cypher_length);
596     EXPECT_EQ(
597         CalculateBase64EscapedLen(base64_tests[i].plain_length, true),
598         encode_length);
599 
600     //    Is it the expected encoded value?
601     EXPECT_STREQ(encode_buffer, websafe);
602 
603     //    If we encode it into a buffer of exactly the right length...
604     memset(encode_buffer, 0, sizeof(encode_buffer));
605     encode_length = WebSafeBase64Escape(unsigned_plaintext,
606                                                  base64_tests[i].plain_length,
607                                                  encode_buffer,
608                                                  cypher_length,
609                                                  true);
610     //    Is it still of the expected length?
611     EXPECT_EQ(encode_length, cypher_length);
612 
613     //    And is the value still correct?  (i.e., not losing the last byte)
614     EXPECT_STREQ(encode_buffer, websafe);
615 
616     //    Let's try the string version of the encoder
617     encoded = "this junk should be ignored";
618     WebSafeBase64Escape(
619         unsigned_plaintext, base64_tests[i].plain_length,
620         &encoded, true);
621     EXPECT_EQ(encoded.size(), cypher_length);
622     EXPECT_STREQ(encoded.c_str(), websafe);
623 
624     //    If we decode it back:
625     memset(decode_buffer, 0, sizeof(decode_buffer));
626     decode_length = WebSafeBase64Unescape(encode_buffer,
627                                                    cypher_length,
628                                                    decode_buffer,
629                                                    sizeof(decode_buffer));
630 
631     //    Is it of the expected length?
632     EXPECT_EQ(decode_length, base64_tests[i].plain_length);
633 
634     //    Is it the expected decoded value?
635     EXPECT_EQ(0,
636               memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));
637 
638     //    If we decode it into a buffer of exactly the right length...
639     memset(decode_buffer, 0, sizeof(decode_buffer));
640     decode_length = WebSafeBase64Unescape(encode_buffer,
641                                                    cypher_length,
642                                                    decode_buffer,
643                                                    decode_length);
644 
645     //    Is it still of the expected length?
646     EXPECT_EQ(decode_length, base64_tests[i].plain_length);
647 
648     //    And is it the expected decoded value?
649     EXPECT_EQ(0,
650               memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));
651 
652     // Try using '.' for the pad character.
653     for (int c = cypher_length - 1; c >= 0 && '=' == encode_buffer[c]; --c) {
654       encode_buffer[c] = '.';
655     }
656 
657     // If we decode it back:
658     memset(decode_buffer, 0, sizeof(decode_buffer));
659     decode_length = WebSafeBase64Unescape(encode_buffer,
660                                                    cypher_length,
661                                                    decode_buffer,
662                                                    sizeof(decode_buffer));
663 
664     // Is it of the expected length?
665     EXPECT_EQ(decode_length, base64_tests[i].plain_length);
666 
667     // Is it the expected decoded value?
668     EXPECT_EQ(0,
669               memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));
670 
671     // If we decode it into a buffer of exactly the right length...
672     memset(decode_buffer, 0, sizeof(decode_buffer));
673     decode_length = WebSafeBase64Unescape(encode_buffer,
674                                                    cypher_length,
675                                                    decode_buffer,
676                                                    decode_length);
677 
678     // Is it still of the expected length?
679     EXPECT_EQ(decode_length, base64_tests[i].plain_length);
680 
681     // And is it the expected decoded value?
682     EXPECT_EQ(0,
683               memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));
684 
685     // Let's try the string version of the decoder
686     decoded = "this junk should be ignored";
687     EXPECT_TRUE(WebSafeBase64Unescape(
688         StringPiece(encode_buffer, cypher_length), &decoded));
689     EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
690     EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);
691 
692     // Okay! the websafe Base64 encoder/decoder works.
693     // Let's try the unpadded version
694 
695     for (int c = 0; c < sizeof(websafe); ++c) {
696       if ('=' == websafe[c]) {
697         websafe[c] = '\0';
698         cypher_length = c;
699         break;
700       }
701     }
702 
703     // The websafe escape function:
704     memset(encode_buffer, 0, sizeof(encode_buffer));
705     encode_length = WebSafeBase64Escape(unsigned_plaintext,
706                                                  base64_tests[i].plain_length,
707                                                  encode_buffer,
708                                                  sizeof(encode_buffer),
709                                                  false);
710     //    Is it of the expected length?
711     EXPECT_EQ(encode_length, cypher_length);
712     EXPECT_EQ(
713         CalculateBase64EscapedLen(base64_tests[i].plain_length, false),
714         encode_length);
715 
716     //    Is it the expected encoded value?
717     EXPECT_STREQ(encode_buffer, websafe);
718 
719     //    If we encode it into a buffer of exactly the right length...
720     memset(encode_buffer, 0, sizeof(encode_buffer));
721     encode_length = WebSafeBase64Escape(unsigned_plaintext,
722                                                  base64_tests[i].plain_length,
723                                                  encode_buffer,
724                                                  cypher_length,
725                                                  false);
726     //    Is it still of the expected length?
727     EXPECT_EQ(encode_length, cypher_length);
728 
729     //    And is the value still correct?  (i.e., not losing the last byte)
730     EXPECT_STREQ(encode_buffer, websafe);
731 
732     // Let's try the (other) string version of the encoder
733     string plain(base64_tests[i].plaintext, base64_tests[i].plain_length);
734     encoded = "this junk should be ignored";
735     WebSafeBase64Escape(plain, &encoded);
736     EXPECT_EQ(encoded.size(), cypher_length);
737     EXPECT_STREQ(encoded.c_str(), websafe);
738 
739     //    If we decode it back:
740     memset(decode_buffer, 0, sizeof(decode_buffer));
741     decode_length = WebSafeBase64Unescape(encode_buffer,
742                                                    cypher_length,
743                                                    decode_buffer,
744                                                    sizeof(decode_buffer));
745 
746     //    Is it of the expected length?
747     EXPECT_EQ(decode_length, base64_tests[i].plain_length);
748 
749     //    Is it the expected decoded value?
750     EXPECT_EQ(0,
751               memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));
752 
753     //    If we decode it into a buffer of exactly the right length...
754     memset(decode_buffer, 0, sizeof(decode_buffer));
755     decode_length = WebSafeBase64Unescape(encode_buffer,
756                                                    cypher_length,
757                                                    decode_buffer,
758                                                    decode_length);
759 
760     //    Is it still of the expected length?
761     EXPECT_EQ(decode_length, base64_tests[i].plain_length);
762 
763     //    And is it the expected decoded value?
764     EXPECT_EQ(0,
765               memcmp(decode_buffer, base64_tests[i].plaintext, decode_length));
766 
767 
768     // Let's try the string version of the decoder
769     decoded = "this junk should be ignored";
770     EXPECT_TRUE(WebSafeBase64Unescape(
771         StringPiece(encode_buffer, cypher_length), &decoded));
772     EXPECT_EQ(decoded.size(), base64_tests[i].plain_length);
773     EXPECT_EQ_ARRAY(decoded.size(), decoded, base64_tests[i].plaintext, i);
774 
775     // This value works.  Try the next.
776   }
777 
778   // Now try the long strings, this tests the streaming
779   for (int i = 0; i < sizeof(base64_strings) / sizeof(base64_strings[0]);
780        ++i) {
781     const unsigned char* unsigned_plaintext =
782       reinterpret_cast<const unsigned char*>(base64_strings[i].plaintext);
783     int plain_length = strlen(base64_strings[i].plaintext);
784     int cypher_length = strlen(base64_strings[i].cyphertext);
785     vector<char> buffer(cypher_length+1);
786     int encode_length = WebSafeBase64Escape(unsigned_plaintext,
787                                                      plain_length,
788                                                      &buffer[0],
789                                                      buffer.size(),
790                                                      false);
791     EXPECT_EQ(cypher_length, encode_length);
792     EXPECT_EQ(
793         CalculateBase64EscapedLen(plain_length, false), encode_length);
794     buffer[ encode_length ] = '\0';
795     EXPECT_STREQ(base64_strings[i].cyphertext, &buffer[0]);
796   }
797 
798   // Verify the behavior when decoding bad data
799   {
800     const char* bad_data = "ab-/";
801     string buf;
802     EXPECT_FALSE(Base64Unescape(StringPiece(bad_data), &buf));
803     EXPECT_TRUE(!WebSafeBase64Unescape(bad_data, &buf));
804     EXPECT_TRUE(buf.empty());
805   }
806 }
807 
808 }  // anonymous namespace
809 }  // namespace protobuf
810 }  // namespace google
811