1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 Licensed under the Apache License, Version 2.0 (the "License");
3 you may not use this file except in compliance with the License.
4 You may obtain a copy of the License at
5 
6     http://www.apache.org/licenses/LICENSE-2.0
7 
8 Unless required by applicable law or agreed to in writing, software
9 distributed under the License is distributed on an "AS IS" BASIS,
10 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 See the License for the specific language governing permissions and
12 limitations under the License.
13 ==============================================================================*/
14 
15 #include "tensorflow/core/lib/strings/numbers.h"
16 
17 #include <ctype.h>
18 #include <float.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <algorithm>
22 #include <cinttypes>
23 #include <cmath>
24 #include <locale>
25 #include <unordered_map>
26 
27 #include "double-conversion/double-conversion.h"
28 
29 #include "tensorflow/core/lib/strings/str_util.h"
30 #include "tensorflow/core/lib/strings/stringprintf.h"
31 #include "tensorflow/core/platform/logging.h"
32 #include "tensorflow/core/platform/macros.h"
33 #include "tensorflow/core/platform/types.h"
34 
35 namespace tensorflow {
36 
37 namespace {
38 
39 template <typename T>
GetSpecialNumsSingleton()40 const std::unordered_map<string, T>* GetSpecialNumsSingleton() {
41   static const std::unordered_map<string, T>* special_nums =
42       CHECK_NOTNULL((new const std::unordered_map<string, T>{
43           {"inf", std::numeric_limits<T>::infinity()},
44           {"+inf", std::numeric_limits<T>::infinity()},
45           {"-inf", -std::numeric_limits<T>::infinity()},
46           {"infinity", std::numeric_limits<T>::infinity()},
47           {"+infinity", std::numeric_limits<T>::infinity()},
48           {"-infinity", -std::numeric_limits<T>::infinity()},
49           {"nan", std::numeric_limits<T>::quiet_NaN()},
50           {"+nan", std::numeric_limits<T>::quiet_NaN()},
51           {"-nan", -std::numeric_limits<T>::quiet_NaN()},
52       }));
53   return special_nums;
54 }
55 
56 template <typename T>
locale_independent_strtonum(const char * str,const char ** endptr)57 T locale_independent_strtonum(const char* str, const char** endptr) {
58   auto special_nums = GetSpecialNumsSingleton<T>();
59   std::stringstream s(str);
60 
61   // Check if str is one of the special numbers.
62   string special_num_str;
63   s >> special_num_str;
64 
65   for (int i = 0; i < special_num_str.length(); ++i) {
66     special_num_str[i] =
67         std::tolower(special_num_str[i], std::locale::classic());
68   }
69 
70   auto entry = special_nums->find(special_num_str);
71   if (entry != special_nums->end()) {
72     *endptr = str + (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
73                              : s.tellg());
74     return entry->second;
75   } else {
76     // Perhaps it's a hex number
77     if (special_num_str.compare(0, 2, "0x") == 0 ||
78         special_num_str.compare(0, 3, "-0x") == 0) {
79       return strtol(str, const_cast<char**>(endptr), 16);
80     }
81   }
82   // Reset the stream
83   s.str(str);
84   s.clear();
85   // Use the "C" locale
86   s.imbue(std::locale::classic());
87 
88   T result;
89   s >> result;
90 
91   // Set to result to what strto{f,d} functions would have returned. If the
92   // number was outside the range, the stringstream sets the fail flag, but
93   // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
94   if (s.fail()) {
95     if (result == std::numeric_limits<T>::max() ||
96         result == std::numeric_limits<T>::infinity()) {
97       result = std::numeric_limits<T>::infinity();
98       s.clear(s.rdstate() & ~std::ios::failbit);
99     } else if (result == -std::numeric_limits<T>::max() ||
100                result == -std::numeric_limits<T>::infinity()) {
101       result = -std::numeric_limits<T>::infinity();
102       s.clear(s.rdstate() & ~std::ios::failbit);
103     }
104   }
105 
106   if (endptr) {
107     *endptr =
108         str +
109         (s.fail() ? static_cast<std::iostream::pos_type>(0)
110                   : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
111                              : s.tellg()));
112   }
113   return result;
114 }
115 
116 static inline const double_conversion::StringToDoubleConverter&
StringToFloatConverter()117 StringToFloatConverter() {
118   static const double_conversion::StringToDoubleConverter converter(
119       double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
120           double_conversion::StringToDoubleConverter::ALLOW_HEX |
121           double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES |
122           double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY,
123       0., 0., "inf", "nan");
124   return converter;
125 }
126 
127 }  // namespace
128 
129 namespace strings {
130 
FastInt32ToBufferLeft(int32 i,char * buffer)131 size_t FastInt32ToBufferLeft(int32 i, char* buffer) {
132   uint32 u = i;
133   size_t length = 0;
134   if (i < 0) {
135     *buffer++ = '-';
136     ++length;
137     // We need to do the negation in modular (i.e., "unsigned")
138     // arithmetic; MSVC++ apparently warns for plain "-u", so
139     // we write the equivalent expression "0 - u" instead.
140     u = 0 - u;
141   }
142   length += FastUInt32ToBufferLeft(u, buffer);
143   return length;
144 }
145 
FastUInt32ToBufferLeft(uint32 i,char * buffer)146 size_t FastUInt32ToBufferLeft(uint32 i, char* buffer) {
147   char* start = buffer;
148   do {
149     *buffer++ = ((i % 10) + '0');
150     i /= 10;
151   } while (i > 0);
152   *buffer = 0;
153   std::reverse(start, buffer);
154   return buffer - start;
155 }
156 
FastInt64ToBufferLeft(int64 i,char * buffer)157 size_t FastInt64ToBufferLeft(int64 i, char* buffer) {
158   uint64 u = i;
159   size_t length = 0;
160   if (i < 0) {
161     *buffer++ = '-';
162     ++length;
163     u = 0 - u;
164   }
165   length += FastUInt64ToBufferLeft(u, buffer);
166   return length;
167 }
168 
FastUInt64ToBufferLeft(uint64 i,char * buffer)169 size_t FastUInt64ToBufferLeft(uint64 i, char* buffer) {
170   char* start = buffer;
171   do {
172     *buffer++ = ((i % 10) + '0');
173     i /= 10;
174   } while (i > 0);
175   *buffer = 0;
176   std::reverse(start, buffer);
177   return buffer - start;
178 }
179 
180 static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001;
181 
DoubleToBuffer(double value,char * buffer)182 size_t DoubleToBuffer(double value, char* buffer) {
183   // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
184   // platforms these days.  Just in case some system exists where DBL_DIG
185   // is significantly larger -- and risks overflowing our buffer -- we have
186   // this assert.
187   static_assert(DBL_DIG < 20, "DBL_DIG is too big");
188 
189   if (std::abs(value) <= kDoublePrecisionCheckMax) {
190     int snprintf_result =
191         snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG, value);
192 
193     // The snprintf should never overflow because the buffer is significantly
194     // larger than the precision we asked for.
195     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
196 
197     if (locale_independent_strtonum<double>(buffer, nullptr) == value) {
198       // Round-tripping the string to double works; we're done.
199       return snprintf_result;
200     }
201     // else: full precision formatting needed. Fall through.
202   }
203 
204   int snprintf_result =
205       snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG + 2, value);
206 
207   // Should never overflow; see above.
208   DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
209 
210   return snprintf_result;
211 }
212 
213 namespace {
SafeFirstChar(StringPiece str)214 char SafeFirstChar(StringPiece str) {
215   if (str.empty()) return '\0';
216   return str[0];
217 }
SkipSpaces(StringPiece * str)218 void SkipSpaces(StringPiece* str) {
219   while (isspace(SafeFirstChar(*str))) str->remove_prefix(1);
220 }
221 }  // namespace
222 
safe_strto64(StringPiece str,int64 * value)223 bool safe_strto64(StringPiece str, int64* value) {
224   SkipSpaces(&str);
225 
226   int64 vlimit = kint64max;
227   int sign = 1;
228   if (str_util::ConsumePrefix(&str, "-")) {
229     sign = -1;
230     // Different limit for positive and negative integers.
231     vlimit = kint64min;
232   }
233 
234   if (!isdigit(SafeFirstChar(str))) return false;
235 
236   int64 result = 0;
237   if (sign == 1) {
238     do {
239       int digit = SafeFirstChar(str) - '0';
240       if ((vlimit - digit) / 10 < result) {
241         return false;
242       }
243       result = result * 10 + digit;
244       str.remove_prefix(1);
245     } while (isdigit(SafeFirstChar(str)));
246   } else {
247     do {
248       int digit = SafeFirstChar(str) - '0';
249       if ((vlimit + digit) / 10 > result) {
250         return false;
251       }
252       result = result * 10 - digit;
253       str.remove_prefix(1);
254     } while (isdigit(SafeFirstChar(str)));
255   }
256 
257   SkipSpaces(&str);
258   if (!str.empty()) return false;
259 
260   *value = result;
261   return true;
262 }
263 
safe_strtou64(StringPiece str,uint64 * value)264 bool safe_strtou64(StringPiece str, uint64* value) {
265   SkipSpaces(&str);
266   if (!isdigit(SafeFirstChar(str))) return false;
267 
268   uint64 result = 0;
269   do {
270     int digit = SafeFirstChar(str) - '0';
271     if ((kuint64max - digit) / 10 < result) {
272       return false;
273     }
274     result = result * 10 + digit;
275     str.remove_prefix(1);
276   } while (isdigit(SafeFirstChar(str)));
277 
278   SkipSpaces(&str);
279   if (!str.empty()) return false;
280 
281   *value = result;
282   return true;
283 }
284 
safe_strto32(StringPiece str,int32 * value)285 bool safe_strto32(StringPiece str, int32* value) {
286   SkipSpaces(&str);
287 
288   int64 vmax = kint32max;
289   int sign = 1;
290   if (str_util::ConsumePrefix(&str, "-")) {
291     sign = -1;
292     // Different max for positive and negative integers.
293     ++vmax;
294   }
295 
296   if (!isdigit(SafeFirstChar(str))) return false;
297 
298   int64 result = 0;
299   do {
300     result = result * 10 + SafeFirstChar(str) - '0';
301     if (result > vmax) {
302       return false;
303     }
304     str.remove_prefix(1);
305   } while (isdigit(SafeFirstChar(str)));
306 
307   SkipSpaces(&str);
308 
309   if (!str.empty()) return false;
310 
311   *value = static_cast<int32>(result * sign);
312   return true;
313 }
314 
safe_strtou32(StringPiece str,uint32 * value)315 bool safe_strtou32(StringPiece str, uint32* value) {
316   SkipSpaces(&str);
317   if (!isdigit(SafeFirstChar(str))) return false;
318 
319   int64 result = 0;
320   do {
321     result = result * 10 + SafeFirstChar(str) - '0';
322     if (result > kuint32max) {
323       return false;
324     }
325     str.remove_prefix(1);
326   } while (isdigit(SafeFirstChar(str)));
327 
328   SkipSpaces(&str);
329   if (!str.empty()) return false;
330 
331   *value = static_cast<uint32>(result);
332   return true;
333 }
334 
safe_strtof(StringPiece str,float * value)335 bool safe_strtof(StringPiece str, float* value) {
336   int processed_characters_count = -1;
337   auto len = str.size();
338 
339   // If string length exceeds buffer size or int max, fail.
340   if (len >= kFastToBufferSize) return false;
341   if (len > std::numeric_limits<int>::max()) return false;
342 
343   *value = StringToFloatConverter().StringToFloat(
344       str.data(), static_cast<int>(len), &processed_characters_count);
345   return processed_characters_count > 0;
346 }
347 
safe_strtod(StringPiece str,double * value)348 bool safe_strtod(StringPiece str, double* value) {
349   int processed_characters_count = -1;
350   auto len = str.size();
351 
352   // If string length exceeds buffer size or int max, fail.
353   if (len >= kFastToBufferSize) return false;
354   if (len > std::numeric_limits<int>::max()) return false;
355 
356   *value = StringToFloatConverter().StringToDouble(
357       str.data(), static_cast<int>(len), &processed_characters_count);
358   return processed_characters_count > 0;
359 }
360 
FloatToBuffer(float value,char * buffer)361 size_t FloatToBuffer(float value, char* buffer) {
362   // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
363   // platforms these days.  Just in case some system exists where FLT_DIG
364   // is significantly larger -- and risks overflowing our buffer -- we have
365   // this assert.
366   static_assert(FLT_DIG < 10, "FLT_DIG is too big");
367 
368   int snprintf_result =
369       snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG, value);
370 
371   // The snprintf should never overflow because the buffer is significantly
372   // larger than the precision we asked for.
373   DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
374 
375   float parsed_value;
376   if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
377     snprintf_result =
378         snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG + 3, value);
379 
380     // Should never overflow; see above.
381     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
382   }
383   return snprintf_result;
384 }
385 
FpToString(Fprint fp)386 string FpToString(Fprint fp) {
387   char buf[17];
388   snprintf(buf, sizeof(buf), "%016llx", static_cast<uint64>(fp));
389   return string(buf);
390 }
391 
StringToFp(const string & s,Fprint * fp)392 bool StringToFp(const string& s, Fprint* fp) {
393   char junk;
394   uint64_t result;
395   if (sscanf(s.c_str(), "%" SCNx64 "%c", &result, &junk) == 1) {
396     *fp = result;
397     return true;
398   } else {
399     return false;
400   }
401 }
402 
Uint64ToHexString(uint64 v,char * buf)403 StringPiece Uint64ToHexString(uint64 v, char* buf) {
404   static const char* hexdigits = "0123456789abcdef";
405   const int num_byte = 16;
406   buf[num_byte] = '\0';
407   for (int i = num_byte - 1; i >= 0; i--) {
408     buf[i] = hexdigits[v & 0xf];
409     v >>= 4;
410   }
411   return StringPiece(buf, num_byte);
412 }
413 
HexStringToUint64(const StringPiece & s,uint64 * result)414 bool HexStringToUint64(const StringPiece& s, uint64* result) {
415   uint64 v = 0;
416   if (s.empty()) {
417     return false;
418   }
419   for (size_t i = 0; i < s.size(); i++) {
420     char c = s[i];
421     if (c >= '0' && c <= '9') {
422       v = (v << 4) + (c - '0');
423     } else if (c >= 'a' && c <= 'f') {
424       v = (v << 4) + 10 + (c - 'a');
425     } else if (c >= 'A' && c <= 'F') {
426       v = (v << 4) + 10 + (c - 'A');
427     } else {
428       return false;
429     }
430   }
431   *result = v;
432   return true;
433 }
434 
HumanReadableNum(int64 value)435 string HumanReadableNum(int64 value) {
436   string s;
437   if (value < 0) {
438     s += "-";
439     value = -value;
440   }
441   if (value < 1000) {
442     Appendf(&s, "%lld", value);
443   } else if (value >= static_cast<int64>(1e15)) {
444     // Number bigger than 1E15; use that notation.
445     Appendf(&s, "%0.3G", static_cast<double>(value));
446   } else {
447     static const char units[] = "kMBT";
448     const char* unit = units;
449     while (value >= static_cast<int64>(1000000)) {
450       value /= static_cast<int64>(1000);
451       ++unit;
452       CHECK(unit < units + TF_ARRAYSIZE(units));
453     }
454     Appendf(&s, "%.2f%c", value / 1000.0, *unit);
455   }
456   return s;
457 }
458 
HumanReadableNumBytes(int64 num_bytes)459 string HumanReadableNumBytes(int64 num_bytes) {
460   if (num_bytes == kint64min) {
461     // Special case for number with not representable negation.
462     return "-8E";
463   }
464 
465   const char* neg_str = (num_bytes < 0) ? "-" : "";
466   if (num_bytes < 0) {
467     num_bytes = -num_bytes;
468   }
469 
470   // Special case for bytes.
471   if (num_bytes < 1024) {
472     // No fractions for bytes.
473     char buf[8];  // Longest possible string is '-XXXXB'
474     snprintf(buf, sizeof(buf), "%s%lldB", neg_str,
475              static_cast<int64>(num_bytes));
476     return string(buf);
477   }
478 
479   static const char units[] = "KMGTPE";  // int64 only goes up to E.
480   const char* unit = units;
481   while (num_bytes >= static_cast<int64>(1024) * 1024) {
482     num_bytes /= 1024;
483     ++unit;
484     CHECK(unit < units + TF_ARRAYSIZE(units));
485   }
486 
487   // We use SI prefixes.
488   char buf[16];
489   snprintf(buf, sizeof(buf), ((*unit == 'K') ? "%s%.1f%ciB" : "%s%.2f%ciB"),
490            neg_str, num_bytes / 1024.0, *unit);
491   return string(buf);
492 }
493 
HumanReadableElapsedTime(double seconds)494 string HumanReadableElapsedTime(double seconds) {
495   string human_readable;
496 
497   if (seconds < 0) {
498     human_readable = "-";
499     seconds = -seconds;
500   }
501 
502   // Start with us and keep going up to years.
503   // The comparisons must account for rounding to prevent the format breaking
504   // the tested condition and returning, e.g., "1e+03 us" instead of "1 ms".
505   const double microseconds = seconds * 1.0e6;
506   if (microseconds < 999.5) {
507     strings::Appendf(&human_readable, "%0.3g us", microseconds);
508     return human_readable;
509   }
510   double milliseconds = seconds * 1e3;
511   if (milliseconds >= .995 && milliseconds < 1) {
512     // Round half to even in Appendf would convert this to 0.999 ms.
513     milliseconds = 1.0;
514   }
515   if (milliseconds < 999.5) {
516     strings::Appendf(&human_readable, "%0.3g ms", milliseconds);
517     return human_readable;
518   }
519   if (seconds < 60.0) {
520     strings::Appendf(&human_readable, "%0.3g s", seconds);
521     return human_readable;
522   }
523   seconds /= 60.0;
524   if (seconds < 60.0) {
525     strings::Appendf(&human_readable, "%0.3g min", seconds);
526     return human_readable;
527   }
528   seconds /= 60.0;
529   if (seconds < 24.0) {
530     strings::Appendf(&human_readable, "%0.3g h", seconds);
531     return human_readable;
532   }
533   seconds /= 24.0;
534   if (seconds < 30.0) {
535     strings::Appendf(&human_readable, "%0.3g days", seconds);
536     return human_readable;
537   }
538   if (seconds < 365.2425) {
539     strings::Appendf(&human_readable, "%0.3g months", seconds / 30.436875);
540     return human_readable;
541   }
542   seconds /= 365.2425;
543   strings::Appendf(&human_readable, "%0.3g years", seconds);
544   return human_readable;
545 }
546 
547 }  // namespace strings
548 }  // namespace tensorflow
549