1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 Licensed under the Apache License, Version 2.0 (the "License");
3 you may not use this file except in compliance with the License.
4 You may obtain a copy of the License at
5
6 http://www.apache.org/licenses/LICENSE-2.0
7
8 Unless required by applicable law or agreed to in writing, software
9 distributed under the License is distributed on an "AS IS" BASIS,
10 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 See the License for the specific language governing permissions and
12 limitations under the License.
13 ==============================================================================*/
14
15 #include "tensorflow/core/lib/strings/numbers.h"
16
17 #include <ctype.h>
18 #include <float.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <algorithm>
22 #include <cinttypes>
23 #include <cmath>
24 #include <locale>
25 #include <unordered_map>
26
27 #include "double-conversion/double-conversion.h"
28
29 #include "tensorflow/core/lib/strings/str_util.h"
30 #include "tensorflow/core/lib/strings/stringprintf.h"
31 #include "tensorflow/core/platform/logging.h"
32 #include "tensorflow/core/platform/macros.h"
33 #include "tensorflow/core/platform/types.h"
34
35 namespace tensorflow {
36
37 namespace {
38
39 template <typename T>
GetSpecialNumsSingleton()40 const std::unordered_map<string, T>* GetSpecialNumsSingleton() {
41 static const std::unordered_map<string, T>* special_nums =
42 CHECK_NOTNULL((new const std::unordered_map<string, T>{
43 {"inf", std::numeric_limits<T>::infinity()},
44 {"+inf", std::numeric_limits<T>::infinity()},
45 {"-inf", -std::numeric_limits<T>::infinity()},
46 {"infinity", std::numeric_limits<T>::infinity()},
47 {"+infinity", std::numeric_limits<T>::infinity()},
48 {"-infinity", -std::numeric_limits<T>::infinity()},
49 {"nan", std::numeric_limits<T>::quiet_NaN()},
50 {"+nan", std::numeric_limits<T>::quiet_NaN()},
51 {"-nan", -std::numeric_limits<T>::quiet_NaN()},
52 }));
53 return special_nums;
54 }
55
56 template <typename T>
locale_independent_strtonum(const char * str,const char ** endptr)57 T locale_independent_strtonum(const char* str, const char** endptr) {
58 auto special_nums = GetSpecialNumsSingleton<T>();
59 std::stringstream s(str);
60
61 // Check if str is one of the special numbers.
62 string special_num_str;
63 s >> special_num_str;
64
65 for (int i = 0; i < special_num_str.length(); ++i) {
66 special_num_str[i] =
67 std::tolower(special_num_str[i], std::locale::classic());
68 }
69
70 auto entry = special_nums->find(special_num_str);
71 if (entry != special_nums->end()) {
72 *endptr = str + (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
73 : s.tellg());
74 return entry->second;
75 } else {
76 // Perhaps it's a hex number
77 if (special_num_str.compare(0, 2, "0x") == 0 ||
78 special_num_str.compare(0, 3, "-0x") == 0) {
79 return strtol(str, const_cast<char**>(endptr), 16);
80 }
81 }
82 // Reset the stream
83 s.str(str);
84 s.clear();
85 // Use the "C" locale
86 s.imbue(std::locale::classic());
87
88 T result;
89 s >> result;
90
91 // Set to result to what strto{f,d} functions would have returned. If the
92 // number was outside the range, the stringstream sets the fail flag, but
93 // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
94 if (s.fail()) {
95 if (result == std::numeric_limits<T>::max() ||
96 result == std::numeric_limits<T>::infinity()) {
97 result = std::numeric_limits<T>::infinity();
98 s.clear(s.rdstate() & ~std::ios::failbit);
99 } else if (result == -std::numeric_limits<T>::max() ||
100 result == -std::numeric_limits<T>::infinity()) {
101 result = -std::numeric_limits<T>::infinity();
102 s.clear(s.rdstate() & ~std::ios::failbit);
103 }
104 }
105
106 if (endptr) {
107 *endptr =
108 str +
109 (s.fail() ? static_cast<std::iostream::pos_type>(0)
110 : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
111 : s.tellg()));
112 }
113 return result;
114 }
115
116 static inline const double_conversion::StringToDoubleConverter&
StringToFloatConverter()117 StringToFloatConverter() {
118 static const double_conversion::StringToDoubleConverter converter(
119 double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
120 double_conversion::StringToDoubleConverter::ALLOW_HEX |
121 double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES |
122 double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY,
123 0., 0., "inf", "nan");
124 return converter;
125 }
126
127 } // namespace
128
129 namespace strings {
130
FastInt32ToBufferLeft(int32 i,char * buffer)131 size_t FastInt32ToBufferLeft(int32 i, char* buffer) {
132 uint32 u = i;
133 size_t length = 0;
134 if (i < 0) {
135 *buffer++ = '-';
136 ++length;
137 // We need to do the negation in modular (i.e., "unsigned")
138 // arithmetic; MSVC++ apparently warns for plain "-u", so
139 // we write the equivalent expression "0 - u" instead.
140 u = 0 - u;
141 }
142 length += FastUInt32ToBufferLeft(u, buffer);
143 return length;
144 }
145
FastUInt32ToBufferLeft(uint32 i,char * buffer)146 size_t FastUInt32ToBufferLeft(uint32 i, char* buffer) {
147 char* start = buffer;
148 do {
149 *buffer++ = ((i % 10) + '0');
150 i /= 10;
151 } while (i > 0);
152 *buffer = 0;
153 std::reverse(start, buffer);
154 return buffer - start;
155 }
156
FastInt64ToBufferLeft(int64 i,char * buffer)157 size_t FastInt64ToBufferLeft(int64 i, char* buffer) {
158 uint64 u = i;
159 size_t length = 0;
160 if (i < 0) {
161 *buffer++ = '-';
162 ++length;
163 u = 0 - u;
164 }
165 length += FastUInt64ToBufferLeft(u, buffer);
166 return length;
167 }
168
FastUInt64ToBufferLeft(uint64 i,char * buffer)169 size_t FastUInt64ToBufferLeft(uint64 i, char* buffer) {
170 char* start = buffer;
171 do {
172 *buffer++ = ((i % 10) + '0');
173 i /= 10;
174 } while (i > 0);
175 *buffer = 0;
176 std::reverse(start, buffer);
177 return buffer - start;
178 }
179
180 static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001;
181
DoubleToBuffer(double value,char * buffer)182 size_t DoubleToBuffer(double value, char* buffer) {
183 // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
184 // platforms these days. Just in case some system exists where DBL_DIG
185 // is significantly larger -- and risks overflowing our buffer -- we have
186 // this assert.
187 static_assert(DBL_DIG < 20, "DBL_DIG is too big");
188
189 if (std::abs(value) <= kDoublePrecisionCheckMax) {
190 int snprintf_result =
191 snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG, value);
192
193 // The snprintf should never overflow because the buffer is significantly
194 // larger than the precision we asked for.
195 DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
196
197 if (locale_independent_strtonum<double>(buffer, nullptr) == value) {
198 // Round-tripping the string to double works; we're done.
199 return snprintf_result;
200 }
201 // else: full precision formatting needed. Fall through.
202 }
203
204 int snprintf_result =
205 snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG + 2, value);
206
207 // Should never overflow; see above.
208 DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
209
210 return snprintf_result;
211 }
212
213 namespace {
SafeFirstChar(StringPiece str)214 char SafeFirstChar(StringPiece str) {
215 if (str.empty()) return '\0';
216 return str[0];
217 }
SkipSpaces(StringPiece * str)218 void SkipSpaces(StringPiece* str) {
219 while (isspace(SafeFirstChar(*str))) str->remove_prefix(1);
220 }
221 } // namespace
222
safe_strto64(StringPiece str,int64 * value)223 bool safe_strto64(StringPiece str, int64* value) {
224 SkipSpaces(&str);
225
226 int64 vlimit = kint64max;
227 int sign = 1;
228 if (str_util::ConsumePrefix(&str, "-")) {
229 sign = -1;
230 // Different limit for positive and negative integers.
231 vlimit = kint64min;
232 }
233
234 if (!isdigit(SafeFirstChar(str))) return false;
235
236 int64 result = 0;
237 if (sign == 1) {
238 do {
239 int digit = SafeFirstChar(str) - '0';
240 if ((vlimit - digit) / 10 < result) {
241 return false;
242 }
243 result = result * 10 + digit;
244 str.remove_prefix(1);
245 } while (isdigit(SafeFirstChar(str)));
246 } else {
247 do {
248 int digit = SafeFirstChar(str) - '0';
249 if ((vlimit + digit) / 10 > result) {
250 return false;
251 }
252 result = result * 10 - digit;
253 str.remove_prefix(1);
254 } while (isdigit(SafeFirstChar(str)));
255 }
256
257 SkipSpaces(&str);
258 if (!str.empty()) return false;
259
260 *value = result;
261 return true;
262 }
263
safe_strtou64(StringPiece str,uint64 * value)264 bool safe_strtou64(StringPiece str, uint64* value) {
265 SkipSpaces(&str);
266 if (!isdigit(SafeFirstChar(str))) return false;
267
268 uint64 result = 0;
269 do {
270 int digit = SafeFirstChar(str) - '0';
271 if ((kuint64max - digit) / 10 < result) {
272 return false;
273 }
274 result = result * 10 + digit;
275 str.remove_prefix(1);
276 } while (isdigit(SafeFirstChar(str)));
277
278 SkipSpaces(&str);
279 if (!str.empty()) return false;
280
281 *value = result;
282 return true;
283 }
284
safe_strto32(StringPiece str,int32 * value)285 bool safe_strto32(StringPiece str, int32* value) {
286 SkipSpaces(&str);
287
288 int64 vmax = kint32max;
289 int sign = 1;
290 if (str_util::ConsumePrefix(&str, "-")) {
291 sign = -1;
292 // Different max for positive and negative integers.
293 ++vmax;
294 }
295
296 if (!isdigit(SafeFirstChar(str))) return false;
297
298 int64 result = 0;
299 do {
300 result = result * 10 + SafeFirstChar(str) - '0';
301 if (result > vmax) {
302 return false;
303 }
304 str.remove_prefix(1);
305 } while (isdigit(SafeFirstChar(str)));
306
307 SkipSpaces(&str);
308
309 if (!str.empty()) return false;
310
311 *value = static_cast<int32>(result * sign);
312 return true;
313 }
314
safe_strtou32(StringPiece str,uint32 * value)315 bool safe_strtou32(StringPiece str, uint32* value) {
316 SkipSpaces(&str);
317 if (!isdigit(SafeFirstChar(str))) return false;
318
319 int64 result = 0;
320 do {
321 result = result * 10 + SafeFirstChar(str) - '0';
322 if (result > kuint32max) {
323 return false;
324 }
325 str.remove_prefix(1);
326 } while (isdigit(SafeFirstChar(str)));
327
328 SkipSpaces(&str);
329 if (!str.empty()) return false;
330
331 *value = static_cast<uint32>(result);
332 return true;
333 }
334
safe_strtof(StringPiece str,float * value)335 bool safe_strtof(StringPiece str, float* value) {
336 int processed_characters_count = -1;
337 auto len = str.size();
338
339 // If string length exceeds buffer size or int max, fail.
340 if (len >= kFastToBufferSize) return false;
341 if (len > std::numeric_limits<int>::max()) return false;
342
343 *value = StringToFloatConverter().StringToFloat(
344 str.data(), static_cast<int>(len), &processed_characters_count);
345 return processed_characters_count > 0;
346 }
347
safe_strtod(StringPiece str,double * value)348 bool safe_strtod(StringPiece str, double* value) {
349 int processed_characters_count = -1;
350 auto len = str.size();
351
352 // If string length exceeds buffer size or int max, fail.
353 if (len >= kFastToBufferSize) return false;
354 if (len > std::numeric_limits<int>::max()) return false;
355
356 *value = StringToFloatConverter().StringToDouble(
357 str.data(), static_cast<int>(len), &processed_characters_count);
358 return processed_characters_count > 0;
359 }
360
FloatToBuffer(float value,char * buffer)361 size_t FloatToBuffer(float value, char* buffer) {
362 // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
363 // platforms these days. Just in case some system exists where FLT_DIG
364 // is significantly larger -- and risks overflowing our buffer -- we have
365 // this assert.
366 static_assert(FLT_DIG < 10, "FLT_DIG is too big");
367
368 int snprintf_result =
369 snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG, value);
370
371 // The snprintf should never overflow because the buffer is significantly
372 // larger than the precision we asked for.
373 DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
374
375 float parsed_value;
376 if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
377 snprintf_result =
378 snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG + 3, value);
379
380 // Should never overflow; see above.
381 DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
382 }
383 return snprintf_result;
384 }
385
FpToString(Fprint fp)386 string FpToString(Fprint fp) {
387 char buf[17];
388 snprintf(buf, sizeof(buf), "%016llx", static_cast<uint64>(fp));
389 return string(buf);
390 }
391
StringToFp(const string & s,Fprint * fp)392 bool StringToFp(const string& s, Fprint* fp) {
393 char junk;
394 uint64_t result;
395 if (sscanf(s.c_str(), "%" SCNx64 "%c", &result, &junk) == 1) {
396 *fp = result;
397 return true;
398 } else {
399 return false;
400 }
401 }
402
Uint64ToHexString(uint64 v,char * buf)403 StringPiece Uint64ToHexString(uint64 v, char* buf) {
404 static const char* hexdigits = "0123456789abcdef";
405 const int num_byte = 16;
406 buf[num_byte] = '\0';
407 for (int i = num_byte - 1; i >= 0; i--) {
408 buf[i] = hexdigits[v & 0xf];
409 v >>= 4;
410 }
411 return StringPiece(buf, num_byte);
412 }
413
HexStringToUint64(const StringPiece & s,uint64 * result)414 bool HexStringToUint64(const StringPiece& s, uint64* result) {
415 uint64 v = 0;
416 if (s.empty()) {
417 return false;
418 }
419 for (size_t i = 0; i < s.size(); i++) {
420 char c = s[i];
421 if (c >= '0' && c <= '9') {
422 v = (v << 4) + (c - '0');
423 } else if (c >= 'a' && c <= 'f') {
424 v = (v << 4) + 10 + (c - 'a');
425 } else if (c >= 'A' && c <= 'F') {
426 v = (v << 4) + 10 + (c - 'A');
427 } else {
428 return false;
429 }
430 }
431 *result = v;
432 return true;
433 }
434
HumanReadableNum(int64 value)435 string HumanReadableNum(int64 value) {
436 string s;
437 if (value < 0) {
438 s += "-";
439 value = -value;
440 }
441 if (value < 1000) {
442 Appendf(&s, "%lld", value);
443 } else if (value >= static_cast<int64>(1e15)) {
444 // Number bigger than 1E15; use that notation.
445 Appendf(&s, "%0.3G", static_cast<double>(value));
446 } else {
447 static const char units[] = "kMBT";
448 const char* unit = units;
449 while (value >= static_cast<int64>(1000000)) {
450 value /= static_cast<int64>(1000);
451 ++unit;
452 CHECK(unit < units + TF_ARRAYSIZE(units));
453 }
454 Appendf(&s, "%.2f%c", value / 1000.0, *unit);
455 }
456 return s;
457 }
458
HumanReadableNumBytes(int64 num_bytes)459 string HumanReadableNumBytes(int64 num_bytes) {
460 if (num_bytes == kint64min) {
461 // Special case for number with not representable negation.
462 return "-8E";
463 }
464
465 const char* neg_str = (num_bytes < 0) ? "-" : "";
466 if (num_bytes < 0) {
467 num_bytes = -num_bytes;
468 }
469
470 // Special case for bytes.
471 if (num_bytes < 1024) {
472 // No fractions for bytes.
473 char buf[8]; // Longest possible string is '-XXXXB'
474 snprintf(buf, sizeof(buf), "%s%lldB", neg_str,
475 static_cast<int64>(num_bytes));
476 return string(buf);
477 }
478
479 static const char units[] = "KMGTPE"; // int64 only goes up to E.
480 const char* unit = units;
481 while (num_bytes >= static_cast<int64>(1024) * 1024) {
482 num_bytes /= 1024;
483 ++unit;
484 CHECK(unit < units + TF_ARRAYSIZE(units));
485 }
486
487 // We use SI prefixes.
488 char buf[16];
489 snprintf(buf, sizeof(buf), ((*unit == 'K') ? "%s%.1f%ciB" : "%s%.2f%ciB"),
490 neg_str, num_bytes / 1024.0, *unit);
491 return string(buf);
492 }
493
HumanReadableElapsedTime(double seconds)494 string HumanReadableElapsedTime(double seconds) {
495 string human_readable;
496
497 if (seconds < 0) {
498 human_readable = "-";
499 seconds = -seconds;
500 }
501
502 // Start with us and keep going up to years.
503 // The comparisons must account for rounding to prevent the format breaking
504 // the tested condition and returning, e.g., "1e+03 us" instead of "1 ms".
505 const double microseconds = seconds * 1.0e6;
506 if (microseconds < 999.5) {
507 strings::Appendf(&human_readable, "%0.3g us", microseconds);
508 return human_readable;
509 }
510 double milliseconds = seconds * 1e3;
511 if (milliseconds >= .995 && milliseconds < 1) {
512 // Round half to even in Appendf would convert this to 0.999 ms.
513 milliseconds = 1.0;
514 }
515 if (milliseconds < 999.5) {
516 strings::Appendf(&human_readable, "%0.3g ms", milliseconds);
517 return human_readable;
518 }
519 if (seconds < 60.0) {
520 strings::Appendf(&human_readable, "%0.3g s", seconds);
521 return human_readable;
522 }
523 seconds /= 60.0;
524 if (seconds < 60.0) {
525 strings::Appendf(&human_readable, "%0.3g min", seconds);
526 return human_readable;
527 }
528 seconds /= 60.0;
529 if (seconds < 24.0) {
530 strings::Appendf(&human_readable, "%0.3g h", seconds);
531 return human_readable;
532 }
533 seconds /= 24.0;
534 if (seconds < 30.0) {
535 strings::Appendf(&human_readable, "%0.3g days", seconds);
536 return human_readable;
537 }
538 if (seconds < 365.2425) {
539 strings::Appendf(&human_readable, "%0.3g months", seconds / 30.436875);
540 return human_readable;
541 }
542 seconds /= 365.2425;
543 strings::Appendf(&human_readable, "%0.3g years", seconds);
544 return human_readable;
545 }
546
547 } // namespace strings
548 } // namespace tensorflow
549