1 // Copyright (c) 2009-2010 Satoshi Nakamoto
2 // Copyright (c) 2009-2014 The Bitcoin Core developers
3 // Distributed under the MIT software license, see the accompanying
4 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
6 #include "utilstrencodings.h"
8 #include "tinyformat.h"
/**
 * Return a copy of str that contains only whitelisted characters.
 *
 * The whitelist is chosen to allow simple messages/URLs/email addresses, but
 * to avoid anything even possibly remotely dangerous like & or >.
 * Characters that are not on the whitelist are dropped, not replaced.
 */
std::string SanitizeString(const std::string& str)
{
    static const std::string kAllowed("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890 .,;_/:?@()");

    std::string kept;
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
        const char ch = *it;
        if (kAllowed.find(ch) == std::string::npos)
            continue; // not on the whitelist: drop it
        kept.push_back(ch);
    }
    return kept;
}
/**
 * Return a copy of str reduced to ASCII letters and digits.
 *
 * The character set is deliberately restrictive so that the result is a
 * simple file name that avoids cross-platform issues
 * (see http://stackoverflow.com/a/2306003). All other characters, including
 * dots and path separators, are dropped.
 */
std::string SanitizeFilename(const std::string& str)
{
    static const std::string kAllowed("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890");

    std::string kept;
    // Copy each maximal run of allowed characters, skipping whatever lies between runs.
    std::string::size_type run_begin = str.find_first_of(kAllowed);
    while (run_begin != std::string::npos) {
        const std::string::size_type run_end = str.find_first_not_of(kAllowed, run_begin);
        if (run_end == std::string::npos) {
            // The run extends to the end of the input.
            kept.append(str, run_begin, std::string::npos);
            break;
        }
        kept.append(str, run_begin, run_end - run_begin);
        run_begin = str.find_first_of(kAllowed, run_end);
    }
    return kept;
}
/**
 * Lookup table from a byte value to the value of the hexadecimal digit it
 * encodes: 0-9 for '0'-'9', 10-15 for 'a'-'f' and 'A'-'F', and -1 for every
 * other byte. One row per 16 byte values.
 */
const signed char p_util_hexdigit[256] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // '0'-'9'
    /* 0x40 */ -1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'A'-'F'
    /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60 */ -1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'a'-'f'
    /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
67 signed char HexDigit(char c)
69 return p_util_hexdigit[(unsigned char)c];
72 bool IsHex(const string& str)
74 for(std::string::const_iterator it(str.begin()); it != str.end(); ++it)
76 if (HexDigit(*it) < 0)
79 return (str.size() > 0) && (str.size()%2 == 0);
82 vector<unsigned char> ParseHex(const char* psz)
84 // convert hex dump to vector
85 vector<unsigned char> vch;
90 signed char c = HexDigit(*psz++);
91 if (c == (signed char)-1)
93 unsigned char n = (c << 4);
95 if (c == (signed char)-1)
/** Convenience overload: parse the hex dump held in str (see the const char* overload). */
std::vector<unsigned char> ParseHex(const std::string& str)
{
    const char* const text = str.c_str();
    return ParseHex(text);
}
/**
 * Encode len bytes starting at pch as base64, using the standard alphabet
 * and '=' padding.
 */
std::string EncodeBase64(const unsigned char* pch, size_t len)
{
    static const char* const kAlphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string encoded;
    encoded.reserve((len + 2) / 3 * 4);

    // Every complete 3-byte group becomes exactly four characters.
    size_t pos = 0;
    while (len - pos >= 3) {
        const unsigned int b0 = pch[pos];
        const unsigned int b1 = pch[pos + 1];
        const unsigned int b2 = pch[pos + 2];
        encoded += kAlphabet[b0 >> 2];
        encoded += kAlphabet[((b0 & 3) << 4) | (b1 >> 4)];
        encoded += kAlphabet[((b1 & 15) << 2) | (b2 >> 6)];
        encoded += kAlphabet[b2 & 63];
        pos += 3;
    }

    // A trailing group of one or two bytes is zero-filled and padded with '='.
    const size_t tail = len - pos;
    if (tail == 1) {
        const unsigned int b0 = pch[pos];
        encoded += kAlphabet[b0 >> 2];
        encoded += kAlphabet[(b0 & 3) << 4];
        encoded += '=';
        encoded += '=';
    } else if (tail == 2) {
        const unsigned int b0 = pch[pos];
        const unsigned int b1 = pch[pos + 1];
        encoded += kAlphabet[b0 >> 2];
        encoded += kAlphabet[((b0 & 3) << 4) | (b1 >> 4)];
        encoded += kAlphabet[(b1 & 15) << 2];
        encoded += '=';
    }

    return encoded;
}
154 string EncodeBase64(const string& str)
156 return EncodeBase64((const unsigned char*)str.c_str(), str.size());
/**
 * Decode the base64 text at p.
 *
 * Decoding stops at the first byte that is not in the base64 alphabet
 * (this includes '=' and the terminating NUL). If pfInvalid is non-NULL it
 * is set to true when the input ends in an incomplete group that is not
 * followed by the exact '=' padding it requires, or when the unused bits of
 * the last character are not zero; otherwise it is set to false. The bytes
 * decoded so far are returned in either case.
 */
std::vector<unsigned char> DecodeBase64(const char* p, bool* pfInvalid)
{
    // Byte value -> 6-bit value; -1 for bytes outside the alphabet.
    static const int decode64_table[256] =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
        -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
        29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    if (pfInvalid != NULL)
        *pfInvalid = false;

    std::vector<unsigned char> decoded;
    decoded.reserve(strlen(p) * 3 / 4);

    int phase = 0; // number of characters consumed so far, modulo 4
    int carry = 0; // bits received but not yet emitted as part of a byte

    for (;;) {
        const int sextet = decode64_table[(unsigned char)*p];
        if (sextet == -1)
            break;
        ++p;

        if (phase == 0) {
            // no bits pending: keep all 6
            carry = sextet;
            phase = 1;
        } else if (phase == 1) {
            // 6 bits pending: emit a byte, keep 4
            decoded.push_back((unsigned char)((carry << 2) | (sextet >> 4)));
            carry = sextet & 15;
            phase = 2;
        } else if (phase == 2) {
            // 4 bits pending: emit a byte, keep 2
            decoded.push_back((unsigned char)((carry << 4) | (sextet >> 2)));
            carry = sextet & 3;
            phase = 3;
        } else {
            // 2 bits pending: emit a byte, keep nothing
            decoded.push_back((unsigned char)((carry << 6) | sextet));
            phase = 0;
        }
    }

    if (pfInvalid == NULL || phase == 0)
        return decoded; // nothing to report, or 4n characters processed: ok

    // Number of '=' required after 4n+phase characters; -1 means that count is impossible.
    static const int kRequiredPadding[4] = {0, -1, 2, 1};
    const int padding = kRequiredPadding[phase];

    if (padding < 0 || carry != 0) {
        *pfInvalid = true;
        return decoded;
    }
    // Stop at the first mismatch so that nothing past a terminating NUL is read.
    for (int i = 0; i < padding; ++i) {
        if (p[i] != '=') {
            *pfInvalid = true;
            return decoded;
        }
    }
    // The padding must not be followed by more base64 characters.
    if (decode64_table[(unsigned char)p[padding]] != -1)
        *pfInvalid = true;

    return decoded;
}
242 string DecodeBase64(const string& str)
244 vector<unsigned char> vchRet = DecodeBase64(str.c_str());
245 return (vchRet.size() == 0) ? string() : string((const char*)&vchRet[0], vchRet.size());
/**
 * Encode len bytes starting at pch as base32, using the lowercase alphabet
 * a-z, 2-7 and '=' padding up to a multiple of eight characters.
 */
std::string EncodeBase32(const unsigned char* pch, size_t len)
{
    static const char* const kAlphabet = "abcdefghijklmnopqrstuvwxyz234567";

    std::string encoded;
    encoded.reserve((len + 4) / 5 * 8);

    unsigned int pending = 0; // bits read but not yet emitted (low `bits` bits are valid)
    int bits = 0;             // number of valid bits in `pending`, always < 5 between bytes

    for (size_t i = 0; i < len; ++i) {
        pending = (pending << 8) | pch[i];
        bits += 8;
        while (bits >= 5) {
            bits -= 5;
            encoded += kAlphabet[(pending >> bits) & 31];
        }
        pending &= (1u << bits) - 1; // forget the bits that were just emitted
    }

    if (bits > 0) {
        // Zero-fill the final partial character, then pad the group to 8 characters.
        encoded += kAlphabet[(pending << (5 - bits)) & 31];
        while (encoded.size() % 8 != 0)
            encoded += '=';
    }

    return encoded;
}
307 string EncodeBase32(const string& str)
309 return EncodeBase32((const unsigned char*)str.c_str(), str.size());
/**
 * Decode the base32 text at p (upper- and lowercase letters are accepted).
 *
 * Decoding stops at the first byte that is not in the base32 alphabet
 * (this includes '=' and the terminating NUL). If pfInvalid is non-NULL it
 * is set to true when the input ends in an incomplete group that is not
 * followed by the exact '=' padding it requires, or when the unused bits of
 * the last character are not zero; otherwise it is set to false. The bytes
 * decoded so far are returned in either case.
 */
std::vector<unsigned char> DecodeBase32(const char* p, bool* pfInvalid)
{
    // Byte value -> 5-bit value; -1 for bytes outside the alphabet.
    static const int decode32_table[256] =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
        23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    if (pfInvalid != NULL)
        *pfInvalid = false;

    std::vector<unsigned char> decoded;
    decoded.reserve(strlen(p) * 5 / 8);

    int phase = 0; // number of characters consumed so far, modulo 8
    int carry = 0; // bits received but not yet emitted as part of a byte

    for (;;) {
        const int quintet = decode32_table[(unsigned char)*p];
        if (quintet == -1)
            break;
        ++p;

        switch (phase) {
        case 0: // no bits pending: keep all 5
            carry = quintet;
            break;
        case 1: // 5 bits pending: emit a byte, keep 2
            decoded.push_back((unsigned char)((carry << 3) | (quintet >> 2)));
            carry = quintet & 3;
            break;
        case 2: // 2 bits pending: keep 7
            carry = (carry << 5) | quintet;
            break;
        case 3: // 7 bits pending: emit a byte, keep 4
            decoded.push_back((unsigned char)((carry << 1) | (quintet >> 4)));
            carry = quintet & 15;
            break;
        case 4: // 4 bits pending: emit a byte, keep 1
            decoded.push_back((unsigned char)((carry << 4) | (quintet >> 1)));
            carry = quintet & 1;
            break;
        case 5: // 1 bit pending: keep 6
            carry = (carry << 5) | quintet;
            break;
        case 6: // 6 bits pending: emit a byte, keep 3
            decoded.push_back((unsigned char)((carry << 2) | (quintet >> 3)));
            carry = quintet & 7;
            break;
        case 7: // 3 bits pending: emit a byte, keep nothing
            decoded.push_back((unsigned char)((carry << 5) | quintet));
            break;
        }
        phase = (phase + 1) & 7;
    }

    if (pfInvalid == NULL || phase == 0)
        return decoded; // nothing to report, or 8n characters processed: ok

    // Number of '=' required after 8n+phase characters; -1 means that count is impossible.
    static const int kRequiredPadding[8] = {0, -1, 6, -1, 4, 3, -1, 1};
    const int padding = kRequiredPadding[phase];

    if (padding < 0 || carry != 0) {
        *pfInvalid = true;
        return decoded;
    }
    // Stop at the first mismatch so that nothing past a terminating NUL is read.
    for (int i = 0; i < padding; ++i) {
        if (p[i] != '=') {
            *pfInvalid = true;
            return decoded;
        }
    }
    // The padding must not be followed by more base32 characters.
    if (decode32_table[(unsigned char)p[padding]] != -1)
        *pfInvalid = true;

    return decoded;
}
429 string DecodeBase32(const string& str)
431 vector<unsigned char> vchRet = DecodeBase32(str.c_str());
432 return (vchRet.size() == 0) ? string() : string((const char*)&vchRet[0], vchRet.size());
/**
 * Checks shared by the Parse* functions, applied before the string is handed
 * to a C library number parser.
 *
 * @param[in] str  Candidate number string.
 * @return false if str is empty, starts or ends with whitespace, or contains
 *         an embedded NUL character; true otherwise.
 */
static bool ParsePrechecks(const std::string& str)
{
    if (str.empty()) // No empty string allowed
        return false;

    // isspace() is only defined for EOF and values representable as unsigned
    // char. Where plain char is signed, passing a byte >= 0x80 directly would
    // be undefined behaviour, so convert first.
    const unsigned char first = static_cast<unsigned char>(str[0]);
    const unsigned char last = static_cast<unsigned char>(str[str.size() - 1]);
    if (isspace(first) || isspace(last)) // No padding allowed
        return false;

    if (str.size() != strlen(str.c_str())) // No embedded NUL characters allowed
        return false;
    return true;
}
446 bool ParseInt32(const std::string& str, int32_t *out)
448 if (!ParsePrechecks(str))
451 errno = 0; // strtol will not set errno if valid
452 long int n = strtol(str.c_str(), &endp, 10);
453 if(out) *out = (int32_t)n;
454 // Note that strtol returns a *long int*, so even if strtol doesn't report a over/underflow
455 // we still have to check that the returned value is within the range of an *int32_t*. On 64-bit
456 // platforms the size of these types may be different.
457 return endp && *endp == 0 && !errno &&
458 n >= std::numeric_limits<int32_t>::min() &&
459 n <= std::numeric_limits<int32_t>::max();
462 bool ParseInt64(const std::string& str, int64_t *out)
464 if (!ParsePrechecks(str))
467 errno = 0; // strtoll will not set errno if valid
468 long long int n = strtoll(str.c_str(), &endp, 10);
469 if(out) *out = (int64_t)n;
470 // Note that strtoll returns a *long long int*, so even if strtol doesn't report a over/underflow
471 // we still have to check that the returned value is within the range of an *int64_t*.
472 return endp && *endp == 0 && !errno &&
473 n >= std::numeric_limits<int64_t>::min() &&
474 n <= std::numeric_limits<int64_t>::max();
477 bool ParseDouble(const std::string& str, double *out)
479 if (!ParsePrechecks(str))
481 if (str.size() >= 2 && str[0] == '0' && str[1] == 'x') // No hexadecimal floats allowed
483 std::istringstream text(str);
484 text.imbue(std::locale::classic());
487 if(out) *out = result;
488 return text.eof() && !text.fail();
/**
 * Re-flow a paragraph of space-separated words.
 *
 * Words are joined by single spaces. When appending a word would make the
 * current line longer than width, a newline followed by indent spaces is
 * emitted instead of the separating space. The first word on a line is never
 * split, even if it is longer than width, and the indentation does not count
 * towards the width.
 *
 * @param[in] in      Text to format; only ' ' separates words.
 * @param[in] width   Maximum line length, not counting indentation.
 * @param[in] indent  Number of spaces inserted after each line break.
 * @return The formatted paragraph.
 */
std::string FormatParagraph(const std::string& in, size_t width, size_t indent)
{
    std::string wrapped;
    size_t line_used = 0; // columns used on the current line, including one trailing separator
    size_t cursor = 0;

    while (cursor < in.size()) {
        // Find beginning of next word
        const size_t word_begin = in.find_first_not_of(' ', cursor);
        if (word_begin == std::string::npos)
            break; // only spaces remain

        // Find end of next word
        size_t word_end = in.find_first_of(' ', word_begin);
        if (word_end == std::string::npos)
            word_end = in.size();
        const size_t word_len = word_end - word_begin;

        // Separate from the previous word: wrap if this word would exceed the width
        if (line_used > 0) {
            if (line_used + word_len > width) {
                wrapped += '\n';
                wrapped.append(indent, ' ');
                line_used = 0;
            } else {
                wrapped += ' ';
            }
        }

        // Append word
        wrapped.append(in, word_begin, word_len);
        line_used += word_len + 1;
        cursor = word_end;
    }
    return wrapped;
}
526 std::string i64tostr(int64_t n)
528 return strprintf("%d", n);
531 std::string itostr(int n)
533 return strprintf("%d", n);
/**
 * Lenient conversion of a NUL-terminated string to a 64-bit integer.
 *
 * Thin wrapper around the platform's C routine (strtoll in base 10, or
 * _atoi64 when building with MSVC); no parse errors are reported.
 */
int64_t atoi64(const char* psz)
{
#ifndef _MSC_VER
    return strtoll(psz, NULL, 10);
#else
    return _atoi64(psz);
#endif
}
/**
 * Lenient conversion of a string to a 64-bit integer.
 *
 * Thin wrapper around the platform's C routine (strtoll in base 10, or
 * _atoi64 when building with MSVC); no parse errors are reported.
 */
int64_t atoi64(const std::string& str)
{
    const char* const text = str.c_str();
#ifndef _MSC_VER
    return strtoll(text, NULL, 10);
#else
    return _atoi64(text);
#endif
}
/** std::string overload that forwards to the C library atoi(); no parse errors are reported. */
int atoi(const std::string& str)
{
    const char* const text = str.c_str();
    return atoi(text);
}
/** Upper bound for mantissa.
 * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer.
 * Larger integers cannot consist of arbitrary combinations of 0-9:
 *
 *   999999999999999999  10^18-1
 *  9223372036854775807  (1<<63)-1  (max int64_t)
 *  9999999999999999999  10^19-1    (would overflow)
 */
static const int64_t UPPER_BOUND = 999999999999999999LL;
569 /** Helper function for ParseFixedPoint */
570 static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
575 for (int i=0; i<=mantissa_tzeros; ++i) {
576 if (mantissa > (UPPER_BOUND / 10LL))
577 return false; /* overflow */
580 mantissa += ch - '0';
586 bool ParseFixedPoint(const std::string &val, int decimals, int64_t *amount_out)
588 int64_t mantissa = 0;
589 int64_t exponent = 0;
590 int mantissa_tzeros = 0;
591 bool mantissa_sign = false;
592 bool exponent_sign = false;
594 int end = val.size();
597 if (ptr < end && val[ptr] == '-') {
598 mantissa_sign = true;
603 if (val[ptr] == '0') {
606 } else if (val[ptr] >= '1' && val[ptr] <= '9') {
607 while (ptr < end && val[ptr] >= '0' && val[ptr] <= '9') {
608 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
609 return false; /* overflow */
612 } else return false; /* missing expected digit */
613 } else return false; /* empty string or loose '-' */
614 if (ptr < end && val[ptr] == '.')
617 if (ptr < end && val[ptr] >= '0' && val[ptr] <= '9')
619 while (ptr < end && val[ptr] >= '0' && val[ptr] <= '9') {
620 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
621 return false; /* overflow */
625 } else return false; /* missing expected digit */
627 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
630 if (ptr < end && val[ptr] == '+')
632 else if (ptr < end && val[ptr] == '-') {
633 exponent_sign = true;
636 if (ptr < end && val[ptr] >= '0' && val[ptr] <= '9') {
637 while (ptr < end && val[ptr] >= '0' && val[ptr] <= '9') {
638 if (exponent > (UPPER_BOUND / 10LL))
639 return false; /* overflow */
640 exponent = exponent * 10 + val[ptr] - '0';
643 } else return false; /* missing expected digit */
646 return false; /* trailing garbage */
648 /* finalize exponent */
650 exponent = -exponent;
651 exponent = exponent - point_ofs + mantissa_tzeros;
653 /* finalize mantissa */
655 mantissa = -mantissa;
657 /* convert to one 64-bit fixed-point value */
658 exponent += decimals;
660 return false; /* cannot represent values smaller than 10^-decimals */
662 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
664 for (int i=0; i < exponent; ++i) {
665 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
666 return false; /* overflow */
669 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
670 return false; /* overflow */
673 *amount_out = mantissa;