// base64.hpp (13 KB)
  1. /*
  2. This is free and unencumbered software released into the public domain.
  3. Anyone is free to copy, modify, publish, use, compile, sell, or
  4. distribute this software, either in source code form or as a compiled
  5. binary, for any purpose, commercial or non-commercial, and by any
  6. means.
  7. In jurisdictions that recognize copyright laws, the author or authors
  8. of this software dedicate any and all copyright interest in the
  9. software to the public domain. We make this dedication for the benefit
  10. of the public at large and to the detriment of our heirs and
  11. successors. We intend this dedication to be an overt act of
  12. relinquishment in perpetuity of all present and future rights to this
  13. software under copyright law.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  17. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. OTHER DEALINGS IN THE SOFTWARE.
  21. For more information, please refer to <http://unlicense.org>
  22. */
  23. #ifndef PUBLIC_DOMAIN_BASE64_HPP_
  24. #define PUBLIC_DOMAIN_BASE64_HPP_
  25. #include <cstdint>
  26. #include <iterator>
  27. #include <stdexcept>
  28. #include <string>
  /**
   The exception thrown by the `base64` decoding functions when the input is not valid base64.
   It inherits every constructor of `std::runtime_error`.
  */
  class base64_error : public std::runtime_error
  {
  public:
      using std::runtime_error::runtime_error;
  };

  /**
   A collection of static functions for base64 encoding and decoding. The class holds no state.
  */
  class base64
  {
  public:
      enum class alphabet
      {
          /** the alphabet is detected automatically from the first of `+`, `/`, `-` or `_` that is decoded */
          auto_,
          /** the standard base64 alphabet is used */
          standard,
          /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively */
          url_filename_safe
      };

      enum class decoding_behavior
      {
          /**
           Decoding stops at the first padding character; every character after it must be a padding character
           too, otherwise a `base64_error` is thrown. If the input is not padded, the remaining bits are ignored.
          */
          moderate,
          /** if a padding character is encountered decoding is finished; everything after it is ignored */
          loose
      };

      /**
       Encodes all the elements from `in_begin` to `in_end` to `out`.

       @warning The source and destination cannot overlap. The destination must be able to hold at least
       `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output
       iterator.

       @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be
       greater than 8 bits
       @tparam Output_iterator the destination; the elements written to it are from the type `char`
       @param in_begin the beginning of the source
       @param in_end the ending of the source
       @param out the destination iterator
       @param alphabet which alphabet should be used; every value other than `url_filename_safe` selects the
       standard alphabet
       @returns the iterator to the next element past the last element copied
       @throws see `Input_iterator` and `Output_iterator`
      */
      template<typename Input_iterator, typename Output_iterator>
      static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                    alphabet alphabet = alphabet::standard)
      {
          constexpr char pad = '=';
          const char* const alpha = alphabet == alphabet::url_filename_safe
                                        ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
                                        : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

          // every iteration consumes up to three input bytes and emits exactly four characters
          while (in_begin != in_end) {
              // upper six bits of the first byte
              const auto i0 = static_cast<std::uint8_t>(*in_begin);
              ++in_begin;
              *out = alpha[i0 >> 2];
              ++out;

              // only one byte left: emit its lower two bits and pad twice
              if (in_begin == in_end) {
                  *out = alpha[(i0 & 0x03) << 4];
                  ++out;
                  *out = pad;
                  ++out;
                  *out = pad;
                  ++out;
                  break;
              }

              // lower two bits of the first byte and upper four bits of the second
              const auto i1 = static_cast<std::uint8_t>(*in_begin);
              ++in_begin;
              *out = alpha[((i0 & 0x03) << 4) | (i1 >> 4)];
              ++out;

              // only two bytes left: emit the lower four bits of the second and pad once
              if (in_begin == in_end) {
                  *out = alpha[(i1 & 0x0f) << 2];
                  ++out;
                  *out = pad;
                  ++out;
                  break;
              }

              // lower four bits of the second byte and upper two bits of the third
              const auto i2 = static_cast<std::uint8_t>(*in_begin);
              ++in_begin;
              *out = alpha[((i1 & 0x0f) << 2) | (i2 >> 6)];
              ++out;

              // lower six bits of the third byte
              *out = alpha[i2 & 0x3f];
              ++out;
          }

          return out;
      }

      /**
       Encodes a string.

       @param str the string that should be encoded
       @param alphabet which alphabet should be used
       @returns the encoded base64 string
       @throws see base64::encode()
      */
      static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard)
      {
          std::string result;
          result.reserve(required_encode_size(str.length()));
          encode(str.begin(), str.end(), std::back_inserter(result), alphabet);
          return result;
      }

      /**
       Encodes a char array.

       @param buffer the char array
       @param size the size of the array
       @param alphabet which alphabet should be used
       @returns the encoded string
       @throws see base64::encode()
      */
      static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard)
      {
          std::string result;
          result.reserve(required_encode_size(size));
          encode(buffer, buffer + size, std::back_inserter(result), alphabet);
          return result;
      }

      /**
       Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location
       as `out`, in other words: inplace decoding is possible, because every output byte is written only after
       the input characters it is built from have been read.

       @warning The destination must be able to hold at least `max_decode_size(std::distance(in_begin, in_end))`,
       otherwise the behavior depends on the output iterator.
       @warning Output is written while decoding; if an exception is thrown, the bytes decoded so far have
       already been written to `out`.

       @tparam Input_iterator the source; the returned elements are cast to `char`
       @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
       @param in_begin the beginning of the source
       @param in_end the ending of the source
       @param out the destination iterator
       @param alphabet which alphabet should be used
       @param behavior how the input after the first padding character is treated
       @returns the iterator to the next element past the last element copied
       @throws base64_error if a character is not part of the alphabet, or, unless `behavior` is `loose`, if a
       non-padding character follows a padding character
       @throws see `Input_iterator` and `Output_iterator`
      */
      template<typename Input_iterator, typename Output_iterator>
      static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                    alphabet alphabet = alphabet::auto_,
                                    decoding_behavior behavior = decoding_behavior::moderate)
      {
          constexpr char pad = '=';
          // the previously decoded 6-bit value; its lowest `bits` bits have not been written yet
          std::uint8_t last = 0;
          int bits = 0;

          while (in_begin != in_end) {
              const char c = static_cast<char>(*in_begin);
              ++in_begin;
              if (c == pad) {
                  break;
              }

              const std::uint8_t part = _base64_value(alphabet, c);

              // enough bits for one byte
              if (bits + 6 >= 8) {
                  // the cast drops the already written high bits of `last`; without it destinations wider
                  // than 8 bits would receive values above 255
                  *out = static_cast<std::uint8_t>((last << (8 - bits)) | (part >> (bits - 2)));
                  ++out;
                  bits -= 2;
              } else {
                  bits += 6;
              }
              last = part;
          }

          // check padding: only padding characters may follow the first one
          if (behavior != decoding_behavior::loose) {
              while (in_begin != in_end) {
                  const char c = static_cast<char>(*in_begin);
                  ++in_begin;
                  if (c != pad) {
                      throw base64_error("invalid base64 character.");
                  }
              }
          }

          return out;
      }

      /**
       Decodes a string.

       @param str the base64 encoded string
       @param alphabet which alphabet should be used
       @param behavior how the input after the first padding character is treated
       @returns the decoded string
       @throws see base64::decode()
      */
      static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_,
                                decoding_behavior behavior = decoding_behavior::moderate)
      {
          std::string result;
          result.reserve(max_decode_size(str.length()));
          decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior);
          return result;
      }

      /**
       Decodes a char array.

       @param buffer the base64 encoded buffer
       @param size the size of the buffer
       @param alphabet which alphabet should be used
       @param behavior how the input after the first padding character is treated
       @returns the decoded string
       @throws see base64::decode()
      */
      static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_,
                                decoding_behavior behavior = decoding_behavior::moderate)
      {
          std::string result;
          result.reserve(max_decode_size(size));
          decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior);
          return result;
      }

      /**
       Decodes a string inplace. The string is shrunk to the decoded size.

       @warning If an exception is thrown, the beginning of `str` may already be overwritten with decoded data.

       @param[in,out] str the base64 encoded string
       @param alphabet which alphabet should be used
       @param behavior how the input after the first padding character is treated
       @throws see base64::decode()
      */
      static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_,
                                 decoding_behavior behavior = decoding_behavior::moderate)
      {
          const auto end = decode(str.begin(), str.end(), str.begin(), alphabet, behavior);
          str.resize(static_cast<std::size_t>(end - str.begin()));
      }

      /**
       Decodes a char array inplace.

       @warning If an exception is thrown, the beginning of `str` may already be overwritten with decoded data.

       @param[in,out] str the char array
       @param size the length of the array
       @param alphabet which alphabet should be used
       @param behavior how the input after the first padding character is treated
       @returns the pointer to the next element past the last element decoded
       @throws see base64::decode()
      */
      static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
                                  decoding_behavior behavior = decoding_behavior::moderate)
      {
          return decode(str, str + size, str, alphabet, behavior);
      }

      /**
       Returns the required decoding size for a given size. The value is calculated with the following formula:

       $$
       \lceil \frac{size}{4} \rceil \cdot 3
       $$

       @param size the size of the encoded input
       @returns the size of the resulting decoded buffer; this is the absolute maximum
      */
      static std::size_t max_decode_size(std::size_t size) noexcept
      {
          return (size / 4 + (size % 4 ? 1 : 0)) * 3;
      }

      /**
       Returns the required encoding size for a given size. The value is calculated with the following formula:

       $$
       \lceil \frac{size}{3} \rceil \cdot 4
       $$

       @param size the size of the decoded input
       @returns the size of the resulting encoded buffer
      */
      static std::size_t required_encode_size(std::size_t size) noexcept
      {
          return (size / 3 + (size % 3 ? 1 : 0)) * 4;
      }

  private:
      /**
       Maps one base64 character to its 6-bit value.

       The range checks rely on letters and digits being contiguous in the character set (true for ASCII).

       @param[in,out] alphabet the alphabet in use; if it is neither `standard` nor `url_filename_safe` it is
       set to the alphabet that `c` belongs to as soon as `c` is one of `+`, `/`, `-` or `_`
       @param c the character
       @returns the value of the character in the range [0, 63]
       @throws base64_error if `c` is not part of the alphabet
      */
      static std::uint8_t _base64_value(alphabet& alphabet, char c)
      {
          // letters and digits are shared by both alphabets
          if (c >= 'A' && c <= 'Z') {
              return static_cast<std::uint8_t>(c - 'A');
          }
          if (c >= 'a' && c <= 'z') {
              return static_cast<std::uint8_t>(c - 'a' + 26);
          }
          if (c >= '0' && c <= '9') {
              return static_cast<std::uint8_t>(c - '0' + 52);
          }

          // comes down to alphabet
          if (alphabet == alphabet::standard) {
              if (c == '+') {
                  return 62;
              }
              if (c == '/') {
                  return 63;
              }
          } else if (alphabet == alphabet::url_filename_safe) {
              if (c == '-') {
                  return 62;
              }
              if (c == '_') {
                  return 63;
              }
          } else {
              // auto detect: the first alphabet specific character fixes the alphabet for the caller
              if (c == '+') {
                  alphabet = alphabet::standard;
                  return 62;
              }
              if (c == '/') {
                  alphabet = alphabet::standard;
                  return 63;
              }
              if (c == '-') {
                  alphabet = alphabet::url_filename_safe;
                  return 62;
              }
              if (c == '_') {
                  alphabet = alphabet::url_filename_safe;
                  return 63;
              }
          }

          throw base64_error("invalid base64 character.");
      }
  };
  325. #endif // !PUBLIC_DOMAIN_BASE64_HPP_