NAWA 0.9
Web Application Framework for C++
utils.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2022 Tobias Flaig.
3 *
4 * This file is part of nawa.
5 *
6 * nawa is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License,
8 * version 3, as published by the Free Software Foundation.
9 *
10 * nawa is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with nawa. If not, see <https://www.gnu.org/licenses/>.
17 */
18
24#include <boost/algorithm/string.hpp>
25#include <fstream>
26#include <iomanip>
27#include <nawa/Exception.h>
28#include <nawa/util/encoding.h>
29#include <nawa/util/utils.h>
30#include <unordered_map>
31
32using namespace nawa;
33using namespace std;
34
35namespace {
/**
 * Lookup table that maps a lowercase file extension (without the leading dot)
 * to the content type string reported for it.
 */
std::unordered_map<std::string, std::string> const contentTypeMap{
        {"aac", "audio/aac"},
        {"arc", "application/x-freearc"},
        {"avi", "video/x-msvideo"},
        {"azw", "application/vnd.amazon.ebook"},
        {"bmp", "image/bmp"},
        {"bz", "application/x-bzip"},
        {"bz2", "application/x-bzip2"},
        {"csh", "application/x-csh"},
        {"css", "text/css"},
        {"csv", "text/csv"},
        {"deb", "application/vnd.debian.binary-package"},
        {"doc", "application/msword"},
        {"dot", "application/msword"},
        {"docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
        {"dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
        {"eot", "application/vnd.ms-fontobject"},
        {"epub", "application/epub+zip"},
        {"flv", "video/x-flv"},
        {"f4v", "video/mp4"},
        {"f4a", "audio/mp4"},
        {"gif", "image/gif"},
        {"gz", "application/x-gzip"},
        {"htm", "text/html"},
        {"html", "text/html"},
        {"ico", "image/vnd.microsoft.icon"},
        {"ics", "text/calendar"},
        {"jar", "application/java-archive"},
        {"java", "text/plain"},
        {"jpg", "image/jpeg"},
        {"jpeg", "image/jpeg"},
        {"js", "text/javascript"},
        {"json", "application/json"},
        {"mid", "audio/x-midi"},
        {"midi", "audio/x-midi"},
        {"mjs", "application/javascript"},
        {"mp3", "audio/mpeg"},
        {"mpeg", "video/mpeg"},
        {"mp4", "application/mp4"},
        {"m4v", "video/mp4"},
        {"m4a", "audio/mp4"},
        {"mpkg", "application/vnd.apple.installer+xml"},
        {"odp", "application/vnd.oasis.opendocument.presentation"},
        {"otp", "application/vnd.oasis.opendocument.presentation"},
        {"ods", "application/vnd.oasis.opendocument.spreadsheet"},
        {"ots", "application/vnd.oasis.opendocument.spreadsheet"},
        {"odt", "application/vnd.oasis.opendocument.text"},
        {"ott", "application/vnd.oasis.opendocument.text"},
        {"ogg", "application/ogg"},
        {"ogx", "application/ogg"},
        {"oga", "audio/ogg"},
        {"ogv", "video/ogg"},
        {"otf", "font/otf"},
        {"png", "image/png"},
        {"pdf", "application/pdf"},
        {"ppt", "application/vnd.ms-powerpoint"},
        {"pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"},
        {"rar", "application/x-rar-compressed"},
        {"rtf", "application/rtf"},
        {"sh", "application/x-sh"},
        {"svg", "image/svg+xml"},
        {"swf", "application/x-shockwave-flash"},
        {"tar", "application/x-tar"},
        {"tif", "image/tiff"},
        {"tiff", "image/tiff"},
        {"ttf", "font/ttf"},
        {"txt", "text/plain"},
        {"vsd", "application/vnd.visio"},
        {"wav", "audio/wav"},
        {"weba", "audio/webm"},
        {"webm", "video/webm"},
        {"webp", "image/webp"},
        {"woff", "font/woff"},
        {"woff2", "font/woff2"},
        {"xhtml", "application/xhtml+xml"},
        {"xls", "application/vnd.ms-excel"},
        {"xlt", "application/vnd.ms-excel"},
        {"xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
        {"xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
        {"xml", "application/xml"},
        {"xul", "application/vnd.mozilla.xul+xml"},
        {"xz", "application/x-xz"},
        {"zip", "application/zip"},
        {"3gp", "video/3gpp"},
        {"3g2", "video/3gpp2"},
        {"7z", "application/x-7z-compressed"},
};
122
/**
 * Get the English three-letter abbreviation for a day of the week.
 * @param dow Day index, 0 (Sunday) through 6 (Saturday).
 * @return The abbreviation, or an empty string if dow is outside 0..6.
 */
inline std::string getDayOfWeek(int dow) {
    static char const* const dayNames[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
    if (dow < 0 || dow > 6) {
        return {};
    }
    return dayNames[dow];
}
158
/**
 * Get the English three-letter abbreviation for a month.
 * @param mon Month index, 0 (January) through 11 (December).
 * @return The abbreviation, or an empty string if mon is outside 0..11.
 */
inline std::string getMonth(int mon) {
    static char const* const monthNames[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
    if (mon < 0 || mon > 11) {
        return {};
    }
    return monthNames[mon];
}
209
210}// namespace
211
212// doxygen bug, somehow doxygen does not like std::function
213void utils::regexReplaceCallback(std::string& s, std::regex const& rgx,
214 std::function<std::string(std::vector<std::string> const&)> const& fmt) {
215 // how many submatches do we have to deal with?
216 int marks = rgx.mark_count();
217 // we want to iterate through all submatches (to collect them in a vector passed to fmt())
218 vector<int> submatchList;
219 for (int i = -1; i <= marks; ++i) {
220 submatchList.push_back(i);
221 }
222
223 sregex_token_iterator begin(s.begin(), s.end(), rgx, submatchList), end;
224 stringstream out;
225
226 // prefixes and submatches (should) alternate
227 int submatch = -1;
228 vector<string> submatchVector;
229 for (auto it = begin; it != end; ++it) {
230 if (submatch == -1) {
231 out << it->str();
232 ++submatch;
233 } else {
234 submatchVector.push_back(it->str());
235 if (submatch < marks) {
236 ++submatch;
237 } else {
238 out << fmt(submatchVector);
239 submatchVector.clear();
240 submatch = -1;
241 }
242 }
243 }
244 s = out.str();
245}
246
247std::string utils::hexDump(std::string const& in) {
248 stringstream rets;
249 rets << hex << setfill('0');
250 for (char c : in) {
251 rets << setw(2) << (int) (unsigned char) c;
252 }
253 return rets.str();
254}
255
256std::string utils::toLowercase(std::string s) {
257 transform(s.begin(), s.end(), s.begin(), ::tolower);
258 return s;
259}
260
261std::string utils::toUppercase(std::string s) {
262 transform(s.begin(), s.end(), s.begin(), ::toupper);
263 return s;
264}
265
266std::string utils::generateErrorPage(unsigned int httpStatus) {
267 string errorStr;
268 string explanation;
269 switch (httpStatus) {
270 case 400:
271 errorStr = "Bad Request";
272 explanation = "The server cannot process your request.";
273 break;
274 case 401:
275 errorStr = "Unauthorized";
276 explanation = "The necessary credentials have not been provided.";
277 break;
278 case 403:
279 errorStr = "Forbidden";
280 explanation = "You do not have the necessary permissions to view this page.";
281 break;
282 case 404:
283 errorStr = "Not Found";
284 explanation = "The requested URL was not found on this server.";
285 break;
286 case 405:
287 errorStr = "Method Not Allowed";
288 explanation = "The used request method is not supported for the requested resource.";
289 break;
290 case 406:
291 errorStr = "Not Applicable";
292 explanation = "The requested function is unable to produce a resource that satisfies your browser's Accept header.";
293 break;
294 case 408:
295 errorStr = "Request Timeout";
296 explanation = "A timeout occurred while waiting for your request.";
297 break;
298 case 409:
299 errorStr = "Conflict";
300 explanation = "The request cannot be processed due to a conflict on the underlying resource.";
301 break;
302 case 410:
303 errorStr = "Gone";
304 explanation = "The requested resource is no longer available.";
305 break;
306 case 415:
307 errorStr = "Unsupported Media Type";
308 explanation = "Your browser has requested a media type that cannot be provided by this resource.";
309 break;
310 case 418:
311 errorStr = "I'm a teapot";
312 explanation = "I cannot brew coffee for you.";
313 break;
314 case 429:
315 errorStr = "Too Many Requests";
316 break;
317 case 451:
318 errorStr = "Unavailable For Legal Reasons";
319 break;
320 case 500:
321 errorStr = "Internal Server Error";
322 explanation = "The server encountered an internal error and is unable to fulfill your request.";
323 break;
324 case 501:
325 errorStr = "Not Implemented";
326 explanation = "The server is not able to fulfill your request.";
327 break;
328 case 503:
329 errorStr = "Service Unavailable";
330 explanation = "This service is currently unavailable. Please try again later.";
331 break;
332 default:
333 errorStr = "Unknown Error";
334 }
335
336 stringstream ep;
337 ep << "<!DOCTYPE html><html><head><title>" << httpStatus << ' ' << errorStr << "</title></head><body><h1>"
338 << errorStr << "</h1><p>" << explanation << "</p></body></html>";
339
340 return ep.str();
341}
342
343std::string utils::getFileExtension(std::string const& filename) {
344 try {
345 return filename.substr(filename.find_last_of('.') + 1);
346 } catch (out_of_range&) {}
347
348 return {};
349}
350
351std::string utils::contentTypeByExtension(std::string extension) {
352 auto ext = toLowercase(std::move(extension));
353 if (contentTypeMap.count(ext) == 1) {
354 return contentTypeMap.at(ext);
355 }
356 return "application/octet-stream";
357}
358
359std::string utils::makeHttpTime(time_t time) {
360 stringstream httpTime;
361 tm gmt{};
362 auto retPtr = gmtime_r(&time, &gmt);
363 if (retPtr == nullptr) {
364 throw Exception(__PRETTY_FUNCTION__, 1, "Interpretation of UNIX timestamp failed.", strerror(errno));
365 }
366
367 httpTime << getDayOfWeek(gmt.tm_wday) << put_time(&gmt, ", %d ") << getMonth(gmt.tm_mon);
368 httpTime << put_time(&gmt, " %Y %H:%M:%S GMT");
369
370 return httpTime.str();
371}
372
373time_t utils::readHttpTime(std::string const& httpTime) {
374 tm timeStruct{};
375 istringstream timeStream(httpTime);
376 timeStream.exceptions(ifstream::failbit);
377 try {
378 timeStream >> get_time(&timeStruct, "%a, %d %b %Y %H:%M:%S GMT");
379 } catch (ios_base::failure const& e) {
380 throw Exception(__PRETTY_FUNCTION__, 1, "Parsing of HTTP timestamp failed.", e.what());
381 }
382
383 // timegm will interpret the tm as UTC and convert it to a time_t
384 time_t unixTime = timegm(&timeStruct);
385 if (unixTime == -1) {
386 throw Exception(__PRETTY_FUNCTION__, 1, "Conversion of parsed HTTP timestamp to a UNIX timestamp failed.", strerror(errno));
387 }
388
389 return unixTime;
390}
391
392std::string utils::makeSmtpTime(time_t time) {
393 stringstream smtpTime;
394 tm ltime{};
395 auto retPtr = localtime_r(&time, &ltime);
396 if (retPtr == nullptr) {
397 throw Exception(__PRETTY_FUNCTION__, 1, "Interpretation of UNIX timestamp failed.", strerror(errno));
398 }
399 smtpTime << getDayOfWeek(ltime.tm_wday) << put_time(&ltime, ", %e ") << getMonth(ltime.tm_mon);
400 smtpTime << put_time(&ltime, " %Y %H:%M:%S %z");
401
402 return smtpTime.str();
403}
404
405time_t utils::readSmtpTime(std::string const& smtpTime) {
406 string smtpTimeM = smtpTime;
407 tm timeStruct{};
408
409 // there seems to be a bug in get_time, %e parsing with leading space does not work, so this fails for
410 // days of month < 10:
411 //timeStream >> get_time(&timeStruct, "%a, %e %b %Y %H:%M:%S %z");
412
413 // dirty hack
414 if (smtpTimeM.length() > 5 && smtpTimeM[5] == ' ') {
415 smtpTimeM[5] = '0';
416 }
417 istringstream timeStream(smtpTimeM);
418 timeStream.exceptions(ifstream::failbit);
419 try {
420 // time zone modifier %z at the end will also cause trouble, but is not needed anyway (as the TZ is not part of tm)
421 timeStream >> get_time(&timeStruct, "%a, %d %b %Y %H:%M:%S");
422 } catch (ios_base::failure const& e) {
423 throw Exception(__PRETTY_FUNCTION__, 1, "Parsing of SMTP timestamp failed.", e.what());
424 }
425
426 // timegm will create a time_t, but does not honor the time zone, unfortunately (not part of tm)
427 time_t unixTime = timegm(&timeStruct);
428 if (unixTime == -1) {
429 throw Exception(__PRETTY_FUNCTION__, 1, "Conversion of parsed SMTP timestamp to a UNIX timestamp failed.", strerror(errno));
430 }
431
432 // so we'll have to add/subtract the difference manually
433 if (smtpTimeM.length() > 30) {
434 try {
435 long tzAdjust = smtpTimeM[26] == '-' ? 1 : -1;
436 long tzH = stol(smtpTimeM.substr(27, 2));
437 long tzM = stol(smtpTimeM.substr(29, 2));
438 unixTime += tzAdjust * (tzH * 3600 + tzM * 60);
439 } catch (invalid_argument const& e) {
440 throw Exception(__PRETTY_FUNCTION__, 1, "Timezone adjustment in parsing of SMTP timestamp failed.", e.what());
441 }
442 }
443
444 // mktime will interpret the tm as local time and convert it to a time_t
445 return unixTime;
446}
447
448std::vector<std::string> utils::splitString(std::string str, char delimiter, bool ignoreEmpty) {
449 vector<string> ret;
450 for (size_t pos = 0; !str.empty();) {
451 pos = str.find_first_of(delimiter);
452 auto token = str.substr(0, pos);
453 if (!ignoreEmpty || !token.empty()) {
454 ret.push_back(str.substr(0, pos));
455 }
456 if (pos < str.length()) {
457 str = str.substr(pos + 1);
458 } else {
459 break;
460 }
461 }
462 return ret;
463}
464
465std::string utils::mergePath(std::vector<std::string> const& path) {
466 if (path.empty()) {
467 return "/";
468 }
469 stringstream stringPath;
470 for (auto const& e : path) {
471 stringPath << '/' << e;
472 }
473 return stringPath.str();
474}
475
476std::vector<std::string> utils::splitPath(std::string const& pathString) {
477 // remove query string
478 string rawPath = pathString.substr(0, pathString.find('?'));
479 return splitString(rawPath, '/', true);
480}
481
482std::string utils::convertLineEndings(std::string const& in, std::string const& ending) {
483 stringstream ret;
484 for (const auto& c : in) {
485 if (c == '\n')
486 ret << ending;
487 else if (c != '\r')
488 ret << c;
489 }
490 return ret.str();
491}
492
493std::string utils::getFileContents(std::string const& path) {
494 // open file as binary
495 ifstream f(path, ifstream::binary);
496
497 // throw exception if file cannot be opened
498 if (!f) {
499 throw Exception(__PRETTY_FUNCTION__, 1, "Cannot open file for reading");
500 }
501
502 // get file size
503 f.seekg(0, ios::end);
504 long fs = f.tellg();
505 f.seekg(0);
506
507 // load to string
508 string ret(static_cast<unsigned long>(fs), '\0');
509 f.read(&ret[0], fs);
510
511 return ret;
512}
513
514std::string utils::stringReplace(std::string input, std::unordered_map<char, char> const& patterns) {
515 for (auto const& [key, val] : patterns) {
516 replace(input.begin(), input.end(), key, val);
517 }
518 return input;
519}
520
521std::string utils::stringReplace(std::string input, std::unordered_map<std::string, std::string> const& patterns) {
522 for (auto const& [key, val] : patterns) {
523 for (size_t pos = input.find(key); pos != string::npos;) {
524 input.replace(pos, key.length(), val);
525 pos = input.find(key, pos + val.length());
526 }
527 }
528 return input;
529}
530
531std::unordered_multimap<std::string, std::string> utils::splitQueryString(std::string const& queryString) {
532 string qs;
533 size_t qmrkPos = queryString.find_first_of('?');
534 unordered_multimap<string, string> ret;
535 if (qmrkPos != string::npos && queryString.length() > qmrkPos) {
536 qs = queryString.substr(qmrkPos + 1);
537 } else if (qmrkPos == string::npos) {
538 qs = queryString;
539 }
540 auto pairs = splitString(qs, '&', true);
541 for (auto const& p : pairs) {
542 size_t eqPos = p.find_first_of('=');
543 string k = p.substr(0, eqPos);
544 string v = (eqPos < p.length() - 1) ? encoding::urlDecode(p.substr(eqPos + 1)) : "";
545 ret.insert({k, v});
546 }
547 return ret;
548}
549
550std::unordered_map<std::string, std::string> utils::parseHeaders(std::string rawHeaders) {
551 unordered_map<string, string> ret;
552 // filter out carriage returns
553 boost::erase_all(rawHeaders, "\r");
554 // split
555 auto lines = splitString(rawHeaders, '\n', true);
556 for (auto const& line : lines) {
557 auto colonPos = line.find_first_of(':');
558 if (line.length() < colonPos + 2) {
559 continue;
560 }
561 auto key = toLowercase(line.substr(0, colonPos));
562 auto val = line.substr(colonPos + 1);
563 boost::trim_left(val);
564 ret[key] = val;
565 }
566 return ret;
567}
568
569std::unordered_multimap<std::string, std::string> utils::parseCookies(std::string const& rawCookies) {
570 unordered_multimap<string, string> ret;
571 // split by ;
572 auto cookies = splitString(rawCookies, ';', true);
573 for (auto c : cookies) {
574 // remove whitespaces
575 boost::trim(c);
576 // key and value
577 auto eqPos = c.find_first_of('=');
578 if (c.length() < eqPos + 2) {
579 continue;
580 }
581 auto key = c.substr(0, eqPos);
582 auto val = c.substr(eqPos + 1);
583 ret.insert({key, val});
584 }
585 return ret;
586}
Exception class that can be used by apps to catch errors resulting from nawa function calls.
Namespace containing functions for text encoding and decoding.
std::string urlDecode(std::string input)
Definition: encoding.cpp:256
std::string toLowercase(std::string s)
Definition: utils.cpp:256
std::vector< std::string > splitPath(std::string const &pathString)
Definition: utils.cpp:476
std::string mergePath(std::vector< std::string > const &path)
Definition: utils.cpp:465
void regexReplaceCallback(std::string &s, std::regex const &rgx, std::function< std::string(std::vector< std::string > const &)> const &fmt)
std::unordered_map< std::string, std::string > parseHeaders(std::string rawHeaders)
Definition: utils.cpp:550
std::string hexDump(std::string const &in)
Definition: utils.cpp:247
std::string getFileExtension(std::string const &filename)
Definition: utils.cpp:343
std::string convertLineEndings(std::string const &in, std::string const &ending)
Definition: utils.cpp:482
std::string generateErrorPage(unsigned int httpStatus)
Definition: utils.cpp:266
std::string makeHttpTime(time_t time)
Definition: utils.cpp:359
time_t readSmtpTime(std::string const &smtpTime)
Definition: utils.cpp:405
time_t readHttpTime(std::string const &httpTime)
Definition: utils.cpp:373
std::string contentTypeByExtension(std::string extension)
Definition: utils.cpp:351
std::string stringReplace(std::string input, std::unordered_map< char, char > const &patterns)
Definition: utils.cpp:514
std::unordered_multimap< std::string, std::string > splitQueryString(std::string const &queryString)
Definition: utils.cpp:531
std::string getFileContents(std::string const &path)
Definition: utils.cpp:493
std::vector< std::string > splitString(std::string str, char delimiter, bool ignoreEmpty=false)
Definition: utils.cpp:448
std::string makeSmtpTime(time_t time)
Definition: utils.cpp:392
std::string toUppercase(std::string s)
Definition: utils.cpp:261
std::unordered_multimap< std::string, std::string > parseCookies(std::string const &rawCookies)
Definition: utils.cpp:569
Definition: AppInit.h:31
Contains useful functions that improve the readability and facilitate maintenance of the NAWA code.