NAWA  0.8
Web Application Framework for C++
utils.cpp
Go to the documentation of this file.
1 
6 /*
7  * Copyright (C) 2019-2021 Tobias Flaig.
8  *
9  * This file is part of nawa.
10  *
11  * nawa is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License,
13  * version 3, as published by the Free Software Foundation.
14  *
15  * nawa is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with nawa. If not, see <https://www.gnu.org/licenses/>.
22  */
23 
24 #include <boost/algorithm/string.hpp>
25 #include <fstream>
26 #include <iomanip>
27 #include <nawa/Exception.h>
28 #include <nawa/util/encoding.h>
29 #include <nawa/util/utils.h>
30 #include <unordered_map>
31 
32 using namespace nawa;
33 using namespace std;
34 
35 namespace {
/**
 * Lookup table from lowercase file extension (without the leading dot) to the content type
 * that should be sent for files of that kind. It is consulted by utils::contentTypeByExtension(),
 * which falls back to "application/octet-stream" for extensions that are not listed here.
 *
 * "mp4" is mapped to "video/mp4" (like the sibling "m4v" and "f4v" entries), as "application/mp4"
 * is meant for MP4 containers carrying neither video nor audio. "mjs" uses "text/javascript",
 * in line with "js".
 */
std::unordered_map<std::string, std::string> const contentTypeMap = {
        {"aac", "audio/aac"},
        {"arc", "application/x-freearc"},
        {"avi", "video/x-msvideo"},
        {"azw", "application/vnd.amazon.ebook"},
        {"bmp", "image/bmp"},
        {"bz", "application/x-bzip"},
        {"bz2", "application/x-bzip2"},
        {"csh", "application/x-csh"},
        {"css", "text/css"},
        {"csv", "text/csv"},
        {"deb", "application/vnd.debian.binary-package"},
        {"doc", "application/msword"},
        {"dot", "application/msword"},
        {"docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
        {"dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
        {"eot", "application/vnd.ms-fontobject"},
        {"epub", "application/epub+zip"},
        {"flv", "video/x-flv"},
        {"f4v", "video/mp4"},
        {"f4a", "audio/mp4"},
        {"gif", "image/gif"},
        {"gz", "application/x-gzip"},
        {"htm", "text/html"},
        {"html", "text/html"},
        {"ico", "image/vnd.microsoft.icon"},
        {"ics", "text/calendar"},
        {"jar", "application/java-archive"},
        {"java", "text/plain"},
        {"jpg", "image/jpeg"},
        {"jpeg", "image/jpeg"},
        {"js", "text/javascript"},
        {"json", "application/json"},
        {"mid", "audio/x-midi"},
        {"midi", "audio/x-midi"},
        {"mjs", "text/javascript"},
        {"mp3", "audio/mpeg"},
        {"mpeg", "video/mpeg"},
        {"mp4", "video/mp4"},
        {"m4v", "video/mp4"},
        {"m4a", "audio/mp4"},
        {"mpkg", "application/vnd.apple.installer+xml"},
        {"odp", "application/vnd.oasis.opendocument.presentation"},
        {"otp", "application/vnd.oasis.opendocument.presentation"},
        {"ods", "application/vnd.oasis.opendocument.spreadsheet"},
        {"ots", "application/vnd.oasis.opendocument.spreadsheet"},
        {"odt", "application/vnd.oasis.opendocument.text"},
        {"ott", "application/vnd.oasis.opendocument.text"},
        {"ogg", "application/ogg"},
        {"ogx", "application/ogg"},
        {"oga", "audio/ogg"},
        {"ogv", "video/ogg"},
        {"otf", "font/otf"},
        {"png", "image/png"},
        {"pdf", "application/pdf"},
        {"ppt", "application/vnd.ms-powerpoint"},
        {"pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"},
        {"rar", "application/x-rar-compressed"},
        {"rtf", "application/rtf"},
        {"sh", "application/x-sh"},
        {"svg", "image/svg+xml"},
        {"swf", "application/x-shockwave-flash"},
        {"tar", "application/x-tar"},
        {"tif", "image/tiff"},
        {"tiff", "image/tiff"},
        {"ttf", "font/ttf"},
        {"txt", "text/plain"},
        {"vsd", "application/vnd.visio"},
        {"wav", "audio/wav"},
        {"weba", "audio/webm"},
        {"webm", "video/webm"},
        {"webp", "image/webp"},
        {"woff", "font/woff"},
        {"woff2", "font/woff2"},
        {"xhtml", "application/xhtml+xml"},
        {"xls", "application/vnd.ms-excel"},
        {"xlt", "application/vnd.ms-excel"},
        {"xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
        {"xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
        {"xml", "application/xml"},
        {"xul", "application/vnd.mozilla.xul+xml"},
        {"xz", "application/x-xz"},
        {"zip", "application/zip"},
        {"3gp", "video/3gpp"},
        {"3g2", "video/3gpp2"},
        {"7z", "application/x-7z-compressed"}};
122 
/**
 * Get the abbreviated English name of a day of the week.
 * @param dow Index of the weekday, counted from 0 (Sunday) to 6 (Saturday), as in tm_wday.
 * @return The three-letter weekday name, or an empty string if the index is outside of 0-6.
 */
inline std::string getDayOfWeek(int dow) {
    static char const* const weekdayNames[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
    if (dow < 0 || dow > 6) {
        return {};
    }
    return weekdayNames[dow];
}
158 
/**
 * Get the abbreviated English name of a month.
 * @param mon Index of the month, counted from 0 (January) to 11 (December), as in tm_mon.
 * @return The three-letter month name, or an empty string if the index is outside of 0-11.
 */
inline std::string getMonth(int mon) {
    static char const* const monthNames[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
    if (mon < 0 || mon > 11) {
        return {};
    }
    return monthNames[mon];
}
209 
210 }// namespace
211 
212 // doxygen bug, somehow doxygen does not like std::function
213 void utils::regexReplaceCallback(std::string& s, std::regex const& rgx,
214  std::function<std::string(std::vector<std::string> const&)> const& fmt) {
215  // how many submatches do we have to deal with?
216  int marks = rgx.mark_count();
217  // we want to iterate through all submatches (to collect them in a vector passed to fmt())
218  vector<int> submatchList;
219  for (int i = -1; i <= marks; ++i) {
220  submatchList.push_back(i);
221  }
222 
223  sregex_token_iterator begin(s.begin(), s.end(), rgx, submatchList), end;
224  stringstream out;
225 
226  // prefixes and submatches (should) alternate
227  int submatch = -1;
228  vector<string> submatchVector;
229  for (auto it = begin; it != end; ++it) {
230  if (submatch == -1) {
231  out << it->str();
232  ++submatch;
233  } else {
234  submatchVector.push_back(it->str());
235  if (submatch < marks) {
236  ++submatch;
237  } else {
238  out << fmt(submatchVector);
239  submatchVector.clear();
240  submatch = -1;
241  }
242  }
243  }
244  s = out.str();
245 }
246 
247 string utils::hexDump(string const& in) {
248  stringstream rets;
249  rets << hex << setfill('0');
250  for (char c : in) {
251  rets << setw(2) << (int) (unsigned char) c;
252  }
253  return rets.str();
254 }
255 
256 string utils::toLowercase(string s) {
257  transform(s.begin(), s.end(), s.begin(), ::tolower);
258  return s;
259 }
260 
261 string utils::toUppercase(string s) {
262  transform(s.begin(), s.end(), s.begin(), ::toupper);
263  return s;
264 }
265 
266 string utils::generateErrorPage(unsigned int httpStatus) {
267  string errorStr;
268  string explanation;
269  switch (httpStatus) {
270  case 400:
271  errorStr = "Bad Request";
272  explanation = "The server cannot process your request.";
273  break;
274  case 401:
275  errorStr = "Unauthorized";
276  explanation = "The necessary credentials have not been provided.";
277  break;
278  case 403:
279  errorStr = "Forbidden";
280  explanation = "You do not have the necessary permissions to view this page.";
281  break;
282  case 404:
283  errorStr = "Not Found";
284  explanation = "The requested URL was not found on this server.";
285  break;
286  case 405:
287  errorStr = "Method Not Allowed";
288  explanation = "The used request method is not supported for the requested resource.";
289  break;
290  case 406:
291  errorStr = "Not Applicable";
292  explanation = "The requested function is unable to produce a resource that satisfies your browser's Accept header.";
293  break;
294  case 408:
295  errorStr = "Request Timeout";
296  explanation = "A timeout occurred while waiting for your request.";
297  break;
298  case 409:
299  errorStr = "Conflict";
300  explanation = "The request cannot be processed due to a conflict on the underlying resource.";
301  break;
302  case 410:
303  errorStr = "Gone";
304  explanation = "The requested resource is no longer available.";
305  break;
306  case 415:
307  errorStr = "Unsupported Media Type";
308  explanation = "Your browser has requested a media type that cannot be provided by this resource.";
309  break;
310  case 418:
311  errorStr = "I'm a teapot";
312  explanation = "I cannot brew coffee for you.";
313  break;
314  case 429:
315  errorStr = "Too Many Requests";
316  break;
317  case 451:
318  errorStr = "Unavailable For Legal Reasons";
319  break;
320  case 500:
321  errorStr = "Internal Server Error";
322  explanation = "The server encountered an internal error and is unable to fulfill your request.";
323  break;
324  case 501:
325  errorStr = "Not Implemented";
326  explanation = "The server is not able to fulfill your request.";
327  break;
328  case 503:
329  errorStr = "Service Unavailable";
330  explanation = "This service is currently unavailable. Please try again later.";
331  break;
332  default:
333  errorStr = "Unknown Error";
334  }
335 
336  stringstream ep;
337  ep << "<!DOCTYPE html><html><head><title>" << httpStatus << ' ' << errorStr << "</title></head><body><h1>"
338  << errorStr << "</h1><p>" << explanation << "</p></body></html>";
339 
340  return ep.str();
341 }
342 
343 string utils::getFileExtension(string const& filename) {
344  try {
345  return filename.substr(filename.find_last_of('.') + 1);
346  } catch (out_of_range&) {}
347 
348  return {};
349 }
350 
351 string utils::contentTypeByExtension(string extension) {
352  auto ext = toLowercase(move(extension));
353  if (contentTypeMap.count(ext) == 1) {
354  return contentTypeMap.at(ext);
355  }
356  return "application/octet-stream";
357 }
358 
359 string utils::makeHttpTime(time_t time) {
360  stringstream httpTime;
361  tm gmt{};
362  auto retPtr = gmtime_r(&time, &gmt);
363  if (retPtr == nullptr) {
364  throw Exception(__PRETTY_FUNCTION__, 1, "Interpretation of UNIX timestamp failed.", strerror(errno));
365  }
366 
367  httpTime << getDayOfWeek(gmt.tm_wday) << put_time(&gmt, ", %d ") << getMonth(gmt.tm_mon);
368  httpTime << put_time(&gmt, " %Y %H:%M:%S GMT");
369 
370  return httpTime.str();
371 }
372 
373 time_t utils::readHttpTime(string const& httpTime) {
374  tm timeStruct{};
375  istringstream timeStream(httpTime);
376  timeStream.exceptions(ifstream::failbit);
377  try {
378  timeStream >> get_time(&timeStruct, "%a, %d %b %Y %H:%M:%S GMT");
379  } catch (ios_base::failure const& e) {
380  throw Exception(__PRETTY_FUNCTION__, 1, "Parsing of HTTP timestamp failed.", e.what());
381  }
382 
383  // timegm will interpret the tm as UTC and convert it to a time_t
384  time_t unixTime = timegm(&timeStruct);
385  if (unixTime == -1) {
386  throw Exception(__PRETTY_FUNCTION__, 1, "Conversion of parsed HTTP timestamp to a UNIX timestamp failed.", strerror(errno));
387  }
388 
389  return unixTime;
390 }
391 
392 string utils::makeSmtpTime(time_t time) {
393  stringstream smtpTime;
394  tm ltime{};
395  auto retPtr = localtime_r(&time, &ltime);
396  if (retPtr == nullptr) {
397  throw Exception(__PRETTY_FUNCTION__, 1, "Interpretation of UNIX timestamp failed.", strerror(errno));
398  }
399  smtpTime << getDayOfWeek(ltime.tm_wday) << put_time(&ltime, ", %e ") << getMonth(ltime.tm_mon);
400  smtpTime << put_time(&ltime, " %Y %H:%M:%S %z");
401 
402  return smtpTime.str();
403 }
404 
405 time_t utils::readSmtpTime(string const& smtpTime) {
406  string smtpTimeM = smtpTime;
407  tm timeStruct{};
408 
409  // there seems to be a bug in get_time, %e parsing with leading space does not work, so this fails for
410  // days of month < 10:
411  //timeStream >> get_time(&timeStruct, "%a, %e %b %Y %H:%M:%S %z");
412 
413  // dirty hack
414  if (smtpTimeM.length() > 5 && smtpTimeM[5] == ' ') {
415  smtpTimeM[5] = '0';
416  }
417  istringstream timeStream(smtpTimeM);
418  timeStream.exceptions(ifstream::failbit);
419  try {
420  // time zone modifier %z at the end will also cause trouble, but is not needed anyway (as the TZ is not part of tm)
421  timeStream >> get_time(&timeStruct, "%a, %d %b %Y %H:%M:%S");
422  } catch (ios_base::failure const& e) {
423  throw Exception(__PRETTY_FUNCTION__, 1, "Parsing of SMTP timestamp failed.", e.what());
424  }
425 
426  // timegm will create a time_t, but does not honor the time zone, unfortunately (not part of tm)
427  time_t unixTime = timegm(&timeStruct);
428  if (unixTime == -1) {
429  throw Exception(__PRETTY_FUNCTION__, 1, "Conversion of parsed SMTP timestamp to a UNIX timestamp failed.", strerror(errno));
430  }
431 
432  // so we'll have to add/subtract the difference manually
433  if (smtpTimeM.length() > 30) {
434  try {
435  long tzAdjust = smtpTimeM[26] == '-' ? 1 : -1;
436  long tzH = stol(smtpTimeM.substr(27, 2));
437  long tzM = stol(smtpTimeM.substr(29, 2));
438  unixTime += tzAdjust * (tzH * 3600 + tzM * 60);
439  } catch (invalid_argument const& e) {
440  throw Exception(__PRETTY_FUNCTION__, 1, "Timezone adjustment in parsing of SMTP timestamp failed.", e.what());
441  }
442  }
443 
444  // mktime will interpret the tm as local time and convert it to a time_t
445  return unixTime;
446 }
447 
448 vector<string> utils::splitString(string str, char delimiter, bool ignoreEmpty) {
449  vector<string> ret;
450  for (size_t pos = 0; !str.empty();) {
451  pos = str.find_first_of(delimiter);
452  auto token = str.substr(0, pos);
453  if (!ignoreEmpty || !token.empty()) {
454  ret.push_back(str.substr(0, pos));
455  }
456  if (pos < str.length()) {
457  str = str.substr(pos + 1);
458  } else {
459  break;
460  }
461  }
462  return ret;
463 }
464 
465 string utils::mergePath(vector<string> const& path) {
466  if (path.empty()) {
467  return "/";
468  }
469  stringstream stringPath;
470  for (auto const& e : path) {
471  stringPath << '/' << e;
472  }
473  return stringPath.str();
474 }
475 
476 vector<string> utils::splitPath(string const& pathString) {
477  // remove query string
478  string rawPath = pathString.substr(0, pathString.find('?'));
479  return splitString(rawPath, '/', true);
480 }
481 
482 string utils::convertLineEndings(string const& in, string const& ending) {
483  stringstream ret;
484  for (const auto& c : in) {
485  if (c == '\n')
486  ret << ending;
487  else if (c != '\r')
488  ret << c;
489  }
490  return ret.str();
491 }
492 
493 string utils::getFileContents(string const& path) {
494  // open file as binary
495  ifstream f(path, ifstream::binary);
496 
497  // throw exception if file cannot be opened
498  if (!f) {
499  throw Exception(__PRETTY_FUNCTION__, 1, "Cannot open file for reading");
500  }
501 
502  // get file size
503  f.seekg(0, ios::end);
504  long fs = f.tellg();
505  f.seekg(0);
506 
507  // load to string
508  string ret(static_cast<unsigned long>(fs), '\0');
509  f.read(&ret[0], fs);
510 
511  return ret;
512 }
513 
514 string utils::stringReplace(string input, unordered_map<char, char> const& patterns) {
515  for (auto const& [key, val] : patterns) {
516  replace(input.begin(), input.end(), key, val);
517  }
518  return input;
519 }
520 
521 string utils::stringReplace(string input, unordered_map<string, string> const& patterns) {
522  for (auto const& [key, val] : patterns) {
523  for (size_t pos = input.find(key); pos != string::npos;) {
524  input.replace(pos, key.length(), val);
525  pos = input.find(key, pos + val.length());
526  }
527  }
528  return input;
529 }
530 
531 unordered_multimap<string, string> utils::splitQueryString(string const& queryString) {
532  string qs;
533  size_t qmrkPos = queryString.find_first_of('?');
534  unordered_multimap<string, string> ret;
535  if (qmrkPos != string::npos && queryString.length() > qmrkPos) {
536  qs = queryString.substr(qmrkPos + 1);
537  } else if (qmrkPos == string::npos) {
538  qs = queryString;
539  }
540  auto pairs = splitString(qs, '&', true);
541  for (auto const& p : pairs) {
542  size_t eqPos = p.find_first_of('=');
543  string k = p.substr(0, eqPos);
544  string v = (eqPos < p.length() - 1) ? encoding::urlDecode(p.substr(eqPos + 1)) : "";
545  ret.insert({k, v});
546  }
547  return ret;
548 }
549 
550 unordered_map<string, string> utils::parseHeaders(string rawHeaders) {
551  unordered_map<string, string> ret;
552  // filter out carriage returns
553  boost::erase_all(rawHeaders, "\r");
554  // split
555  auto lines = splitString(rawHeaders, '\n', true);
556  for (auto const& line : lines) {
557  auto colonPos = line.find_first_of(':');
558  if (line.length() < colonPos + 2) {
559  continue;
560  }
561  auto key = toLowercase(line.substr(0, colonPos));
562  auto val = line.substr(colonPos + 1);
563  boost::trim_left(val);
564  ret[key] = val;
565  }
566  return ret;
567 }
568 
569 unordered_multimap<std::string, std::string> utils::parseCookies(string const& rawCookies) {
570  unordered_multimap<std::string, std::string> ret;
571  // split by ;
572  auto cookies = splitString(rawCookies, ';', true);
573  for (auto c : cookies) {
574  // remove whitespaces
575  boost::trim(c);
576  // key and value
577  auto eqPos = c.find_first_of('=');
578  if (c.length() < eqPos + 2) {
579  continue;
580  }
581  auto key = c.substr(0, eqPos);
582  auto val = c.substr(eqPos + 1);
583  ret.insert({key, val});
584  }
585  return ret;
586 }
Exception class that can be used by apps to catch errors resulting from nawa function calls.
Namespace containing functions for text encoding and decoding.
std::string urlDecode(std::string input)
Definition: encoding.cpp:256
void regexReplaceCallback(std::string &s, std::regex const &rgx, std::function< std::string(std::vector< std::string > const &)> const &fmt)
std::string hexDump(std::string const &in)
Definition: utils.cpp:247
std::string getFileContents(std::string const &path)
Definition: utils.cpp:493
time_t readSmtpTime(std::string const &smtpTime)
Definition: utils.cpp:405
std::unordered_multimap< std::string, std::string > splitQueryString(std::string const &queryString)
Definition: utils.cpp:531
std::string convertLineEndings(std::string const &in, std::string const &ending)
Definition: utils.cpp:482
time_t readHttpTime(std::string const &httpTime)
Definition: utils.cpp:373
std::unordered_multimap< std::string, std::string > parseCookies(std::string const &rawCookies)
Definition: utils.cpp:569
std::string stringReplace(std::string input, std::unordered_map< char, char > const &patterns)
Definition: utils.cpp:514
std::string generateErrorPage(unsigned int httpStatus)
Definition: utils.cpp:266
std::vector< std::string > splitPath(std::string const &pathString)
Definition: utils.cpp:476
std::string toLowercase(std::string s)
Definition: utils.cpp:256
std::string contentTypeByExtension(std::string extension)
Definition: utils.cpp:351
std::string toUppercase(std::string s)
Definition: utils.cpp:261
std::string getFileExtension(std::string const &filename)
Definition: utils.cpp:343
std::string makeHttpTime(time_t time)
Definition: utils.cpp:359
std::vector< std::string > splitString(std::string str, char delimiter, bool ignoreEmpty=false)
Definition: utils.cpp:448
std::string makeSmtpTime(time_t time)
Definition: utils.cpp:392
std::string mergePath(std::vector< std::string > const &path)
Definition: utils.cpp:465
std::unordered_map< std::string, std::string > parseHeaders(std::string rawHeaders)
Definition: utils.cpp:550
Definition: AppInit.h:31
Contains useful functions that improve the readability and facilitate maintenance of the NAWA code.