24#include <base64/base64.h>
25#include <boost/algorithm/string.hpp>
30#include <punycode/punycode.h>
33#include <unordered_map>
34#include <unordered_set>
40 unordered_map<char32_t, u32string>
const htmlEntities =
41 {{U
'\u00C1', U
"Á"}, {U
'\u00E1', U
"á"}, {U
'\u0102', U
"Ă"}, {U
'\u0103', U
"ă"}, {U
'\u223E', U
"∾"}, {U
'\u223F', U
"∿"}, {U
'\u223E', U
""}, {U
'\u00C2', U
"Â"}, {U
'\u00E2', U
"â"}, {U
'\u00B4', U
"´"}, {U
'\u0410', U
"А"}, {U
'\u0430', U
"а"}, {U
'\u00C6', U
"Æ"}, {U
'\u00E6', U
"æ"}, {U
'\u2061', U
"⁡"}, {U
'\U0001d504', U
"𝔄"}, {U
'\U0001d51e', U
"𝔞"}, {U
'\u00C0', U
"À"}, {U
'\u00E0', U
"à"}, {U
'\u2135', U
"ℵ"}, {U
'\u0391', U
"Α"}, {U
'\u03B1', U
"α"}, {U
'\u0100', U
"Ā"}, {U
'\u0101', U
"ā"}, {U
'\u2A3F', U
"⨿"}, {U
'\u0026', U
"&"}, {U
'\u2A55', U
"⩕"}, {U
'\u2A53', U
"⩓"}, {U
'\u2227', U
"∧"}, {U
'\u2A5C', U
"⩜"}, {U
'\u2A58', U
"⩘"}, {U
'\u2A5A', U
"⩚"}, {U
'\u2220', U
"∠"}, {U
'\u29A4', U
"⦤"}, {U
'\u29A8', U
"⦨"}, {U
'\u29A9', U
"⦩"}, {U
'\u29AA', U
"⦪"}, {U
'\u29AB', U
"⦫"}, {U
'\u29AC', U
"⦬"}, {U
'\u29AD', U
"⦭"}, {U
'\u29AE', U
"⦮"}, {U
'\u29AF', U
"⦯"}, {U
'\u2221', U
"∡"}, {U
'\u221F', U
"∟"}, {U
'\u22BE', U
"⊾"}, {U
'\u299D', U
"⦝"}, {U
'\u2222', U
"∢"}, {U
'\u00C5', U
"Å"}, {U
'\u237C', U
"⍼"}, {U
'\u0104', U
"Ą"}, {U
'\u0105', U
"ą"}, {U
'\U0001d538', U
"𝔸"}, {U
'\U0001d552', U
"𝕒"}, {U
'\u2A6F', U
"⩯"}, {U
'\u2248', U
"≈"}, {U
'\u2A70', U
"⩰"}, {U
'\u224A', U
"≊"}, {U
'\u224B', U
"≋"}, {U
'\u0027', U
"'"}, {U
'\u00E5', U
"å"}, {U
'\U0001d49c', U
"𝒜"}, {U
'\U0001d4b6', U
"𝒶"}, {U
'\u2254', U
"≔"}, {U
'\u002A', U
"*"}, {U
'\u224D', U
"≍"}, {U
'\u00C3', U
"Ã"}, {U
'\u00E3', U
"ã"}, {U
'\u00C4', U
"Ä"}, {U
'\u00E4', U
"ä"}, {U
'\u2233', U
"∳"}, {U
'\u2A11', U
"⨑"}, {U
'\u224C', U
"≌"}, {U
'\u03F6', U
"϶"}, {U
'\u2035', U
"‵"}, {U
'\u223D', U
"∽"}, {U
'\u22CD', U
"⋍"}, {U
'\u2216', U
"∖"}, {U
'\u2AE7', U
"⫧"}, {U
'\u22BD', U
"⊽"}, {U
'\u2305', U
"⌅"}, {U
'\u2306', U
"⌆"}, {U
'\u23B5', U
"⎵"}, {U
'\u23B6', U
"⎶"}, {U
'\u0411', U
"Б"}, {U
'\u0431', U
"б"}, {U
'\u201E', U
"„"}, {U
'\u2235', U
"∵"}, {U
'\u29B0', U
"⦰"}, {U
'\u212C', U
"ℬ"}, {U
'\u0392', U
"Β"}, {U
'\u03B2', U
"β"}, {U
'\u2136', U
"ℶ"}, {U
'\u226C', U
"≬"}, {U
'\U0001d505', U
"𝔅"}, {U
'\U0001d51f', U
"𝔟"}, {U
'\u22C2', U
"⋂"}, {U
'\u25EF', U
"◯"}, {U
'\u22C3', U
"⋃"}, {U
'\u2A00', U
"⨀"}, {U
'\u2A01', U
"⨁"}, {U
'\u2A02', U
"⨂"}, {U
'\u2A06', U
"⨆"}, {U
'\u2605', U
"★"}, {U
'\u25BD', U
"▽"}, {U
'\u25B3', U
"△"}, {U
'\u2A04', U
"⨄"}, {U
'\u22C1', U
"⋁"}, {U
'\u22C0', U
"⋀"}, {U
'\u290D', U
"⤍"}, {U
'\u29EB', U
"⧫"}, {U
'\u25AA', U
"▪"}, {U
'\u25B4', U
"▴"}, {U
'\u25BE', U
"▾"}, {U
'\u25C2', U
"◂"}, {U
'\u25B8', U
"▸"}, {U
'\u2423', U
"␣"}, {U
'\u2592', U
"▒"}, {U
'\u2591', U
"░"}, {U
'\u2593', U
"▓"}, {U
'\u2588', U
"█"}, {U
'\u003D', U
""}, {U
'\u2261', U
""}, {U
'\u2AED', U
"⫭"}, {U
'\u2310', U
"⌐"}, {U
'\U0001d539', U
"𝔹"}, {U
'\U0001d553', U
"𝕓"}, {U
'\u22A5', U
"⊥"}, {U
'\u22C8', U
"⋈"}, {U
'\u29C9', U
"⧉"}, {U
'\u2510', U
"┐"}, {U
'\u2555', U
"╕"}, {U
'\u2556', U
"╖"}, {U
'\u2557', U
"╗"}, {U
'\u250C', U
"┌"}, {U
'\u2552', U
"╒"}, {U
'\u2553', U
"╓"}, {U
'\u2554', U
"╔"}, {U
'\u2500', U
"─"}, {U
'\u2550', U
"═"}, {U
'\u252C', U
"┬"}, {U
'\u2564', U
"╤"}, {U
'\u2565', U
"╥"}, {U
'\u2566', U
"╦"}, {U
'\u2534', U
"┴"}, {U
'\u2567', U
"╧"}, {U
'\u2568', U
"╨"}, {U
'\u2569', U
"╩"}, {U
'\u229F', U
"⊟"}, {U
'\u229E', U
"⊞"}, {U
'\u22A0', U
"⊠"}, {U
'\u2518', U
"┘"}, {U
'\u255B', U
"╛"}, {U
'\u255C', U
"╜"}, {U
'\u255D', U
"╝"}, {U
'\u2514', U
"└"}, {U
'\u2558', U
"╘"}, {U
'\u2559', U
"╙"}, {U
'\u255A', U
"╚"}, {U
'\u2502', U
"│"}, {U
'\u2551', U
"║"}, {U
'\u253C', U
"┼"}, {U
'\u256A', U
"╪"}, {U
'\u256B', U
"╫"}, {U
'\u256C', U
"╬"}, {U
'\u2524', U
"┤"}, {U
'\u2561', U
"╡"}, {U
'\u2562', U
"╢"}, {U
'\u2563', U
"╣"}, {U
'\u251C', U
"├"}, {U
'\u255E', U
"╞"}, {U
'\u255F', U
"╟"}, {U
'\u2560', U
"╠"}, {U
'\u02D8', U
"˘"}, {U
'\u00A6', U
"¦"}, {U
'\U0001d4b7', U
"𝒷"}, {U
'\u204F', U
"⁏"}, {U
'\u29C5', U
"⧅"}, {U
'\u005C', U
"\"}, {U
'\u27C8', U
"⟈"}, {U
'\u2022', U
"•"}, {U
'\u224E', U
"≎"}, {U
'\u2AAE', U
"⪮"}, {U
'\u224F', U
"≏"}, {U
'\u0106', U
"Ć"}, {U
'\u0107', U
"ć"}, {U
'\u2A44', U
"⩄"}, {U
'\u2A49', U
"⩉"}, {U
'\u2A4B', U
"⩋"}, {U
'\u2229', U
"∩"}, {U
'\u22D2', U
"⋒"}, {U
'\u2A47', U
"⩇"}, {U
'\u2A40', U
"⩀"}, {U
'\u2145', U
"ⅅ"}, {U
'\u2229', U
""}, {U
'\u2041', U
"⁁"}, {U
'\u02C7', U
"ˇ"}, {U
'\u212D', U
"ℭ"}, {U
'\u2A4D', U
"⩍"}, {U
'\u010C', U
"Č"}, {U
'\u010D', U
"č"}, {U
'\u00C7', U
"Ç"}, {U
'\u00E7', U
"ç"}, {U
'\u0108', U
"Ĉ"}, {U
'\u0109', U
"ĉ"}, {U
'\u2230', U
"∰"}, {U
'\u2A4C', U
"⩌"}, {U
'\u2A50', U
"⩐"}, {U
'\u010A', U
"Ċ"}, {U
'\u010B', U
"ċ"}, {U
'\u00B8', U
"¸"}, {U
'\u29B2', U
"⦲"}, {U
'\u00A2', U
"¢"}, {U
'\u00B7', U
"·"}, {U
'\U0001d520', U
"𝔠"}, {U
'\u0427', U
"Ч"}, {U
'\u0447', U
"ч"}, {U
'\u2713', U
"✓"}, {U
'\u03A7', U
"Χ"}, {U
'\u03C7', U
"χ"}, {U
'\u02C6', U
"ˆ"}, {U
'\u2257', U
"≗"}, {U
'\u21BA', U
"↺"}, {U
'\u21BB', U
"↻"}, {U
'\u229B', U
"⊛"}, {U
'\u229A', U
"⊚"}, {U
'\u229D', U
"⊝"}, {U
'\u2299', U
"⊙"}, {U
'\u00AE', U
"®"}, {U
'\u24C8', U
"Ⓢ"}, {U
'\u2296', U
"⊖"}, {U
'\u2295', U
"⊕"}, {U
'\u2297', U
"⊗"}, {U
'\u25CB', U
"○"}, {U
'\u29C3', U
"⧃"}, {U
'\u2A10', U
"⨐"}, {U
'\u2AEF', U
"⫯"}, {U
'\u29C2', U
"⧂"}, {U
'\u2232', U
"∲"}, {U
'\u201D', U
"”"}, {U
'\u2019', U
"’"}, {U
'\u2663', U
"♣"}, {U
'\u003A', U
":"}, {U
'\u2237', U
"∷"}, {U
'\u2A74', U
"⩴"}, {U
'\u002C', U
","}, {U
'\u0040', U
"@"}, {U
'\u2201', U
"∁"}, {U
'\u2218', U
"∘"}, {U
'\u2102', U
"ℂ"}, {U
'\u2245', U
"≅"}, {U
'\u2A6D', U
"⩭"}, {U
'\u2261', U
"≡"}, {U
'\u222E', U
"∮"}, {U
'\u222F', U
"∯"}, {U
'\U0001d554', U
"𝕔"}, {U
'\u2210', U
"∐"}, {U
'\u00A9', U
"©"}, {U
'\u2117', U
"℗"}, {U
'\u21B5', U
"↵"}, {U
'\u2717', U
"✗"}, {U
'\u2A2F', U
"⨯"}, {U
'\U0001d49e', U
"𝒞"}, {U
'\U0001d4b8', U
"𝒸"}, {U
'\u2ACF', U
"⫏"}, {U
'\u2AD1', U
"⫑"}, {U
'\u2AD0', U
"⫐"}, {U
'\u2AD2', U
"⫒"}, {U
'\u22EF', U
"⋯"}, {U
'\u2938', U
"⤸"}, {U
'\u2935', U
"⤵"}, {U
'\u22DE', U
"⋞"}, {U
'\u22DF', U
"⋟"}, {U
'\u21B6', U
"↶"}, {U
'\u293D', U
"⤽"}, {U
'\u2A48', U
"⩈"}, {U
'\u2A46', U
"⩆"}, {U
'\u222A', U
"∪"}, {U
'\u22D3', U
"⋓"}, {U
'\u2A4A', U
"⩊"}, {U
'\u228D', U
"⊍"}, {U
'\u2A45', U
"⩅"}, {U
'\u222A', U
""}, {U
'\u21B7', U
"↷"}, {U
'\u293C', U
"⤼"}, {U
'\u22CE', U
"⋎"}, {U
'\u22CF', U
"⋏"}, {U
'\u00A4', U
"¤"}, {U
'\u2231', U
"∱"}, {U
'\u232D', U
"⌭"}, {U
'\u2020', U
"†"}, {U
'\u2021', U
"‡"}, {U
'\u2138', U
"ℸ"}, {U
'\u2193', U
"↓"}, {U
'\u21A1', U
"↡"}, {U
'\u21D3', U
"⇓"}, {U
'\u2010', U
"‐"}, {U
'\u2AE4', U
"⫤"}, {U
'\u22A3', U
"⊣"}, {U
'\u290F', U
"⤏"}, {U
'\u02DD', U
"˝"}, {U
'\u010E', U
"Ď"}, {U
'\u010F', U
"ď"}, {U
'\u0414', U
"Д"}, {U
'\u0434', U
"д"}, {U
'\u21CA', U
"⇊"}, {U
'\u2146', U
"ⅆ"}, {U
'\u2911', U
"⤑"}, {U
'\u2A77', U
"⩷"}, {U
'\u00B0', U
"°"}, {U
'\u2207', U
"∇"}, {U
'\u0394', U
"Δ"}, {U
'\u03B4', U
"δ"}, {U
'\u29B1', U
"⦱"}, {U
'\u297F', U
"⥿"}, {U
'\U0001d507', U
"𝔇"}, {U
'\U0001d521', U
"𝔡"}, {U
'\u2965', U
"⥥"}, {U
'\u21C3', U
"⇃"}, {U
'\u21C2', U
"⇂"}, {U
'\u02D9', U
"˙"}, {U
'\u0060', U
"`"}, {U
'\u02DC', U
"˜"}, {U
'\u22C4', U
"⋄"}, {U
'\u2666', U
"♦"}, {U
'\u00A8', U
"¨"}, {U
'\u03DD', U
"ϝ"}, {U
'\u22F2', U
"⋲"}, {U
'\u00F7', U
"÷"}, {U
'\u22C7', U
"⋇"}, {U
'\u0402', U
"Ђ"}, {U
'\u0452', U
"ђ"}, {U
'\u231E', U
"⌞"}, {U
'\u230D', U
"⌍"}, {U
'\u0024', U
"$"}, {U
'\U0001d53b', U
"𝔻"}, {U
'\U0001d555', U
"𝕕"}, {U
'\u20DC', U
"⃜"}, {U
'\u2250', U
"≐"}, {U
'\u2251', U
"≑"}, {U
'\u2238', U
"∸"}, {U
'\u2214', U
"∔"}, {U
'\u22A1', U
"⊡"}, {U
'\u21D0', U
"⇐"}, {U
'\u21D4', U
"⇔"}, {U
'\u27F8', U
"⟸"}, {U
'\u27FA', U
"⟺"}, {U
'\u27F9', U
"⟹"}, {U
'\u21D2', U
"⇒"}, {U
'\u22A8', U
"⊨"}, {U
'\u21D1', U
"⇑"}, {U
'\u21D5', U
"⇕"}, {U
'\u2225', U
"∥"}, {U
'\u2913', U
"⤓"}, {U
'\u21F5', U
"⇵"}, {U
'\u0311', U
"̑"}, {U
'\u2950', U
"⥐"}, {U
'\u295E', U
"⥞"}, {U
'\u2956', U
"⥖"}, {U
'\u21BD', U
"↽"}, {U
'\u295F', U
"⥟"}, {U
'\u2957', U
"⥗"}, {U
'\u21C1', U
"⇁"}, {U
'\u21A7', U
"↧"}, {U
'\u22A4', U
"⊤"}, {U
'\u2910', U
"⤐"}, {U
'\u231F', U
"⌟"}, {U
'\u230C', U
"⌌"}, {U
'\U0001d49f', U
"𝒟"}, {U
'\U0001d4b9', U
"𝒹"}, {U
'\u0405', U
"Ѕ"}, {U
'\u0455', U
"ѕ"}, {U
'\u29F6', U
"⧶"}, {U
'\u0110', U
"Đ"}, {U
'\u0111', U
"đ"}, {U
'\u22F1', U
"⋱"}, {U
'\u25BF', U
"▿"}, {U
'\u296F', U
"⥯"}, {U
'\u29A6', U
"⦦"}, {U
'\u040F', U
"Џ"}, {U
'\u045F', U
"џ"}, {U
'\u27FF', U
"⟿"}, {U
'\u00C9', U
"É"}, {U
'\u00E9', U
"é"}, {U
'\u2A6E', U
"⩮"}, {U
'\u011A', U
"Ě"}, {U
'\u011B', U
"ě"}, {U
'\u00CA', U
"Ê"}, {U
'\u00EA', U
"ê"}, {U
'\u2256', U
"≖"}, {U
'\u2255', U
"≕"}, {U
'\u042D', U
"Э"}, {U
'\u044D', U
"э"}, {U
'\u0116', U
"Ė"}, {U
'\u0117', U
"ė"}, {U
'\u2147', U
"ⅇ"}, {U
'\u2252', U
"≒"}, {U
'\U0001d508', U
"𝔈"}, {U
'\U0001d522', U
"𝔢"}, {U
'\u2A9A', U
"⪚"}, {U
'\u00C8', U
"È"}, {U
'\u00E8', U
"è"}, {U
'\u2A96', U
"⪖"}, {U
'\u2A98', U
"⪘"}, {U
'\u2A99', U
"⪙"}, {U
'\u2208', U
"∈"}, {U
'\u23E7', U
"⏧"}, {U
'\u2113', U
"ℓ"}, {U
'\u2A95', U
"⪕"}, {U
'\u2A97', U
"⪗"}, {U
'\u0112', U
"Ē"}, {U
'\u0113', U
"ē"}, {U
'\u2205', U
"∅"}, {U
'\u25FB', U
"◻"}, {U
'\u25AB', U
"▫"}, {U
'\u2004', U
" "}, {U
'\u2005', U
" "}, {U
'\u2003', U
" "}, {U
'\u014A', U
"Ŋ"}, {U
'\u014B', U
"ŋ"}, {U
'\u2002', U
" "}, {U
'\u0118', U
"Ę"}, {U
'\u0119', U
"ę"}, {U
'\U0001d53c', U
"𝔼"}, {U
'\U0001d556', U
"𝕖"}, {U
'\u22D5', U
"⋕"}, {U
'\u29E3', U
"⧣"}, {U
'\u2A71', U
"⩱"}, {U
'\u03B5', U
"ε"}, {U
'\u0395', U
"Ε"}, {U
'\u03F5', U
"ϵ"}, {U
'\u2242', U
"≂"}, {U
'\u2A75', U
"⩵"}, {U
'\u003D', U
"="}, {U
'\u225F', U
"≟"}, {U
'\u21CC', U
"⇌"}, {U
'\u2A78', U
"⩸"}, {U
'\u29E5', U
"⧥"}, {U
'\u2971', U
"⥱"}, {U
'\u2253', U
"≓"}, {U
'\u212F', U
"ℯ"}, {U
'\u2130', U
"ℰ"}, {U
'\u2A73', U
"⩳"}, {U
'\u0397', U
"Η"}, {U
'\u03B7', U
"η"}, {U
'\u00D0', U
"Ð"}, {U
'\u00F0', U
"ð"}, {U
'\u00CB', U
"Ë"}, {U
'\u00EB', U
"ë"}, {U
'\u20AC', U
"€"}, {U
'\u0021', U
"!"}, {U
'\u2203', U
"∃"}, {U
'\u0424', U
"Ф"}, {U
'\u0444', U
"ф"}, {U
'\u2640', U
"♀"}, {U
'\uFB03', U
"ffi"}, {U
'\uFB00', U
"ff"}, {U
'\uFB04', U
"ffl"}, {U
'\U0001d509', U
"𝔉"}, {U
'\U0001d523', U
"𝔣"}, {U
'\uFB01', U
"fi"}, {U
'\u25FC', U
"◼"}, {U
'\u266D', U
"♭"}, {U
'\uFB02', U
"fl"}, {U
'\u25B1', U
"▱"}, {U
'\u0192', U
"ƒ"}, {U
'\U0001d53d', U
"𝔽"}, {U
'\U0001d557', U
"𝕗"}, {U
'\u2200', U
"∀"}, {U
'\u22D4', U
"⋔"}, {U
'\u2AD9', U
"⫙"}, {U
'\u2131', U
"ℱ"}, {U
'\u2A0D', U
"⨍"}, {U
'\u00BD', U
"½"}, {U
'\u2153', U
"⅓"}, {U
'\u00BC', U
"¼"}, {U
'\u2155', U
"⅕"}, {U
'\u2159', U
"⅙"}, {U
'\u215B', U
"⅛"}, {U
'\u2154', U
"⅔"}, {U
'\u2156', U
"⅖"}, {U
'\u00BE', U
"¾"}, {U
'\u2157', U
"⅗"}, {U
'\u215C', U
"⅜"}, {U
'\u2158', U
"⅘"}, {U
'\u215A', U
"⅚"}, {U
'\u215D', U
"⅝"}, {U
'\u215E', U
"⅞"}, {U
'\u2044', U
"⁄"}, {U
'\u2322', U
"⌢"}, {U
'\U0001d4bb', U
"𝒻"}, {U
'\u01F5', U
"ǵ"}, {U
'\u0393', U
"Γ"}, {U
'\u03B3', U
"γ"}, {U
'\u03DC', U
"Ϝ"}, {U
'\u2A86', U
"⪆"}, {U
'\u011E', U
"Ğ"}, {U
'\u011F', U
"ğ"}, {U
'\u0122', U
"Ģ"}, {U
'\u011C', U
"Ĝ"}, {U
'\u011D', U
"ĝ"}, {U
'\u0413', U
"Г"}, {U
'\u0433', U
"г"}, {U
'\u0120', U
"Ġ"}, {U
'\u0121', U
"ġ"}, {U
'\u2265', U
"≥"}, {U
'\u2267', U
"≧"}, {U
'\u2A8C', U
"⪌"}, {U
'\u22DB', U
"⋛"}, {U
'\u2A7E', U
"⩾"}, {U
'\u2AA9', U
"⪩"}, {U
'\u2A80', U
"⪀"}, {U
'\u2A82', U
"⪂"}, {U
'\u2A84', U
"⪄"}, {U
'\u22DB', U
""}, {U
'\u2A94', U
"⪔"}, {U
'\U0001d50a', U
"𝔊"}, {U
'\U0001d524', U
"𝔤"}, {U
'\u226B', U
"≫"}, {U
'\u22D9', U
"⋙"}, {U
'\u2137', U
"ℷ"}, {U
'\u0403', U
"Ѓ"}, {U
'\u0453', U
"ѓ"}, {U
'\u2AA5', U
"⪥"}, {U
'\u2277', U
"≷"}, {U
'\u2A92', U
"⪒"}, {U
'\u2AA4', U
"⪤"}, {U
'\u2A8A', U
"⪊"}, {U
'\u2A88', U
"⪈"}, {U
'\u2269', U
"≩"}, {U
'\u22E7', U
"⋧"}, {U
'\U0001d53e', U
"𝔾"}, {U
'\U0001d558', U
"𝕘"}, {U
'\u2AA2', U
"⪢"}, {U
'\u2273', U
"≳"}, {U
'\U0001d4a2', U
"𝒢"}, {U
'\u210A', U
"ℊ"}, {U
'\u2A8E', U
"⪎"}, {U
'\u2A90', U
"⪐"}, {U
'\u2AA7', U
"⪧"}, {U
'\u2A7A', U
"⩺"}, {U
'\u003E', U
">"}, {U
'\u22D7', U
"⋗"}, {U
'\u2995', U
"⦕"}, {U
'\u2A7C', U
"⩼"}, {U
'\u2978', U
"⥸"}, {U
'\u2269', U
""}, {U
'\u200A', U
" "}, {U
'\u210B', U
"ℋ"}, {U
'\u042A', U
"Ъ"}, {U
'\u044A', U
"ъ"}, {U
'\u2948', U
"⥈"}, {U
'\u2194', U
"↔"}, {U
'\u21AD', U
"↭"}, {U
'\u005E', U
"^"}, {U
'\u210F', U
"ℏ"}, {U
'\u0124', U
"Ĥ"}, {U
'\u0125', U
"ĥ"}, {U
'\u2665', U
"♥"}, {U
'\u2026', U
"…"}, {U
'\u22B9', U
"⊹"}, {U
'\U0001d525', U
"𝔥"}, {U
'\u210C', U
"ℌ"}, {U
'\u2925', U
"⤥"}, {U
'\u2926', U
"⤦"}, {U
'\u21FF', U
"⇿"}, {U
'\u223B', U
"∻"}, {U
'\u21A9', U
"↩"}, {U
'\u21AA', U
"↪"}, {U
'\U0001d559', U
"𝕙"}, {U
'\u210D', U
"ℍ"}, {U
'\u2015', U
"―"}, {U
'\U0001d4bd', U
"𝒽"}, {U
'\u0126', U
"Ħ"}, {U
'\u0127', U
"ħ"}, {U
'\u2043', U
"⁃"}, {U
'\u00CD', U
"Í"}, {U
'\u00ED', U
"í"}, {U
'\u2063', U
"⁣"}, {U
'\u00CE', U
"Î"}, {U
'\u00EE', U
"î"}, {U
'\u0418', U
"И"}, {U
'\u0438', U
"и"}, {U
'\u0130', U
"İ"}, {U
'\u0415', U
"Е"}, {U
'\u0435', U
"е"}, {U
'\u00A1', U
"¡"}, {U
'\U0001d526', U
"𝔦"}, {U
'\u2111', U
"ℑ"}, {U
'\u00CC', U
"Ì"}, {U
'\u00EC', U
"ì"}, {U
'\u2148', U
"ⅈ"}, {U
'\u2A0C', U
"⨌"}, {U
'\u222D', U
"∭"}, {U
'\u29DC', U
"⧜"}, {U
'\u2129', U
"℩"}, {U
'\u0132', U
"IJ"}, {U
'\u0133', U
"ij"}, {U
'\u012A', U
"Ī"}, {U
'\u012B', U
"ī"}, {U
'\u2110', U
"ℐ"}, {U
'\u0131', U
"ı"}, {U
'\u22B7', U
"⊷"}, {U
'\u01B5', U
"Ƶ"}, {U
'\u2105', U
"℅"}, {U
'\u221E', U
"∞"}, {U
'\u29DD', U
"⧝"}, {U
'\u22BA', U
"⊺"}, {U
'\u222B', U
"∫"}, {U
'\u222C', U
"∬"}, {U
'\u2124', U
"ℤ"}, {U
'\u2A17', U
"⨗"}, {U
'\u2A3C', U
"⨼"}, {U
'\u2062', U
"⁢"}, {U
'\u0401', U
"Ё"}, {U
'\u0451', U
"ё"}, {U
'\u012E', U
"Į"}, {U
'\u012F', U
"į"}, {U
'\U0001d540', U
"𝕀"}, {U
'\U0001d55a', U
"𝕚"}, {U
'\u0399', U
"Ι"}, {U
'\u03B9', U
"ι"}, {U
'\u00BF', U
"¿"}, {U
'\U0001d4be', U
"𝒾"}, {U
'\u22F5', U
"⋵"}, {U
'\u22F9', U
"⋹"}, {U
'\u22F4', U
"⋴"}, {U
'\u22F3', U
"⋳"}, {U
'\u0128', U
"Ĩ"}, {U
'\u0129', U
"ĩ"}, {U
'\u0406', U
"І"}, {U
'\u0456', U
"і"}, {U
'\u00CF', U
"Ï"}, {U
'\u00EF', U
"ï"}, {U
'\u0134', U
"Ĵ"}, {U
'\u0135', U
"ĵ"}, {U
'\u0419', U
"Й"}, {U
'\u0439', U
"й"}, {U
'\U0001d50d', U
"𝔍"}, {U
'\U0001d527', U
"𝔧"}, {U
'\u0237', U
"ȷ"}, {U
'\U0001d541', U
"𝕁"}, {U
'\U0001d55b', U
"𝕛"}, {U
'\U0001d4a5', U
"𝒥"}, {U
'\U0001d4bf', U
"𝒿"}, {U
'\u0408', U
"Ј"}, {U
'\u0458', U
"ј"}, {U
'\u0404', U
"Є"}, {U
'\u0454', U
"є"}, {U
'\u039A', U
"Κ"}, {U
'\u03BA', U
"κ"}, {U
'\u03F0', U
"ϰ"}, {U
'\u0136', U
"Ķ"}, {U
'\u0137', U
"ķ"}, {U
'\u041A', U
"К"}, {U
'\u043A', U
"к"}, {U
'\U0001d50e', U
"𝔎"}, {U
'\U0001d528', U
"𝔨"}, {U
'\u0138', U
"ĸ"}, {U
'\u0425', U
"Х"}, {U
'\u0445', U
"х"}, {U
'\u040C', U
"Ќ"}, {U
'\u045C', U
"ќ"}, {U
'\U0001d542', U
"𝕂"}, {U
'\U0001d55c', U
"𝕜"}, {U
'\U0001d4a6', U
"𝒦"}, {U
'\U0001d4c0', U
"𝓀"}, {U
'\u21DA', U
"⇚"}, {U
'\u0139', U
"Ĺ"}, {U
'\u013A', U
"ĺ"}, {U
'\u29B4', U
"⦴"}, {U
'\u2112', U
"ℒ"}, {U
'\u039B', U
"Λ"}, {U
'\u03BB', U
"λ"}, {U
'\u27E8', U
"⟨"}, {U
'\u27EA', U
"⟪"}, {U
'\u2991', U
"⦑"}, {U
'\u2A85', U
"⪅"}, {U
'\u00AB', U
"«"}, {U
'\u21E4', U
"⇤"}, {U
'\u291F', U
"⤟"}, {U
'\u2190', U
"←"}, {U
'\u219E', U
"↞"}, {U
'\u291D', U
"⤝"}, {U
'\u21AB', U
"↫"}, {U
'\u2939', U
"⤹"}, {U
'\u2973', U
"⥳"}, {U
'\u21A2', U
"↢"}, {U
'\u2919', U
"⤙"}, {U
'\u291B', U
"⤛"}, {U
'\u2AAB', U
"⪫"}, {U
'\u2AAD', U
"⪭"}, {U
'\u2AAD', U
""}, {U
'\u290C', U
"⤌"}, {U
'\u290E', U
"⤎"}, {U
'\u2772', U
"❲"}, {U
'\u007B', U
"{"}, {U
'\u005B', U
"["}, {U
'\u298B', U
"⦋"}, {U
'\u298F', U
"⦏"}, {U
'\u298D', U
"⦍"}, {U
'\u013D', U
"Ľ"}, {U
'\u013E', U
"ľ"}, {U
'\u013B', U
"Ļ"}, {U
'\u013C', U
"ļ"}, {U
'\u2308', U
"⌈"}, {U
'\u041B', U
"Л"}, {U
'\u043B', U
"л"}, {U
'\u2936', U
"⤶"}, {U
'\u201C', U
"“"}, {U
'\u2967', U
"⥧"}, {U
'\u294B', U
"⥋"}, {U
'\u21B2', U
"↲"}, {U
'\u2264', U
"≤"}, {U
'\u2266', U
"≦"}, {U
'\u21C6', U
"⇆"}, {U
'\u27E6', U
"⟦"}, {U
'\u2961', U
"⥡"}, {U
'\u2959', U
"⥙"}, {U
'\u230A', U
"⌊"}, {U
'\u21BC', U
"↼"}, {U
'\u21C7', U
"⇇"}, {U
'\u21CB', U
"⇋"}, {U
'\u294E', U
"⥎"}, {U
'\u21A4', U
"↤"}, {U
'\u295A', U
"⥚"}, {U
'\u22CB', U
"⋋"}, {U
'\u29CF', U
"⧏"}, {U
'\u22B2', U
"⊲"}, {U
'\u22B4', U
"⊴"}, {U
'\u2951', U
"⥑"}, {U
'\u2960', U
"⥠"}, {U
'\u2958', U
"⥘"}, {U
'\u21BF', U
"↿"}, {U
'\u2952', U
"⥒"}, {U
'\u2A8B', U
"⪋"}, {U
'\u22DA', U
"⋚"}, {U
'\u2A7D', U
"⩽"}, {U
'\u2AA8', U
"⪨"}, {U
'\u2A7F', U
"⩿"}, {U
'\u2A81', U
"⪁"}, {U
'\u2A83', U
"⪃"}, {U
'\u22DA', U
""}, {U
'\u2A93', U
"⪓"}, {U
'\u22D6', U
"⋖"}, {U
'\u2276', U
"≶"}, {U
'\u2AA1', U
"⪡"}, {U
'\u2272', U
"≲"}, {U
'\u297C', U
"⥼"}, {U
'\U0001d50f', U
"𝔏"}, {U
'\U0001d529', U
"𝔩"}, {U
'\u2A91', U
"⪑"}, {U
'\u2962', U
"⥢"}, {U
'\u296A', U
"⥪"}, {U
'\u2584', U
"▄"}, {U
'\u0409', U
"Љ"}, {U
'\u0459', U
"љ"}, {U
'\u226A', U
"≪"}, {U
'\u22D8', U
"⋘"}, {U
'\u296B', U
"⥫"}, {U
'\u25FA', U
"◺"}, {U
'\u013F', U
"Ŀ"}, {U
'\u0140', U
"ŀ"}, {U
'\u23B0', U
"⎰"}, {U
'\u2A89', U
"⪉"}, {U
'\u2A87', U
"⪇"}, {U
'\u2268', U
"≨"}, {U
'\u22E6', U
"⋦"}, {U
'\u27EC', U
"⟬"}, {U
'\u21FD', U
"⇽"}, {U
'\u27F5', U
"⟵"}, {U
'\u27F7', U
"⟷"}, {U
'\u27FC', U
"⟼"}, {U
'\u27F6', U
"⟶"}, {U
'\u21AC', U
"↬"}, {U
'\u2985', U
"⦅"}, {U
'\U0001d543', U
"𝕃"}, {U
'\U0001d55d', U
"𝕝"}, {U
'\u2A2D', U
"⨭"}, {U
'\u2A34', U
"⨴"}, {U
'\u2217', U
"∗"}, {U
'\u005F', U
"_"}, {U
'\u2199', U
"↙"}, {U
'\u2198', U
"↘"}, {U
'\u25CA', U
"◊"}, {U
'\u0028', U
"("}, {U
'\u2993', U
"⦓"}, {U
'\u296D', U
"⥭"}, {U
'\u200E', U
"‎"}, {U
'\u22BF', U
"⊿"}, {U
'\u2039', U
"‹"}, {U
'\U0001d4c1', U
"𝓁"}, {U
'\u21B0', U
"↰"}, {U
'\u2A8D', U
"⪍"}, {U
'\u2A8F', U
"⪏"}, {U
'\u2018', U
"‘"}, {U
'\u201A', U
"‚"}, {U
'\u0141', U
"Ł"}, {U
'\u0142', U
"ł"}, {U
'\u2AA6', U
"⪦"}, {U
'\u2A79', U
"⩹"}, {U
'\u003C', U
"<"}, {U
'\u22C9', U
"⋉"}, {U
'\u2976', U
"⥶"}, {U
'\u2A7B', U
"⩻"}, {U
'\u25C3', U
"◃"}, {U
'\u2996', U
"⦖"}, {U
'\u294A', U
"⥊"}, {U
'\u2966', U
"⥦"}, {U
'\u2268', U
""}, {U
'\u00AF', U
"¯"}, {U
'\u2642', U
"♂"}, {U
'\u2720', U
"✠"}, {U
'\u2905', U
"⤅"}, {U
'\u21A6', U
"↦"}, {U
'\u21A5', U
"↥"}, {U
'\u25AE', U
"▮"}, {U
'\u2A29', U
"⨩"}, {U
'\u041C', U
"М"}, {U
'\u043C', U
"м"}, {U
'\u2014', U
"—"}, {U
'\u223A', U
"∺"}, {U
'\u205F', U
" "}, {U
'\u2133', U
"ℳ"}, {U
'\U0001d510', U
"𝔐"}, {U
'\U0001d52a', U
"𝔪"}, {U
'\u2127', U
"℧"}, {U
'\u00B5', U
"µ"}, {U
'\u2AF0', U
"⫰"}, {U
'\u2223', U
"∣"}, {U
'\u2212', U
"−"}, {U
'\u2A2A', U
"⨪"}, {U
'\u2213', U
"∓"}, {U
'\u2ADB', U
"⫛"}, {U
'\u22A7', U
"⊧"}, {U
'\U0001d544', U
"𝕄"}, {U
'\U0001d55e', U
"𝕞"}, {U
'\U0001d4c2', U
"𝓂"}, {U
'\u039C', U
"Μ"}, {U
'\u03BC', U
"μ"}, {U
'\u22B8', U
"⊸"}, {U
'\u0143', U
"Ń"}, {U
'\u0144', U
"ń"}, {U
'\u2220', U
""}, {U
'\u2249', U
"≉"}, {U
'\u2A70', U
""}, {U
'\u224B', U
""}, {U
'\u0149', U
"ʼn"}, {U
'\u266E', U
"♮"}, {U
'\u2115', U
"ℕ"}, {U
'\u00A0', U
" "}, {U
'\u224E', U
""}, {U
'\u224F', U
""}, {U
'\u2A43', U
"⩃"}, {U
'\u0147', U
"Ň"}, {U
'\u0148', U
"ň"}, {U
'\u0145', U
"Ņ"}, {U
'\u0146', U
"ņ"}, {U
'\u2247', U
"≇"}, {U
'\u2A6D', U
""}, {U
'\u2A42', U
"⩂"}, {U
'\u041D', U
"Н"}, {U
'\u043D', U
"н"}, {U
'\u2013', U
"–"}, {U
'\u2924', U
"⤤"}, {U
'\u2197', U
"↗"}, {U
'\u21D7', U
"⇗"}, {U
'\u2260', U
"≠"}, {U
'\u2250', U
""}, {U
'\u200B', U
"​"}, {U
'\u2262', U
"≢"}, {U
'\u2928', U
"⤨"}, {U
'\u2242', U
""}, {U
'\u000A', U
"
"}, {U
'\u2204', U
"∄"}, {U
'\U0001d511', U
"𝔑"}, {U
'\U0001d52b', U
"𝔫"}, {U
'\u2267', U
""}, {U
'\u2271', U
"≱"}, {U
'\u2A7E', U
""}, {U
'\u22D9', U
""}, {U
'\u2275', U
"≵"}, {U
'\u226B', U
""}, {U
'\u226F', U
"≯"}, {U
'\u226B', U
""}, {U
'\u21AE', U
"↮"}, {U
'\u21CE', U
"⇎"}, {U
'\u2AF2', U
"⫲"}, {U
'\u220B', U
"∋"}, {U
'\u22FC', U
"⋼"}, {U
'\u22FA', U
"⋺"}, {U
'\u040A', U
"Њ"}, {U
'\u045A', U
"њ"}, {U
'\u219A', U
"↚"}, {U
'\u21CD', U
"⇍"}, {U
'\u2025', U
"‥"}, {U
'\u2266', U
""}, {U
'\u2270', U
"≰"}, {U
'\u2A7D', U
""}, {U
'\u226E', U
"≮"}, {U
'\u22D8', U
""}, {U
'\u2274', U
"≴"}, {U
'\u226A', U
""}, {U
'\u22EA', U
"⋪"}, {U
'\u22EC', U
"⋬"}, {U
'\u226A', U
""}, {U
'\u2224', U
"∤"}, {U
'\u2060', U
"⁠"}, {U
'\U0001d55f', U
"𝕟"}, {U
'\u2AEC', U
"⫬"}, {U
'\u00AC', U
"¬"}, {U
'\u226D', U
"≭"}, {U
'\u2226', U
"∦"}, {U
'\u2209', U
"∉"}, {U
'\u2279', U
"≹"}, {U
'\u22F5', U
""}, {U
'\u22F9', U
""}, {U
'\u22F7', U
"⋷"}, {U
'\u22F6', U
"⋶"}, {U
'\u29CF', U
""}, {U
'\u2278', U
"≸"}, {U
'\u2AA2', U
""}, {U
'\u2AA1', U
""}, {U
'\u220C', U
"∌"}, {U
'\u22FE', U
"⋾"}, {U
'\u22FD', U
"⋽"}, {U
'\u2280', U
"⊀"}, {U
'\u2AAF', U
""}, {U
'\u22E0', U
"⋠"}, {U
'\u29D0', U
""}, {U
'\u22EB', U
"⋫"}, {U
'\u22ED', U
"⋭"}, {U
'\u228F', U
""}, {U
'\u22E2', U
"⋢"}, {U
'\u2290', U
""}, {U
'\u22E3', U
"⋣"}, {U
'\u2282', U
""}, {U
'\u2288', U
"⊈"}, {U
'\u2281', U
"⊁"}, {U
'\u2AB0', U
""}, {U
'\u22E1', U
"⋡"}, {U
'\u227F', U
""}, {U
'\u2283', U
""}, {U
'\u2289', U
"⊉"}, {U
'\u2241', U
"≁"}, {U
'\u2244', U
"≄"}, {U
'\u2AFD', U
""}, {U
'\u2202', U
""}, {U
'\u2A14', U
"⨔"}, {U
'\u2933', U
""}, {U
'\u219B', U
"↛"}, {U
'\u21CF', U
"⇏"}, {U
'\u219D', U
""}, {U
'\U0001d4a9', U
"𝒩"}, {U
'\U0001d4c3', U
"𝓃"}, {U
'\u2284', U
"⊄"}, {U
'\u2AC5', U
""}, {U
'\u2285', U
"⊅"}, {U
'\u2AC6', U
""}, {U
'\u00D1', U
"Ñ"}, {U
'\u00F1', U
"ñ"}, {U
'\u039D', U
"Ν"}, {U
'\u03BD', U
"ν"}, {U
'\u0023', U
"#"}, {U
'\u2116', U
"№"}, {U
'\u2007', U
" "}, {U
'\u224D', U
""}, {U
'\u22AC', U
"⊬"}, {U
'\u22AD', U
"⊭"}, {U
'\u22AE', U
"⊮"}, {U
'\u22AF', U
"⊯"}, {U
'\u2265', U
""}, {U
'\u003E', U
""}, {U
'\u2904', U
"⤄"}, {U
'\u29DE', U
"⧞"}, {U
'\u2902', U
"⤂"}, {U
'\u2264', U
""}, {U
'\u003C', U
""}, {U
'\u22B4', U
""}, {U
'\u2903', U
"⤃"}, {U
'\u22B5', U
""}, {U
'\u223C', U
""}, {U
'\u2923', U
"⤣"}, {U
'\u2196', U
"↖"}, {U
'\u21D6', U
"⇖"}, {U
'\u2927', U
"⤧"}, {U
'\u00D3', U
"Ó"}, {U
'\u00F3', U
"ó"}, {U
'\u00D4', U
"Ô"}, {U
'\u00F4', U
"ô"}, {U
'\u041E', U
"О"}, {U
'\u043E', U
"о"}, {U
'\u0150', U
"Ő"}, {U
'\u0151', U
"ő"}, {U
'\u2A38', U
"⨸"}, {U
'\u29BC', U
"⦼"}, {U
'\u0152', U
"Œ"}, {U
'\u0153', U
"œ"}, {U
'\u29BF', U
"⦿"}, {U
'\U0001d512', U
"𝔒"}, {U
'\U0001d52c', U
"𝔬"}, {U
'\u02DB', U
"˛"}, {U
'\u00D2', U
"Ò"}, {U
'\u00F2', U
"ò"}, {U
'\u29C1', U
"⧁"}, {U
'\u29B5', U
"⦵"}, {U
'\u03A9', U
"Ω"}, {U
'\u29BE', U
"⦾"}, {U
'\u29BB', U
"⦻"}, {U
'\u203E', U
"‾"}, {U
'\u29C0', U
"⧀"}, {U
'\u014C', U
"Ō"}, {U
'\u014D', U
"ō"}, {U
'\u03C9', U
"ω"}, {U
'\u039F', U
"Ο"}, {U
'\u03BF', U
"ο"}, {U
'\u29B6', U
"⦶"}, {U
'\U0001d546', U
"𝕆"}, {U
'\U0001d560', U
"𝕠"}, {U
'\u29B7', U
"⦷"}, {U
'\u29B9', U
"⦹"}, {U
'\u2A54', U
"⩔"}, {U
'\u2228', U
"∨"}, {U
'\u2A5D', U
"⩝"}, {U
'\u2134', U
"ℴ"}, {U
'\u00AA', U
"ª"}, {U
'\u00BA', U
"º"}, {U
'\u22B6', U
"⊶"}, {U
'\u2A56', U
"⩖"}, {U
'\u2A57', U
"⩗"}, {U
'\u2A5B', U
"⩛"}, {U
'\U0001d4aa', U
"𝒪"}, {U
'\u00D8', U
"Ø"}, {U
'\u00F8', U
"ø"}, {U
'\u2298', U
"⊘"}, {U
'\u00D5', U
"Õ"}, {U
'\u00F5', U
"õ"}, {U
'\u2A36', U
"⨶"}, {U
'\u2A37', U
"⨷"}, {U
'\u00D6', U
"Ö"}, {U
'\u00F6', U
"ö"}, {U
'\u233D', U
"⌽"}, {U
'\u23DE', U
"⏞"}, {U
'\u23B4', U
"⎴"}, {U
'\u23DC', U
"⏜"}, {U
'\u00B6', U
"¶"}, {U
'\u2AF3', U
"⫳"}, {U
'\u2AFD', U
"⫽"}, {U
'\u2202', U
"∂"}, {U
'\u041F', U
"П"}, {U
'\u043F', U
"п"}, {U
'\u0025', U
"%"}, {U
'\u002E', U
"."}, {U
'\u2030', U
"‰"}, {U
'\u2031', U
"‱"}, {U
'\U0001d513', U
"𝔓"}, {U
'\U0001d52d', U
"𝔭"}, {U
'\u03A6', U
"Φ"}, {U
'\u03C6', U
"φ"}, {U
'\u03D5', U
"ϕ"}, {U
'\u260E', U
"☎"}, {U
'\u03A0', U
"Π"}, {U
'\u03C0', U
"π"}, {U
'\u03D6', U
"ϖ"}, {U
'\u210E', U
"ℎ"}, {U
'\u2A23', U
"⨣"}, {U
'\u2A22', U
"⨢"}, {U
'\u002B', U
"+"}, {U
'\u2A25', U
"⨥"}, {U
'\u2A72', U
"⩲"}, {U
'\u00B1', U
"±"}, {U
'\u2A26', U
"⨦"}, {U
'\u2A27', U
"⨧"}, {U
'\u2A15', U
"⨕"}, {U
'\U0001d561', U
"𝕡"}, {U
'\u2119', U
"ℙ"}, {U
'\u00A3', U
"£"}, {U
'\u2AB7', U
"⪷"}, {U
'\u2ABB', U
"⪻"}, {U
'\u227A', U
"≺"}, {U
'\u227C', U
"≼"}, {U
'\u2AAF', U
"⪯"}, {U
'\u227E', U
"≾"}, {U
'\u2AB9', U
"⪹"}, {U
'\u2AB5', U
"⪵"}, {U
'\u22E8', U
"⋨"}, {U
'\u2AB3', U
"⪳"}, {U
'\u2032', U
"′"}, {U
'\u2033', U
"″"}, {U
'\u220F', U
"∏"}, {U
'\u232E', U
"⌮"}, {U
'\u2312', U
"⌒"}, {U
'\u2313', U
"⌓"}, {U
'\u221D', U
"∝"}, {U
'\u22B0', U
"⊰"}, {U
'\U0001d4ab', U
"𝒫"}, {U
'\U0001d4c5', U
"𝓅"}, {U
'\u03A8', U
"Ψ"}, {U
'\u03C8', U
"ψ"}, {U
'\u2008', U
" "}, {U
'\U0001d514', U
"𝔔"}, {U
'\U0001d52e', U
"𝔮"}, {U
'\U0001d562', U
"𝕢"}, {U
'\u211A', U
"ℚ"}, {U
'\u2057', U
"⁗"}, {U
'\U0001d4ac', U
"𝒬"}, {U
'\U0001d4c6', U
"𝓆"}, {U
'\u2A16', U
"⨖"}, {U
'\u003F', U
"?"}, {U
'\u0022', U
"""}, {U
'\u21DB', U
"⇛"}, {U
'\u223D', U
""}, {U
'\u0154', U
"Ŕ"}, {U
'\u0155', U
"ŕ"}, {U
'\u221A', U
"√"}, {U
'\u29B3', U
"⦳"}, {U
'\u27E9', U
"⟩"}, {U
'\u27EB', U
"⟫"}, {U
'\u2992', U
"⦒"}, {U
'\u29A5', U
"⦥"}, {U
'\u00BB', U
"»"}, {U
'\u2975', U
"⥵"}, {U
'\u21E5', U
"⇥"}, {U
'\u2920', U
"⤠"}, {U
'\u2933', U
"⤳"}, {U
'\u2192', U
"→"}, {U
'\u21A0', U
"↠"}, {U
'\u291E', U
"⤞"}, {U
'\u2945', U
"⥅"}, {U
'\u2974', U
"⥴"}, {U
'\u2916', U
"⤖"}, {U
'\u21A3', U
"↣"}, {U
'\u219D', U
"↝"}, {U
'\u291A', U
"⤚"}, {U
'\u291C', U
"⤜"}, {U
'\u2236', U
"∶"}, {U
'\u2773', U
"❳"}, {U
'\u007D', U
"}"}, {U
'\u005D', U
"]"}, {U
'\u298C', U
"⦌"}, {U
'\u298E', U
"⦎"}, {U
'\u2990', U
"⦐"}, {U
'\u0158', U
"Ř"}, {U
'\u0159', U
"ř"}, {U
'\u0156', U
"Ŗ"}, {U
'\u0157', U
"ŗ"}, {U
'\u2309', U
"⌉"}, {U
'\u0420', U
"Р"}, {U
'\u0440', U
"р"}, {U
'\u2937', U
"⤷"}, {U
'\u2969', U
"⥩"}, {U
'\u21B3', U
"↳"}, {U
'\u211C', U
"ℜ"}, {U
'\u211B', U
"ℛ"}, {U
'\u211D', U
"ℝ"}, {U
'\u25AD', U
"▭"}, {U
'\u297D', U
"⥽"}, {U
'\u230B', U
"⌋"}, {U
'\U0001d52f', U
"𝔯"}, {U
'\u2964', U
"⥤"}, {U
'\u21C0', U
"⇀"}, {U
'\u296C', U
"⥬"}, {U
'\u03A1', U
"Ρ"}, {U
'\u03C1', U
"ρ"}, {U
'\u03F1', U
"ϱ"}, {U
'\u21C4', U
"⇄"}, {U
'\u27E7', U
"⟧"}, {U
'\u295D', U
"⥝"}, {U
'\u2955', U
"⥕"}, {U
'\u21C9', U
"⇉"}, {U
'\u22A2', U
"⊢"}, {U
'\u295B', U
"⥛"}, {U
'\u22CC', U
"⋌"}, {U
'\u29D0', U
"⧐"}, {U
'\u22B3', U
"⊳"}, {U
'\u22B5', U
"⊵"}, {U
'\u294F', U
"⥏"}, {U
'\u295C', U
"⥜"}, {U
'\u2954', U
"⥔"}, {U
'\u21BE', U
"↾"}, {U
'\u2953', U
"⥓"}, {U
'\u02DA', U
"˚"}, {U
'\u200F', U
"‏"}, {U
'\u23B1', U
"⎱"}, {U
'\u2AEE', U
"⫮"}, {U
'\u27ED', U
"⟭"}, {U
'\u21FE', U
"⇾"}, {U
'\u2986', U
"⦆"}, {U
'\U0001d563', U
"𝕣"}, {U
'\u2A2E', U
"⨮"}, {U
'\u2A35', U
"⨵"}, {U
'\u2970', U
"⥰"}, {U
'\u0029', U
")"}, {U
'\u2994', U
"⦔"}, {U
'\u2A12', U
"⨒"}, {U
'\u203A', U
"›"}, {U
'\U0001d4c7', U
"𝓇"}, {U
'\u21B1', U
"↱"}, {U
'\u22CA', U
"⋊"}, {U
'\u25B9', U
"▹"}, {U
'\u29CE', U
"⧎"}, {U
'\u29F4', U
"⧴"}, {U
'\u2968', U
"⥨"}, {U
'\u211E', U
"℞"}, {U
'\u015A', U
"Ś"}, {U
'\u015B', U
"ś"}, {U
'\u2AB8', U
"⪸"}, {U
'\u0160', U
"Š"}, {U
'\u0161', U
"š"}, {U
'\u2ABC', U
"⪼"}, {U
'\u227B', U
"≻"}, {U
'\u227D', U
"≽"}, {U
'\u2AB0', U
"⪰"}, {U
'\u2AB4', U
"⪴"}, {U
'\u015E', U
"Ş"}, {U
'\u015F', U
"ş"}, {U
'\u015C', U
"Ŝ"}, {U
'\u015D', U
"ŝ"}, {U
'\u2ABA', U
"⪺"}, {U
'\u2AB6', U
"⪶"}, {U
'\u22E9', U
"⋩"}, {U
'\u2A13', U
"⨓"}, {U
'\u227F', U
"≿"}, {U
'\u0421', U
"С"}, {U
'\u0441', U
"с"}, {U
'\u22C5', U
"⋅"}, {U
'\u2A66', U
"⩦"}, {U
'\u21D8', U
"⇘"}, {U
'\u00A7', U
"§"}, {U
'\u003B', U
";"}, {U
'\u2929', U
"⤩"}, {U
'\u2736', U
"✶"}, {U
'\U0001d516', U
"𝔖"}, {U
'\U0001d530', U
"𝔰"}, {U
'\u266F', U
"♯"}, {U
'\u0429', U
"Щ"}, {U
'\u0449', U
"щ"}, {U
'\u0428', U
"Ш"}, {U
'\u0448', U
"ш"}, {U
'\u2191', U
"↑"}, {U
'\u00AD', U
"­"}, {U
'\u03A3', U
"Σ"}, {U
'\u03C3', U
"σ"}, {U
'\u03C2', U
"ς"}, {U
'\u223C', U
"∼"}, {U
'\u2A6A', U
"⩪"}, {U
'\u2243', U
"≃"}, {U
'\u2A9E', U
"⪞"}, {U
'\u2AA0', U
"⪠"}, {U
'\u2A9D', U
"⪝"}, {U
'\u2A9F', U
"⪟"}, {U
'\u2246', U
"≆"}, {U
'\u2A24', U
"⨤"}, {U
'\u2972', U
"⥲"}, {U
'\u2A33', U
"⨳"}, {U
'\u29E4', U
"⧤"}, {U
'\u2323', U
"⌣"}, {U
'\u2AAA', U
"⪪"}, {U
'\u2AAC', U
"⪬"}, {U
'\u2AAC', U
""}, {U
'\u042C', U
"Ь"}, {U
'\u044C', U
"ь"}, {U
'\u233F', U
"⌿"}, {U
'\u29C4', U
"⧄"}, {U
'\u002F', U
"/"}, {U
'\U0001d54a', U
"𝕊"}, {U
'\U0001d564', U
"𝕤"}, {U
'\u2660', U
"♠"}, {U
'\u2293', U
"⊓"}, {U
'\u2293', U
""}, {U
'\u2294', U
"⊔"}, {U
'\u2294', U
""}, {U
'\u228F', U
"⊏"}, {U
'\u2291', U
"⊑"}, {U
'\u2290', U
"⊐"}, {U
'\u2292', U
"⊒"}, {U
'\u25A1', U
"□"}, {U
'\U0001d4ae', U
"𝒮"}, {U
'\U0001d4c8', U
"𝓈"}, {U
'\u22C6', U
"⋆"}, {U
'\u2606', U
"☆"}, {U
'\u2282', U
"⊂"}, {U
'\u22D0', U
"⋐"}, {U
'\u2ABD', U
"⪽"}, {U
'\u2AC5', U
"⫅"}, {U
'\u2286', U
"⊆"}, {U
'\u2AC3', U
"⫃"}, {U
'\u2AC1', U
"⫁"}, {U
'\u2ACB', U
"⫋"}, {U
'\u228A', U
"⊊"}, {U
'\u2ABF', U
"⪿"}, {U
'\u2979', U
"⥹"}, {U
'\u2AC7', U
"⫇"}, {U
'\u2AD5', U
"⫕"}, {U
'\u2AD3', U
"⫓"}, {U
'\u2211', U
"∑"}, {U
'\u266A', U
"♪"}, {U
'\u00B9', U
"¹"}, {U
'\u00B2', U
"²"}, {U
'\u00B3', U
"³"}, {U
'\u2283', U
"⊃"}, {U
'\u22D1', U
"⋑"}, {U
'\u2ABE', U
"⪾"}, {U
'\u2AD8', U
"⫘"}, {U
'\u2AC6', U
"⫆"}, {U
'\u2287', U
"⊇"}, {U
'\u2AC4', U
"⫄"}, {U
'\u27C9', U
"⟉"}, {U
'\u2AD7', U
"⫗"}, {U
'\u297B', U
"⥻"}, {U
'\u2AC2', U
"⫂"}, {U
'\u2ACC', U
"⫌"}, {U
'\u228B', U
"⊋"}, {U
'\u2AC0', U
"⫀"}, {U
'\u2AC8', U
"⫈"}, {U
'\u2AD4', U
"⫔"}, {U
'\u2AD6', U
"⫖"}, {U
'\u21D9', U
"⇙"}, {U
'\u292A', U
"⤪"}, {U
'\u00DF', U
"ß"}, {U
'\u0009', U
"	"}, {U
'\u2316', U
"⌖"}, {U
'\u03A4', U
"Τ"}, {U
'\u03C4', U
"τ"}, {U
'\u0164', U
"Ť"}, {U
'\u0165', U
"ť"}, {U
'\u0162', U
"Ţ"}, {U
'\u0163', U
"ţ"}, {U
'\u0422', U
"Т"}, {U
'\u0442', U
"т"}, {U
'\u20DB', U
"⃛"}, {U
'\u2315', U
"⌕"}, {U
'\U0001d517', U
"𝔗"}, {U
'\U0001d531', U
"𝔱"}, {U
'\u2234', U
"∴"}, {U
'\u0398', U
"Θ"}, {U
'\u03B8', U
"θ"}, {U
'\u03D1', U
"ϑ"}, {U
'\u205F', U
""}, {U
'\u2009', U
" "}, {U
'\u00DE', U
"Þ"}, {U
'\u00FE', U
"þ"}, {U
'\u2A31', U
"⨱"}, {U
'\u00D7', U
"×"}, {U
'\u2A30', U
"⨰"}, {U
'\u2336', U
"⌶"}, {U
'\u2AF1', U
"⫱"}, {U
'\U0001d54b', U
"𝕋"}, {U
'\U0001d565', U
"𝕥"}, {U
'\u2ADA', U
"⫚"}, {U
'\u2034', U
"‴"}, {U
'\u2122', U
"™"}, {U
'\u25B5', U
"▵"}, {U
'\u225C', U
"≜"}, {U
'\u25EC', U
"◬"}, {U
'\u2A3A', U
"⨺"}, {U
'\u2A39', U
"⨹"}, {U
'\u29CD', U
"⧍"}, {U
'\u2A3B', U
"⨻"}, {U
'\u23E2', U
"⏢"}, {U
'\U0001d4af', U
"𝒯"}, {U
'\U0001d4c9', U
"𝓉"}, {U
'\u0426', U
"Ц"}, {U
'\u0446', U
"ц"}, {U
'\u040B', U
"Ћ"}, {U
'\u045B', U
"ћ"}, {U
'\u0166', U
"Ŧ"}, {U
'\u0167', U
"ŧ"}, {U
'\u00DA', U
"Ú"}, {U
'\u00FA', U
"ú"}, {U
'\u219F', U
"↟"}, {U
'\u2949', U
"⥉"}, {U
'\u040E', U
"Ў"}, {U
'\u045E', U
"ў"}, {U
'\u016C', U
"Ŭ"}, {U
'\u016D', U
"ŭ"}, {U
'\u00DB', U
"Û"}, {U
'\u00FB', U
"û"}, {U
'\u0423', U
"У"}, {U
'\u0443', U
"у"}, {U
'\u21C5', U
"⇅"}, {U
'\u0170', U
"Ű"}, {U
'\u0171', U
"ű"}, {U
'\u296E', U
"⥮"}, {U
'\u297E', U
"⥾"}, {U
'\U0001d518', U
"𝔘"}, {U
'\U0001d532', U
"𝔲"}, {U
'\u00D9', U
"Ù"}, {U
'\u00F9', U
"ù"}, {U
'\u2963', U
"⥣"}, {U
'\u2580', U
"▀"}, {U
'\u231C', U
"⌜"}, {U
'\u230F', U
"⌏"}, {U
'\u25F8', U
"◸"}, {U
'\u016A', U
"Ū"}, {U
'\u016B', U
"ū"}, {U
'\u23DF', U
"⏟"}, {U
'\u23DD', U
"⏝"}, {U
'\u228E', U
"⊎"}, {U
'\u0172', U
"Ų"}, {U
'\u0173', U
"ų"}, {U
'\U0001d54c', U
"𝕌"}, {U
'\U0001d566', U
"𝕦"}, {U
'\u2912', U
"⤒"}, {U
'\u2195', U
"↕"}, {U
'\u03C5', U
"υ"}, {U
'\u03D2', U
"ϒ"}, {U
'\u03A5', U
"Υ"}, {U
'\u21C8', U
"⇈"}, {U
'\u231D', U
"⌝"}, {U
'\u230E', U
"⌎"}, {U
'\u016E', U
"Ů"}, {U
'\u016F', U
"ů"}, {U
'\u25F9', U
"◹"}, {U
'\U0001d4b0', U
"𝒰"}, {U
'\U0001d4ca', U
"𝓊"}, {U
'\u22F0', U
"⋰"}, {U
'\u0168', U
"Ũ"}, {U
'\u0169', U
"ũ"}, {U
'\u00DC', U
"Ü"}, {U
'\u00FC', U
"ü"}, {U
'\u29A7', U
"⦧"}, {U
'\u299C', U
"⦜"}, {U
'\u228A', U
""}, {U
'\u2ACB', U
""}, {U
'\u228B', U
""}, {U
'\u2ACC', U
""}, {U
'\u2AE8', U
"⫨"}, {U
'\u2AEB', U
"⫫"}, {U
'\u2AE9', U
"⫩"}, {U
'\u0412', U
"В"}, {U
'\u0432', U
"в"}, {U
'\u22A9', U
"⊩"}, {U
'\u22AB', U
"⊫"}, {U
'\u2AE6', U
"⫦"}, {U
'\u22BB', U
"⊻"}, {U
'\u225A', U
"≚"}, {U
'\u22EE', U
"⋮"}, {U
'\u007C', U
"|"}, {U
'\u2016', U
"‖"}, {U
'\u2758', U
"❘"}, {U
'\u2240', U
"≀"}, {U
'\U0001d519', U
"𝔙"}, {U
'\U0001d533', U
"𝔳"}, {U
'\U0001d54d', U
"𝕍"}, {U
'\U0001d567', U
"𝕧"}, {U
'\U0001d4b1', U
"𝒱"}, {U
'\U0001d4cb', U
"𝓋"}, {U
'\u22AA', U
"⊪"}, {U
'\u299A', U
"⦚"}, {U
'\u0174', U
"Ŵ"}, {U
'\u0175', U
"ŵ"}, {U
'\u2A5F', U
"⩟"}, {U
'\u2259', U
"≙"}, {U
'\u2118', U
"℘"}, {U
'\U0001d51a', U
"𝔚"}, {U
'\U0001d534', U
"𝔴"}, {U
'\U0001d54e', U
"𝕎"}, {U
'\U0001d568', U
"𝕨"}, {U
'\U0001d4b2', U
"𝒲"}, {U
'\U0001d4cc', U
"𝓌"}, {U
'\U0001d51b', U
"𝔛"}, {U
'\U0001d535', U
"𝔵"}, {U
'\u039E', U
"Ξ"}, {U
'\u03BE', U
"ξ"}, {U
'\u22FB', U
"⋻"}, {U
'\U0001d54f', U
"𝕏"}, {U
'\U0001d569', U
"𝕩"}, {U
'\U0001d4b3', U
"𝒳"}, {U
'\U0001d4cd', U
"𝓍"}, {U
'\u00DD', U
"Ý"}, {U
'\u00FD', U
"ý"}, {U
'\u042F', U
"Я"}, {U
'\u044F', U
"я"}, {U
'\u0176', U
"Ŷ"}, {U
'\u0177', U
"ŷ"}, {U
'\u042B', U
"Ы"}, {U
'\u044B', U
"ы"}, {U
'\u00A5', U
"¥"}, {U
'\U0001d51c', U
"𝔜"}, {U
'\U0001d536', U
"𝔶"}, {U
'\u0407', U
"Ї"}, {U
'\u0457', U
"ї"}, {U
'\U0001d550', U
"𝕐"}, {U
'\U0001d56a', U
"𝕪"}, {U
'\U0001d4b4', U
"𝒴"}, {U
'\U0001d4ce', U
"𝓎"}, {U
'\u042E', U
"Ю"}, {U
'\u044E', U
"ю"}, {U
'\u00FF', U
"ÿ"}, {U
'\u0178', U
"Ÿ"}, {U
'\u0179', U
"Ź"}, {U
'\u017A', U
"ź"}, {U
'\u017D', U
"Ž"}, {U
'\u017E', U
"ž"}, {U
'\u0417', U
"З"}, {U
'\u0437', U
"з"}, {U
'\u017B', U
"Ż"}, {U
'\u017C', U
"ż"}, {U
'\u2128', U
"ℨ"}, {U
'\u0396', U
"Ζ"}, {U
'\u03B6', U
"ζ"}, {U
'\U0001d537', U
"𝔷"}, {U
'\u0416', U
"Ж"}, {U
'\u0436', U
"ж"}, {U
'\u21DD', U
"⇝"}, {U
'\U0001d56b', U
"𝕫"}, {U
'\U0001d4b5', U
"𝒵"}, {U
'\U0001d4cf', U
"𝓏"}, {U
'\u200D', U
"‍"}, {U
'\u200C', U
"‌"}};
// Table of HTML entities that stand for a sequence of TWO code points.
// Key:   the first code point of the sequence.
// Value: pair of (second code point, string written in place of the sequence).
// The encoding loop further down looks a pending code point up here and, when
// the following code point equals pair.first, writes pair.second;
// initializeHtmlDecodeTable() uses pair.second as key of the reverse table.
//
// NOTE(review): the keys U'\u226B' and U'\u226A' each appear twice below (once
// with U'\u20D2', once with U'\u0338'). A std::unordered_map stores a single
// value per key, so only one entry of each pair ends up in the map and the
// other sequence is never looked up successfully.
// NOTE(review): the bare numbers at the start of some lines (43, 44, 46, ...)
// look like line numbers of a rendered source listing, and the string values
// appear as rendered characters; presumably the real file holds the entity
// names ("&...;") instead - confirm against the repository.
43 unordered_map<char32_t, pair<char32_t, u32string>>
const htmlLookaheads =
44 {{U
'\u223E', make_pair(U
'\u0333', U
"∾̳")}, {U
'\u003D', make_pair(U
'\u20E5', U
"=⃥")}, {U
'\u2261', make_pair(U
'\u20E5', U
"≡⃥")}, {U
'\u2229', make_pair(U
'\uFE00', U
"∩︀")}, {U
'\u222A', make_pair(U
'\uFE00', U
"∪︀")},
46 {U
'\u22DB', make_pair(U
'\uFE00', U
"⋛︀")},
47 {U
'\u2269', make_pair(U
'\uFE00', U
"≩︀")},
48 {U
'\u2AAD', make_pair(U
'\uFE00', U
"⪭︀")},
49 {U
'\u22DA', make_pair(U
'\uFE00', U
"⋚︀")},
50 {U
'\u2268', make_pair(U
'\uFE00', U
"≨︀")},
51 {U
'\u2220', make_pair(U
'\u20D2', U
"∠⃒")},
52 {U
'\u2A70', make_pair(U
'\u0338', U
"⩰̸")},
53 {U
'\u224B', make_pair(U
'\u0338', U
"≋̸")},
54 {U
'\u224E', make_pair(U
'\u0338', U
"≎̸")},
55 {U
'\u224F', make_pair(U
'\u0338', U
"≏̸")},
56 {U
'\u2A6D', make_pair(U
'\u0338', U
"⩭̸")},
57 {U
'\u2250', make_pair(U
'\u0338', U
"≐̸")},
58 {U
'\u2242', make_pair(U
'\u0338', U
"≂̸")},
59 {U
'\u2267', make_pair(U
'\u0338', U
"≧̸")},
60 {U
'\u2A7E', make_pair(U
'\u0338', U
"⩾̸")},
61 {U
'\u22D9', make_pair(U
'\u0338', U
"⋙̸")},
62 {U
'\u226B', make_pair(U
'\u20D2', U
"≫⃒")},
// NOTE(review): second entry for key U'\u226B' - duplicate key, see header comment.
63 {U
'\u226B', make_pair(U
'\u0338', U
"≫̸")},
64 {U
'\u2266', make_pair(U
'\u0338', U
"≦̸")},
65 {U
'\u2A7D', make_pair(U
'\u0338', U
"⩽̸")},
66 {U
'\u22D8', make_pair(U
'\u0338', U
"⋘̸")},
67 {U
'\u226A', make_pair(U
'\u20D2', U
"≪⃒")},
// NOTE(review): second entry for key U'\u226A' - duplicate key, see header comment.
68 {U
'\u226A', make_pair(U
'\u0338', U
"≪̸")},
69 {U
'\u22F5', make_pair(U
'\u0338', U
"⋵̸")},
70 {U
'\u22F9', make_pair(U
'\u0338', U
"⋹̸")},
71 {U
'\u29CF', make_pair(U
'\u0338', U
"⧏̸")},
72 {U
'\u2AA2', make_pair(U
'\u0338', U
"⪢̸")},
73 {U
'\u2AA1', make_pair(U
'\u0338', U
"⪡̸")},
74 {U
'\u2AAF', make_pair(U
'\u0338', U
"⪯̸")},
75 {U
'\u29D0', make_pair(U
'\u0338', U
"⧐̸")},
76 {U
'\u228F', make_pair(U
'\u0338', U
"⊏̸")},
77 {U
'\u2290', make_pair(U
'\u0338', U
"⊐̸")},
78 {U
'\u2282', make_pair(U
'\u20D2', U
"⊂⃒")},
79 {U
'\u2AB0', make_pair(U
'\u0338', U
"⪰̸")},
80 {U
'\u227F', make_pair(U
'\u0338', U
"≿̸")},
81 {U
'\u2283', make_pair(U
'\u20D2', U
"⊃⃒")},
82 {U
'\u2AFD', make_pair(U
'\u20E5', U
"⫽⃥")},
83 {U
'\u2202', make_pair(U
'\u0338', U
"∂̸")},
84 {U
'\u2933', make_pair(U
'\u0338', U
"⤳̸")},
85 {U
'\u219D', make_pair(U
'\u0338', U
"↝̸")},
86 {U
'\u2AC5', make_pair(U
'\u0338', U
"⫅̸")},
87 {U
'\u2AC6', make_pair(U
'\u0338', U
"⫆̸")},
88 {U
'\u224D', make_pair(U
'\u20D2', U
"≍⃒")},
89 {U
'\u2265', make_pair(U
'\u20D2', U
"≥⃒")},
90 {U
'\u003E', make_pair(U
'\u20D2', U
">⃒")},
91 {U
'\u2264', make_pair(U
'\u20D2', U
"≤⃒")},
92 {U
'\u003C', make_pair(U
'\u20D2', U
"<⃒")},
93 {U
'\u22B4', make_pair(U
'\u20D2', U
"⊴⃒")},
94 {U
'\u22B5', make_pair(U
'\u20D2', U
"⊵⃒")},
95 {U
'\u223C', make_pair(U
'\u20D2', U
"∼⃒")},
96 {U
'\u223D', make_pair(U
'\u0331', U
"∽̱")},
97 {U
'\u2AAC', make_pair(U
'\uFE00', U
"⪬︀")},
98 {U
'\u2293', make_pair(U
'\uFE00', U
"⊓︀")},
99 {U
'\u2294', make_pair(U
'\uFE00', U
"⊔︀")},
100 {U
'\u205F', make_pair(U
'\u200A', U
"  ")},
101 {U
'\u228A', make_pair(U
'\uFE00', U
"⊊︀")},
102 {U
'\u2ACB', make_pair(U
'\uFE00', U
"⫋︀")},
103 {U
'\u228B', make_pair(U
'\uFE00', U
"⊋︀")},
104 {U
'\u2ACC', make_pair(U
'\uFE00', U
"⫌︀")}};
// Reverse table for HTML decoding: entity string -> (first code point, second
// code point). initializeHtmlDecodeTable() fills it; a second code point of
// U'\0' marks an entity that stands for a single code point. It is not const
// because it starts out empty and is populated by the decoding code the first
// time that code finds it empty.
106 unordered_map<u32string, pair<char32_t, char32_t>> htmlDecodeTable;
// Set of bytes tested by the URL-encoding loop further down: the ASCII letters
// A-Z and a-z, the digits 0-9, and '-', '_', '.', '~'. These are the same
// characters RFC 3986 section 2.3 lists as "unreserved".
// NOTE(review): the numbers 108-112 at the start of some lines look like
// line numbers of a rendered source listing rather than C++ tokens.
108 unordered_set<char>
const urlUnreserved =
109 {
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
110 'T',
'U',
'V',
'W',
'X',
'Y',
'Z',
'a',
'b',
'c',
'd',
'e',
'f',
'g',
'h',
'i',
'j',
'k',
'l',
111 'm',
'n',
'o',
'p',
'q',
'r',
's',
't',
'u',
'v',
'w',
'x',
'y',
'z',
'0',
'1',
'2',
'3',
'4',
112 '5',
'6',
'7',
'8',
'9',
'-',
'_',
'.',
'~'};
// Fills htmlDecodeTable (entity string -> code point pair) from the encoding
// tables htmlEntities and htmlLookaheads.
// insert() does not overwrite an existing key, so if two code points share the
// same string, the one inserted first stays in the table.
// NOTE(review): the closing braces of the loop and of the function (and a few
// other lines) are missing from this listing.
114 void initializeHtmlDecodeTable() {
116 for (
auto& e : htmlEntities) {
// Code point with a non-empty entity string: single-code-point entity, stored
// with U'\0' as the second code point.
118 if (!e.second.empty()) {
119 htmlDecodeTable.insert(make_pair(e.second, make_pair(e.first, U
'\0')));
120 }
// Empty entity string, but the code point starts a two-code-point sequence:
// register the sequence's string together with both code points.
else if (htmlLookaheads.count(e.first) == 1) {
121 auto const& lookahead = htmlLookaheads.at(e.first);
122 char32_t secondChar = lookahead.first;
123 htmlDecodeTable.insert(make_pair(lookahead.second, make_pair(e.first, secondChar)));
// Hand-added entry whose code points are 'f' (U+0066) followed by 'j' (U+006A).
128 htmlDecodeTable.insert(make_pair(U
"fj", make_pair(U
'\u0066', U
'\u006A')));
// NOTE(review): fragment - the enclosing function header and several interior
// lines are missing from this listing; presumably this is the body of
// htmlEncode(input, encodeAll). What the visible lines do:
//
// 1. Four boost::replace_all calls rewrite `input` in place for the characters
//    &, ", < and >. As shown here the replacement texts equal the search texts
//    (and the one for the double quote is not a valid literal); presumably the
//    entity names were decoded when the listing was rendered - confirm against
//    the repository.
//
// 2. The remaining lines convert `input` from UTF-8 to UTF-32 and walk over the
//    code points. `lookahead` holds a pending code point (U+0000 = none).
//    - With a pending code point: if htmlLookaheads has no entry for it, it is
//      written as is; if it has an entry and the current code point equals the
//      entry's second code point, the entry's string is written; the pending
//      code point is also written as is in another branch whose condition is
//      not visible here.
//    - The current code point is looked up in htmlEntities. A non-empty entry
//      is written in its place; for an empty entry the branch body is not
//      visible (presumably the code point becomes the pending one - verify).
//      A further visible statement writes the code point itself.
//    - After the loop a still-pending code point is written, and the
//      accumulated output replaces `input`.
135 boost::replace_all(input,
"&",
"&");
136 boost::replace_all(input,
"\"",
""");
137 boost::replace_all(input,
"<",
"<");
138 boost::replace_all(input,
">",
">");
141 wstring_convert<codecvt_utf8<char32_t>, char32_t> cv;
142 u32string uinput = cv.from_bytes(input);
143 ostringstream output;
144 char32_t lookahead =
'\0';
145 for (char32_t c : uinput) {
146 if (lookahead !=
'\0') {
149 if (htmlLookaheads.count(lookahead) != 1) {
150 output << cv.to_bytes(lookahead);
154 auto& currentLA = htmlLookaheads.at(lookahead);
155 if (currentLA.first == c) {
156 output << cv.to_bytes(currentLA.second);
160 output << cv.to_bytes(lookahead);
165 if (htmlEntities.count(c) == 1) {
166 if (htmlEntities.at(c).empty()) {
170 output << cv.to_bytes(htmlEntities.at(c));
173 output << cv.to_bytes(c);
177 if (lookahead !=
'\0') {
178 output << cv.to_bytes(lookahead);
180 input = output.str();
// NOTE(review): fragment - the enclosing function header and several interior
// lines are missing from this listing; presumably this is the body of
// htmlDecode(input).
// The reverse table is built on first use, when it is still empty.
187 if (htmlDecodeTable.empty())
188 initializeHtmlDecodeTable();
// Named entities: '&', 1 to 25 ASCII letters/digits (non-greedy), ';'.
190 regex matchEntity(R
"(&[A-Za-z0-9]{1,25}?;)");
// Callback for named entities: matches.at(0) is looked up in htmlDecodeTable.
// Unknown entities are returned unchanged; known ones yield the first code
// point and, when the stored second code point is not U+0000, that one too.
193 auto replaceEntity = [](vector<string>
const& matches) ->
string {
194 wstring_convert<codecvt_utf8<char32_t>, char32_t> cv;
195 auto entity32 = cv.from_bytes(matches.at(0));
196 if (htmlDecodeTable.count(entity32) != 1) {
197 return matches.at(0);
200 ret << cv.to_bytes(htmlDecodeTable.at(entity32).first);
201 auto secondChar = htmlDecodeTable.at(entity32).second;
202 if (secondChar !=
'\0') {
203 ret << cv.to_bytes(secondChar);
// Numeric references: "&#x" + 1 to 5 hex digits + ';', or "&#" + 1 to 6 decimal
// digits + ';'.
// NOTE(review): with at most 5 hex digits (max 0xFFFFF) and 6 decimal digits
// (max 999999), references to code points above those values never match.
213 regex matchUnicode(R
"(&#(x([A-Fa-f0-9]{1,5})|([0-9]{1,6}));)");
// Callback for numeric references. Assumes matches[0] is the whole match and
// matches[i] is capture group i (the helper that fills `matches` is defined
// elsewhere - verify): index 3 holds the decimal digits, index 2 the hex
// digits. The parsed value is written out as one UTF-8 encoded code point.
214 auto replaceUnicode = [](vector<string>
const& matches) ->
string {
215 wstring_convert<codecvt_utf8<char32_t>, char32_t> cv;
218 if (matches.size() < 3) {
221 if (matches.size() > 3 && !matches.at(3).empty()) {
222 c = (char32_t) stoul(matches.at(3));
225 else if (!matches.at(2).empty()) {
226 c = (char32_t) stoul(matches.at(2),
nullptr, 16);
230 u32string outs(1, c);
231 return cv.to_bytes(outs);
// NOTE(review): fragment - function header, the declaration of `out` and the
// body of the first branch are missing from this listing; presumably this is
// the body of urlEncode(input).
// Walks over the input byte by byte and tests each byte against urlUnreserved.
242 for (
char c : input) {
243 if (urlUnreserved.count(c) == 1) {
// Percent-encoding: '%' followed by the byte value as two uppercase hex digits.
// The cast through unsigned char keeps bytes >= 0x80 from being sign-extended
// when converted to int.
248 out <<
'%' << uppercase << hex << setw(2) << setfill(
'0') << (int) (
unsigned char) c;
// NOTE(review): fragment - function header and surrounding lines are missing
// from this listing; presumably this is the body of urlDecode(input).
// Matches '%' followed by exactly two hex digits, captured as group 1.
// NOTE(review): the character class only contains 0-9 and uppercase A-F and
// the regex is constructed without flags, so escapes written with lowercase
// hex digits (e.g. "%2f") are not matched by this pattern.
258 regex matchCode(R
"(%([0-9A-F]{2}))");
// Callback: converts the two captured hex digits into a single byte.
261 auto replaceCode = [](vector<string>
const& matches) ->
string {
262 return string(1, (
char) stoul(matches.at(1),
nullptr, 16));
// NOTE(review): fragment - function header, the declaration of `rgx` and the
// else keyword are missing from this listing; presumably this is the body of
// isBase64(input, allowWhitespaces).
// Two patterns: one or more characters from the base64 alphabet (A-Z, a-z,
// 0-9, '+', '/'), optionally also space, tab, LF and CR, followed by at most
// two '=' characters.
272 if (allowWhitespaces) {
273 rgx.assign(R
"([A-Za-z0-9\+/ \t\n\r]+={0,2})");
275 rgx.assign(R
"([A-Za-z0-9\+/]+={0,2})");
// regex_match requires the whole input to match, so an empty string yields
// false (the pattern needs at least one character). The length of the input
// is not checked against a multiple of four.
277 return regex_match(input, rgx);
// NOTE(review): fragment - function header and the end of this call are
// missing from this listing; presumably this is the body of
// base64Encode(input, breakAfter, breakSequence).
// Passes the input's bytes (viewed as unsigned char), its length and
// breakAfter on to base64_encode.
282 return base64_encode(
reinterpret_cast<unsigned char const*
>(input.c_str()), input.length(), breakAfter,
// NOTE(review): fragment - the function header is missing from this listing;
// presumably this is the body of base64Decode(input).
// Returns whatever base64_decode yields for the input; no validation is done
// on this line.
287 return base64_decode(input);
// NOTE(review): fragment - function header, the declarations of `ret` and
// `lineCount` and most branch bodies are missing from this listing; presumably
// this is the body of quotedPrintableEncode(input, lineEnding, replaceCrlf,
// qEncoding).
293 for (
const char c : input) {
// First branch: printable ASCII (32..126) except '=' (61); in Q-encoding mode
// space (32), '?' (63) and '_' (95) are excluded as well.
294 if (c >= 32 && c <= 126 && c != 61 && !(qEncoding && (c == 32 || c == 63 || c == 95))) {
// Outside Q-encoding mode, emit "=" plus the line ending once lineCount has
// reached 75.
295 if (!qEncoding && lineCount >= 75) {
296 ret <<
"=" << lineEnding;
301 }
// LF (10) and CR (13) get their own branch when replaceCrlf is false (branch
// body not visible here).
else if (!replaceCrlf && (c == 10 || c == 13)) {
304 }
// Space in Q-encoding mode gets its own branch (branch body not visible here).
else if (qEncoding && c == 32) {
// Presumably the remaining branch for bytes written in encoded form: here the
// line is already broken at lineCount >= 73 - presumably because an encoded
// byte takes three output characters; verify.
309 if (!qEncoding && lineCount >= 73) {
310 ret <<
"=" << lineEnding;
// NOTE(review): fragment - function header and one line of the conditional
// expression are missing from this listing; presumably this is the body of
// quotedPrintableDecode(input, qEncoding).
// Matches '=' followed by either two uppercase hex digits or a line break
// (LF, optionally preceded by CR); the part after '=' is capture group 1.
325 regex matchCode(R
"(=([0-9A-F]{2}|\r?\n))");
// Callback: a captured line break selects the first alternative of the
// conditional (its value is not visible here); otherwise the two hex digits
// are converted into a single byte.
328 auto replaceCode = [](vector<string>
const& matches) ->
string {
329 return (matches.at(1) ==
"\r\n" || matches.at(1) ==
"\n")
331 : string(1, (
char) stoul(matches.at(1),
nullptr, 16));
// NOTE(review): fragment - only this statement is visible; presumably it
// belongs to makeEncodedWord(input, base64, onlyIfNecessary).
// Appends the marker "Q?" followed by the text held in qpEncoded.
349 ret <<
"Q?" << qpEncoded;
// NOTE(review): fragment - function header, the declaration of
// partsToBeEncoded and the rest of the loop body are missing from this
// listing; presumably this is the body of punycodeEncode(input).
356 wstring_convert<codecvt_utf8<char32_t>, char32_t> cv;
359 bool firstPart =
true;
360 for (
auto const& part : partsToBeEncoded) {
// Convert the part from UTF-8 to UTF-32 code points.
361 basic_string<char32_t> wstr = cv.from_bytes(part);
// Output buffer of four bytes per input byte; pretSize carries the buffer
// size into punycode_encode and is used afterwards to trim the buffer.
362 string pret(part.size() * 4,
'\0');
363 size_t pretSize = pret.size();
// NOTE(review): the return value of punycode_encode is discarded on this line.
364 punycode_encode(
reinterpret_cast<uint32_t const*
>(wstr.c_str()), wstr.size(), &pret[0], &pretSize);
365 pret.resize(pretSize);
// NOTE(review): fragment - function header, the declarations of
// partsToBeDecoded and `ret` and several interior lines are missing from this
// listing; presumably this is the body of punycodeDecode(input).
381 wstring_convert<codecvt_utf8<char32_t>, char32_t> cv;
384 bool firstPart =
true;
// `part` is taken by value because it is modified below.
385 for (
string part : partsToBeDecoded) {
// Parts carrying the "xn--" prefix have the prefix removed.
386 if (part.substr(0, 4) ==
"xn--") {
387 part = part.substr(4);
// Output buffer with one code point per input byte; wpretSize carries the
// buffer size into punycode_decode and is used afterwards to trim the buffer.
// The fill value is written as a wide-character literal (L prefix) although
// the string's element type is char32_t.
398 basic_string<char32_t> wpret(part.size(), L
'\0');
399 size_t wpretSize = wpret.size();
// NOTE(review): the return value of punycode_decode is discarded on this line.
400 punycode_decode(part.c_str(), part.length(),
reinterpret_cast<uint32_t*
>(&wpret[0]), &wpretSize);
401 wpret.resize(wpretSize);
// The decoded code points are appended to the result as UTF-8.
408 ret << cv.to_bytes(wpret);
int init(nawa::AppInit &appInit)
Namespace containing functions for text encoding and decoding.
std::string makeEncodedWord(std::string const &input, bool base64=false, bool onlyIfNecessary=true)
std::string base64Decode(std::string const &input)
std::string quotedPrintableDecode(std::string input, bool qEncoding=false)
std::string base64Encode(std::string const &input, size_t breakAfter=0, std::string const &breakSequence="")
std::string quotedPrintableEncode(std::string const &input, std::string const &lineEnding="\r\n", bool replaceCrlf=false, bool qEncoding=false)
std::string punycodeEncode(std::string const &input)
std::string punycodeDecode(std::string const &input)
std::string urlEncode(std::string const &input)
bool isBase64(std::string const &input, bool allowWhitespaces=true)
std::string urlDecode(std::string input)
std::string htmlEncode(std::string input, bool encodeAll=false)
std::string htmlDecode(std::string input)
void regexReplaceCallback(std::string &s, std::regex const &rgx, std::function< std::string(std::vector< std::string > const &)> const &fmt)
std::string hexDump(std::string const &in)
std::string stringReplace(std::string input, std::unordered_map< char, char > const &patterns)
std::vector< std::string > splitString(std::string str, char delimiter, bool ignoreEmpty=false)
std::string toUppercase(std::string s)
Contains useful functions that improve the readability and facilitate maintenance of the NAWA code.