-- Leo's gemini proxy
-- Connecting to git.thebackupbox.net:1965...
-- Connected
-- Sending request
-- Meta line: 20 text/gemini
repo: html_entities_decode action: commit revision: path_from: revision_from: 02352168edd05bf9f4fda5344fe1977aec376fd9: path_to: revision_to:
commit 02352168edd05bf9f4fda5344fe1977aec376fd9 Author: epoch <epoch@hack.thebackupbox.net> Date: Sat Sep 21 08:04:28 2019 +0000 used gperf to add hash-table lookup instead of derpily looping over everything every check diff --git a/Makefile b/Makefile
--- a/Makefile +++ b/Makefile @@ -1,6 +1,25 @@ -.PHONEY: all clean +PREFIX:=/usr/local +CFLAGS:=-DGPERF + +.PHONEY: all clean install veryclean all: html_entities_decode +install: all + install html_entities_decode $(PREFIX)/bin + +veryclean: clean + rm -f entities_gperf.h + rm -f entities_h.h + clean: rm -f html_entities_decode + +entities_h.h: entities.h + printf 'struct entity { char *name; char *value; }\n%%%%\n' > entities_h.h + sed 's/^ //g' entities.h | tail -n+2 | tac | tail -n+3 | tac >> entities_h.h + +entities_gperf.h: entities_h.h + gperf -t entities_h.h > entities_gperf.h + +test: test.c diff --git a/entities_gperf.h b/entities_gperf.h new file mode 100644 index 0000000000000000000000000000000000000000..e828a83aba6db6152a371294ab84f5d80a7cb149 --- /dev/null +++ b/entities_gperf.h @@ -0,0 +1,758 @@ +/* ANSI-C code produced by gperf version 3.1 */ +/* Command-line: gperf -t entities_h.h */ +/* Computed positions: -k'1-3,5,$' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>." +#endif + +#line 1 "entities_h.h" +struct entity { char *name; char *value; }; + +#define TOTAL_KEYWORDS 252 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 8 +#define MIN_HASH_VALUE 8 +#define MAX_HASH_VALUE 617 +/* maximum key range = 610, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register size_t len) +{ + static unsigned short asso_values[] = + { + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 0, + 20, 10, 0, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 135, 165, 55, 90, 225, + 10, 0, 10, 205, 618, 20, 5, 0, 15, 73, + 30, 618, 5, 30, 10, 20, 618, 618, 10, 165, + 0, 618, 618, 618, 618, 618, 618, 5, 60, 50, + 0, 15, 144, 115, 160, 10, 215, 10, 95, 125, + 25, 0, 5, 207, 90, 20, 0, 65, 35, 35, + 35, 194, 5, 5, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618, 618, 618, 618, + 618, 618, 618, 618, 618, 618, 618 + }; + register unsigned int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[4]]; + /*FALLTHROUGH*/ + case 4: + case 3: + hval += asso_values[(unsigned char)str[2]]; + /*FALLTHROUGH*/ + case 2: + hval += asso_values[(unsigned char)str[1]+1]; + /*FALLTHROUGH*/ + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +struct entity * +in_word_set (register const char *str, register size_t len) +{ + static struct entity wordlist[] = + { + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 190 "entities_h.h" + {"and","∧",}, + {""}, {""}, {""}, {""}, +#line 194 "entities_h.h" + {"int","∫",}, + {""}, {""}, {""}, {""}, +#line 116 "entities_h.h" + {"Rho","Ρ",}, + {""}, {""}, {""}, {""}, {""}, +#line 132 "entities_h.h" + {"iota","ι",}, + {""}, {""}, {""}, +#line 147 "entities_h.h" + {"psi","ψ",}, +#line 182 "entities_h.h" + {"prod","∏",}, + {""}, {""}, {""}, +#line 15 "entities_h.h" + {"not","¬",}, +#line 187 "entities_h.h" + {"prop","∝",}, + {""}, {""}, {""}, +#line 145 "entities_h.h" + {"phi","φ",}, +#line 211 "entities_h.h" + {"sdot","⋅",}, +#line 131 "entities_h.h" + {"theta","θ",}, + {""}, {""}, +#line 224 "entities_h.h" + {"amp","&",}, +#line 234 "entities_h.h" + {"ensp"," ",}, + {""}, +#line 195 "entities_h.h" + {"there4","∴",}, + {""}, {""}, +#line 179 "entities_h.h" + {"isin","∈",}, +#line 107 "entities_h.h" + {"Theta","Θ",}, +#line 236 "entities_h.h" + {"thinsp"," ",}, + {""}, +#line 122 "entities_h.h" + {"Psi","Ψ",}, + {""}, +#line 148 "entities_h.h" + {"omega","ω",}, +#line 230 "entities_h.h" + {"scaron","š",}, + {""}, {""}, {""}, +#line 161 "entities_h.h" + {"trade","™",}, + {""}, {""}, +#line 120 "entities_h.h" + {"Phi","Φ",}, +#line 28 "entities_h.h" + {"sup1","¹",}, +#line 97 "entities_h.h" + {"thorn","þ",}, +#line 229 "entities_h.h" + {"Scaron","Š",}, + {""}, +#line 204 "entities_h.h" + {"sup","⊃",}, +#line 235 "entities_h.h" + {"emsp"," ",}, +#line 154 "entities_h.h" + {"prime","′",}, + {""}, {""}, {""}, +#line 22 "entities_h.h" + {"sup3","³",}, +#line 159 "entities_h.h" + {"image","ℑ",}, + {""}, {""}, {""}, +#line 207 "entities_h.h" + {"supe","⊇",}, +#line 6 "entities_h.h" + {"pound","£",}, + {""}, {""}, +#line 146 "entities_h.h" + {"chi","χ",}, +#line 21 "entities_h.h" + {"sup2","²",}, +#line 180 "entities_h.h" + {"notin","∉",}, + {""}, {""}, +#line 121 "entities_h.h" + {"Chi","Χ",}, + {""}, +#line 133 "entities_h.h" + {"kappa","κ",}, + {""}, {""}, +#line 130 "entities_h.h" + {"eta","η",}, + {""}, +#line 155 "entities_h.h" + {"Prime","″",}, +#line 88 "entities_h.h" + {"otilde","õ",}, + {""}, +#line 193 "entities_h.h" + {"cup","∪",}, + {""}, +#line 109 "entities_h.h" + {"Kappa","Κ",}, +#line 70 "entities_h.h" + {"atilde","ã",}, +#line 111 "entities_h.h" + {"Mu","Μ",}, +#line 140 "entities_h.h" + {"rho","ρ",}, +#line 3 "entities_h.h" + {"nbsp"," ",}, +#line 23 "entities_h.h" + {"acute","´",}, + {""}, {""}, {""}, {""}, {""}, +#line 52 "entities_h.h" + {"Ntilde","Ñ",}, +#line 191 "entities_h.h" + {"or","∨",}, +#line 218 "entities_h.h" + {"loz","◊",}, + {""}, +#line 87 "entities_h.h" + {"ocirc","ô",}, +#line 209 "entities_h.h" + {"otimes","⊗",}, +#line 112 "entities_h.h" + {"Nu","Ν",}, + {""}, {""}, +#line 69 "entities_h.h" + {"acirc","â",}, +#line 84 "entities_h.h" + {"ntilde","ñ",}, + {""}, +#line 192 "entities_h.h" + {"cap","∩",}, + {""}, +#line 81 "entities_h.h" + {"icirc","î",}, + {""}, +#line 136 "entities_h.h" + {"nu","ν",}, +#line 123 "entities_h.h" + {"Omega","Ω",}, + {""}, +#line 77 "entities_h.h" + {"ecirc","ê",}, +#line 86 "entities_h.h" + {"oacute","ó",}, + {""}, {""}, +#line 206 "entities_h.h" + {"sube","⊆",}, +#line 62 "entities_h.h" + {"Ucirc","Û",}, +#line 68 "entities_h.h" + {"aacute","á",}, + {""}, {""}, {""}, {""}, +#line 80 "entities_h.h" + {"iacute","í",}, + {""}, {""}, +#line 254 "entities_h.h" + {"euro","€",}, + {""}, +#line 76 "entities_h.h" + {"eacute","é",}, + {""}, {""}, +#line 29 "entities_h.h" + {"ordm","º",}, +#line 124 "entities_h.h" + {"alpha","α",}, +#line 61 "entities_h.h" + {"Uacute","Ú",}, + {""}, +#line 105 "entities_h.h" + {"Zeta","Ζ",}, +#line 205 "entities_h.h" + {"nsub","⊄",}, + {""}, {""}, +#line 138 "entities_h.h" + {"omicron","ο",}, +#line 129 "entities_h.h" + {"zeta","ζ",}, +#line 175 "entities_h.h" + {"part","∂",}, +#line 178 "entities_h.h" + {"nabla","∇",}, +#line 185 "entities_h.h" + {"lowast","∗",}, +#line 225 "entities_h.h" + {"lt","<",}, +#line 149 "entities_h.h" + {"thetasym","ϑ",}, +#line 25 "entities_h.h" + {"para","¶",}, + {""}, +#line 253 "entities_h.h" + {"rsaquo","›",}, + {""}, +#line 13 "entities_h.h" + {"ordf","ª",}, +#line 56 "entities_h.h" + {"Otilde","Õ",}, +#line 156 "entities_h.h" + {"oline","‾",}, +#line 252 "entities_h.h" + {"lsaquo","‹",}, + {""}, {""}, {""}, +#line 31 "entities_h.h" + {"frac14","¼",}, +#line 74 "entities_h.h" + {"ccedil","ç",}, + {""}, +#line 203 "entities_h.h" + {"sub","⊂",}, + {""}, +#line 94 "entities_h.h" + {"ucirc","û",}, +#line 42 "entities_h.h" + {"Ccedil","Ç",}, +#line 226 "entities_h.h" + {"gt",">",}, + {""}, {""}, +#line 33 "entities_h.h" + {"frac34","¾",}, +#line 199 "entities_h.h" + {"ne","≠",}, + {""}, +#line 55 "entities_h.h" + {"Ocirc","Ô",}, + {""}, {""}, +#line 34 "entities_h.h" + {"iquest","¿",}, + {""}, +#line 143 "entities_h.h" + {"tau","τ",}, + {""}, +#line 32 "entities_h.h" + {"frac12","½",}, +#line 93 "entities_h.h" + {"uacute","ú",}, + {""}, {""}, +#line 197 "entities_h.h" + {"cong","≅",}, +#line 102 "entities_h.h" + {"Gamma","Γ",}, +#line 110 "entities_h.h" + {"Lambda","Λ",}, + {""}, +#line 118 "entities_h.h" + {"Tau","Τ",}, +#line 54 "entities_h.h" + {"Oacute","Ó",}, + {""}, {""}, {""}, {""}, +#line 188 "entities_h.h" + {"infin","∞",}, + {""}, {""}, {""}, +#line 125 "entities_h.h" + {"beta","β",}, +#line 198 "entities_h.h" + {"asymp","≈",}, + {""}, +#line 9 "entities_h.h" + {"brvbar","¦",}, + {""}, +#line 10 "entities_h.h" + {"sect","§",}, +#line 108 "entities_h.h" + {"Iota","Ι",}, + {""}, +#line 7 "entities_h.h" + {"curren","¤",}, + {""}, +#line 5 "entities_h.h" + {"cent","¢",}, +#line 176 "entities_h.h" + {"exist","∃",}, + {""}, {""}, +#line 135 "entities_h.h" + {"mu","μ",}, + {""}, {""}, +#line 114 "entities_h.h" + {"Omicron","Ο",}, +#line 38 "entities_h.h" + {"Atilde","Ã",}, +#line 139 "entities_h.h" + {"pi","π",}, + {""}, {""}, {""}, {""}, +#line 113 "entities_h.h" + {"Xi","Ξ",}, +#line 189 "entities_h.h" + {"ang","∠",}, + {""}, {""}, {""}, {""}, {""}, +#line 166 "entities_h.h" + {"darr","↓",}, +#line 200 "entities_h.h" + {"equiv","≡",}, +#line 223 "entities_h.h" + {"quot",""",}, + {""}, +#line 210 "entities_h.h" + {"perp","⊥",}, + {""}, +#line 37 "entities_h.h" + {"Acirc","Â",}, + {""}, +#line 181 "entities_h.h" + {"ni","∋",}, + {""}, +#line 127 "entities_h.h" + {"delta","δ",}, +#line 186 "entities_h.h" + {"radic","√",}, +#line 201 "entities_h.h" + {"le","≤",}, +#line 115 "entities_h.h" + {"Pi","Π",}, +#line 12 "entities_h.h" + {"copy","©",}, +#line 89 "entities_h.h" + {"ouml","ö",}, +#line 168 "entities_h.h" + {"crarr","↵",}, + {""}, +#line 137 "entities_h.h" + {"xi","ξ",}, + {""}, +#line 71 "entities_h.h" + {"auml","ä",}, +#line 222 "entities_h.h" + {"diams","♦",}, +#line 36 "entities_h.h" + {"Aacute","Á",}, + {""}, +#line 51 "entities_h.h" + {"ETH","Ð",}, +#line 82 "entities_h.h" + {"iuml","ï",}, +#line 72 "entities_h.h" + {"aring","å",}, +#line 90 "entities_h.h" + {"divide","÷",}, + {""}, +#line 219 "entities_h.h" + {"spades","♠",}, +#line 78 "entities_h.h" + {"euml","ë",}, + {""}, +#line 202 "entities_h.h" + {"ge","≥",}, + {""}, {""}, +#line 63 "entities_h.h" + {"Uuml","Ü",}, +#line 100 "entities_h.h" + {"Alpha","Α",}, +#line 91 "entities_h.h" + {"oslash","ø",}, +#line 245 "entities_h.h" + {"sbquo","‚",}, +#line 11 "entities_h.h" + {"uml","¨",}, +#line 237 "entities_h.h" + {"zwnj","‌",}, +#line 220 "entities_h.h" + {"clubs","♣",}, +#line 249 "entities_h.h" + {"dagger","†",}, +#line 248 "entities_h.h" + {"bdquo","„",}, + {""}, +#line 152 "entities_h.h" + {"bull","•",}, +#line 158 "entities_h.h" + {"weierp","℘",}, +#line 134 "entities_h.h" + {"lambda","λ",}, +#line 99 "entities_h.h" + {"fnof","ƒ",}, +#line 151 "entities_h.h" + {"piv","ϖ",}, +#line 217 "entities_h.h" + {"rang","〉",}, + {""}, +#line 64 "entities_h.h" + {"Yacute","Ý",}, +#line 162 "entities_h.h" + {"alefsym","ℵ",}, + {""}, +#line 216 "entities_h.h" + {"lang","〈",}, +#line 213 "entities_h.h" + {"rceil","⌉",}, + {""}, +#line 244 "entities_h.h" + {"rsquo","’",}, +#line 106 "entities_h.h" + {"Eta","Η",}, + {""}, +#line 212 "entities_h.h" + {"lceil","⌈",}, +#line 85 "entities_h.h" + {"ograve","ò",}, +#line 243 "entities_h.h" + {"lsquo","‘",}, +#line 183 "entities_h.h" + {"sum","∑",}, +#line 164 "entities_h.h" + {"uarr","↑",}, + {""}, +#line 67 "entities_h.h" + {"agrave","à",}, + {""}, {""}, {""}, +#line 126 "entities_h.h" + {"gamma","γ",}, +#line 79 "entities_h.h" + {"igrave","ì",}, +#line 247 "entities_h.h" + {"rdquo","”",}, +#line 101 "entities_h.h" + {"Beta","Β",}, + {""}, +#line 49 "entities_h.h" + {"Icirc","Î",}, +#line 75 "entities_h.h" + {"egrave","è",}, +#line 246 "entities_h.h" + {"ldquo","“",}, +#line 65 "entities_h.h" + {"THORN","Þ",}, +#line 95 "entities_h.h" + {"uuml","ü",}, +#line 96 "entities_h.h" + {"yacute","ý",}, +#line 60 "entities_h.h" + {"Ugrave","Ù",}, + {""}, {""}, +#line 18 "entities_h.h" + {"macr","¯",}, + {""}, {""}, +#line 57 "entities_h.h" + {"Ouml","Ö",}, + {""}, +#line 165 "entities_h.h" + {"rarr","→",}, +#line 221 "entities_h.h" + {"hearts","♥",}, +#line 48 "entities_h.h" + {"Iacute","Í",}, + {""}, +#line 160 "entities_h.h" + {"real","ℜ",}, +#line 163 "entities_h.h" + {"larr","←",}, +#line 45 "entities_h.h" + {"Ecirc","Ê",}, + {""}, {""}, {""}, +#line 103 "entities_h.h" + {"Delta","Δ",}, +#line 233 "entities_h.h" + {"tilde","˜",}, +#line 26 "entities_h.h" + {"middot","·",}, +#line 208 "entities_h.h" + {"oplus","⊕",}, + {""}, +#line 172 "entities_h.h" + {"dArr","⇓",}, +#line 251 "entities_h.h" + {"permil","‰",}, +#line 20 "entities_h.h" + {"plusmn","±",}, + {""}, {""}, +#line 59 "entities_h.h" + {"Oslash","Ø",}, +#line 66 "entities_h.h" + {"szlig","ß",}, +#line 44 "entities_h.h" + {"Eacute","É",}, + {""}, {""}, {""}, {""}, {""}, +#line 30 "entities_h.h" + {"raquo","»",}, + {""}, +#line 157 "entities_h.h" + {"frasl","⁄",}, +#line 142 "entities_h.h" + {"sigma","σ",}, + {""}, +#line 14 "entities_h.h" + {"laquo","«",}, +#line 239 "entities_h.h" + {"lrm","‎",}, +#line 128 "entities_h.h" + {"epsilon","ε",}, +#line 241 "entities_h.h" + {"ndash","–",}, +#line 92 "entities_h.h" + {"ugrave","ù",}, + {""}, {""}, +#line 119 "entities_h.h" + {"Upsilon","Υ",}, +#line 117 "entities_h.h" + {"Sigma","Σ",}, +#line 250 "entities_h.h" + {"Dagger","‡",}, +#line 19 "entities_h.h" + {"deg","°",}, + {""}, +#line 53 "entities_h.h" + {"Ograve","Ò",}, + {""}, {""}, {""}, {""}, +#line 4 "entities_h.h" + {"iexcl","¡",}, +#line 58 "entities_h.h" + {"times","×",}, + {""}, {""}, {""}, +#line 27 "entities_h.h" + {"cedil","¸",}, + {""}, +#line 8 "entities_h.h" + {"yen","¥",}, + {""}, {""}, +#line 39 "entities_h.h" + {"Auml","Ä",}, +#line 24 "entities_h.h" + {"micro","µ",}, +#line 215 "entities_h.h" + {"rfloor","⌋",}, + {""}, {""}, {""}, +#line 40 "entities_h.h" + {"Aring","Å",}, +#line 214 "entities_h.h" + {"lfloor","⌊",}, + {""}, +#line 83 "entities_h.h" + {"eth","ð",}, +#line 167 "entities_h.h" + {"harr","↔",}, + {""}, {""}, {""}, {""}, +#line 232 "entities_h.h" + {"circ","ˆ",}, +#line 184 "entities_h.h" + {"minus","−",}, + {""}, {""}, +#line 227 "entities_h.h" + {"OElig","Œ",}, +#line 170 "entities_h.h" + {"uArr","⇑",}, + {""}, {""}, {""}, {""}, +#line 144 "entities_h.h" + {"upsilon","υ",}, +#line 153 "entities_h.h" + {"hellip","…",}, +#line 16 "entities_h.h" + {"shy","­",}, + {""}, {""}, +#line 231 "entities_h.h" + {"Yuml","Ÿ",}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 174 "entities_h.h" + {"forall","∀",}, + {""}, {""}, +#line 177 "entities_h.h" + {"empty","∅",}, +#line 171 "entities_h.h" + {"rArr","⇒",}, + {""}, +#line 35 "entities_h.h" + {"Agrave","À",}, + {""}, {""}, +#line 169 "entities_h.h" + {"lArr","⇐",}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 98 "entities_h.h" + {"yuml","ÿ",}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 50 "entities_h.h" + {"Iuml","Ï",}, + {""}, {""}, +#line 17 "entities_h.h" + {"reg","®",}, +#line 240 "entities_h.h" + {"rlm","‏",}, + {""}, +#line 242 "entities_h.h" + {"mdash","—",}, + {""}, {""}, +#line 238 "entities_h.h" + {"zwj","‍",}, +#line 228 "entities_h.h" + {"oelig","œ",}, +#line 41 "entities_h.h" + {"AElig","Æ",}, + {""}, {""}, {""}, +#line 73 "entities_h.h" + {"aelig","æ",}, + {""}, {""}, {""}, {""}, +#line 46 "entities_h.h" + {"Euml","Ë",}, + {""}, {""}, {""}, +#line 196 "entities_h.h" + {"sim","∼",}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 141 "entities_h.h" + {"sigmaf","ς",}, + {""}, {""}, {""}, +#line 173 "entities_h.h" + {"hArr","⇔",}, + {""}, +#line 47 "entities_h.h" + {"Igrave","Ì",}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 43 "entities_h.h" + {"Egrave","È",}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 104 "entities_h.h" + {"Epsilon","Ε",}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 150 "entities_h.h" + {"upsih","ϒ",} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register unsigned int key = hash (str, len); + + if (key <= MAX_HASH_VALUE) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/html_entities_decode.c b/html_entities_decode.c
--- a/html_entities_decode.c +++ b/html_entities_decode.c @@ -1,6 +1,26 @@ #include <stdio.h> -#include <string.h> +#include <string.h> +#include <unistd.h>//write() +#include <stdlib.h>//strtol() +#ifdef GPERF +#include "entities_gperf.h" +char *get_entity(char *name) { + struct entity *e=in_word_set(name,strlen(name)); + if(e) return e->value; + else return 0; +} +#else #include "entities.h" +char *get_entity(char *name) { + int i; + for(i=0;entities[i];i+=2) { + if(!strcmp(name,entities[i])) { + return entities[i+1]; + } + } + return 0; +} +#endif // https://www.w3.org/MarkUp/html-spec/html-spec_3.html#SEC3.2.3 #define NAMELEN 72 @@ -8,15 +28,12 @@ void print_entity(char *name,int len) { int i; unsigned int c;//we can store one unicode point in here. int l; + char *t; char b[10];//dunno name[len-1]=0;//fuck it. we'll null out the ; and we can play with this string as a C string. if(*name != '&') printf("how in the hell did this happen?!?\n"); - for(i=0;entities[i];i+=2) { - if(!strcmp(name+1,entities[i])) { - name=entities[i+1]; - break; - } - } + t=get_entity(name+1); //skip the leading & + if(t) name=t; if(!strncasecmp(name,"&#x",3)) { c=strtol(name+3,0,16); //we have some hex here. need to convert to decimal. @@ -42,10 +59,10 @@ int main(int argc,char *argv[]) { char in_entity_name=0; int i=0; short in; - for(;(in=fgetc(stdin)) != -1;) { + for(;(in=fgetc(stdin)) != -1;) {//this loop needs to be fixed to read larger amounts of data so it'll go faster buffer[i]=in; i++; - if(in_entity_name && i < NAMELEN && + if(in_entity_name && i < NAMELEN && ( (i > 1 && in == ';') || //if we have an empty entitity... fuck this shit. (i > 1 && (in >= 'a' && in <= 'z') || (in >= 'A' && in <= 'Z') || (in >= '0' && in <='9') || (in == '-') || (in == '.') ) || diff --git a/test.c b/test.c new file mode 100644 index 0000000000000000000000000000000000000000..ffdfe65514499a328100d395fd46a517c4a4d481 --- /dev/null +++ b/test.c @@ -0,0 +1,9 @@ +#include <stdio.h> +#include "entities_gperf.h" + +int main(int argc,char *argv[]) { + if(argc < 2) return 1; + struct entity *e = in_word_set(argv[1],strlen(argv[1])); + if(e) printf("%s -> %s\n",e->name,e->value); + else printf("not found\n"); +}
-----END OF PAGE-----
-- Response ended
-- Page fetched on Sun Jun 2 13:50:29 2024