/* entities.c -- recognize HTML ISO entities (c) 1998-2001 (W3C) MIT, INRIA, Keio University See tidy.c for the copyright notice. CVS Info : $Author: terry_teague $ $Date: 2001/08/19 19:18:57 $ $Revision: 1.6 $ */ #include #include "platform.h" #include "html.h" #define HASHSIZE 731 struct nlist { struct nlist *next; char *name; unsigned code; }; static struct nlist *hashtab[HASHSIZE]; struct entity { char *name; uint code; } entities[] = { {"nbsp", 160}, {"iexcl", 161}, {"cent", 162}, {"pound", 163}, {"curren", 164}, {"yen", 165}, {"brvbar", 166}, {"sect", 167}, {"uml", 168}, {"copy", 169}, {"ordf", 170}, {"laquo", 171}, {"not", 172}, {"shy", 173}, {"reg", 174}, {"macr", 175}, {"deg", 176}, {"plusmn", 177}, {"sup2", 178}, {"sup3", 179}, {"acute", 180}, {"micro", 181}, {"para", 182}, {"middot", 183}, {"cedil", 184}, {"sup1", 185}, {"ordm", 186}, {"raquo", 187}, {"frac14", 188}, {"frac12", 189}, {"frac34", 190}, {"iquest", 191}, {"Agrave", 192}, {"Aacute", 193}, {"Acirc", 194}, {"Atilde", 195}, {"Auml", 196}, {"Aring", 197}, {"AElig", 198}, {"Ccedil", 199}, {"Egrave", 200}, {"Eacute", 201}, {"Ecirc", 202}, {"Euml", 203}, {"Igrave", 204}, {"Iacute", 205}, {"Icirc", 206}, {"Iuml", 207}, {"ETH", 208}, {"Ntilde", 209}, {"Ograve", 210}, {"Oacute", 211}, {"Ocirc", 212}, {"Otilde", 213}, {"Ouml", 214}, {"times", 215}, {"Oslash", 216}, {"Ugrave", 217}, {"Uacute", 218}, {"Ucirc", 219}, {"Uuml", 220}, {"Yacute", 221}, {"THORN", 222}, {"szlig", 223}, {"agrave", 224}, {"aacute", 225}, {"acirc", 226}, {"atilde", 227}, {"auml", 228}, {"aring", 229}, {"aelig", 230}, {"ccedil", 231}, {"egrave", 232}, {"eacute", 233}, {"ecirc", 234}, {"euml", 235}, {"igrave", 236}, {"iacute", 237}, {"icirc", 238}, {"iuml", 239}, {"eth", 240}, {"ntilde", 241}, {"ograve", 242}, {"oacute", 243}, {"ocirc", 244}, {"otilde", 245}, {"ouml", 246}, {"divide", 247}, {"oslash", 248}, {"ugrave", 249}, {"uacute", 250}, {"ucirc", 251}, {"uuml", 252}, {"yacute", 253}, {"thorn", 254}, {"yuml", 255}, {"fnof", 402}, {"Alpha", 913}, {"Beta", 914}, {"Gamma", 915}, {"Delta", 916}, {"Epsilon", 917}, {"Zeta", 918}, {"Eta", 919}, {"Theta", 920}, {"Iota", 921}, {"Kappa", 922}, {"Lambda", 923}, {"Mu", 924}, {"Nu", 925}, {"Xi", 926}, {"Omicron", 927}, {"Pi", 928}, {"Rho", 929}, {"Sigma", 931}, {"Tau", 932}, {"Upsilon", 933}, {"Phi", 934}, {"Chi", 935}, {"Psi", 936}, {"Omega", 937}, {"alpha", 945}, {"beta", 946}, {"gamma", 947}, {"delta", 948}, {"epsilon", 949}, {"zeta", 950}, {"eta", 951}, {"theta", 952}, {"iota", 953}, {"kappa", 954}, {"lambda", 955}, {"mu", 956}, {"nu", 957}, {"xi", 958}, {"omicron", 959}, {"pi", 960}, {"rho", 961}, {"sigmaf", 962}, {"sigma", 963}, {"tau", 964}, {"upsilon", 965}, {"phi", 966}, {"chi", 967}, {"psi", 968}, {"omega", 969}, {"thetasym", 977}, {"upsih", 978}, {"piv", 982}, {"bull", 8226}, {"hellip", 8230}, {"prime", 8242}, {"Prime", 8243}, {"oline", 8254}, {"frasl", 8260}, {"weierp", 8472}, {"image", 8465}, {"real", 8476}, {"trade", 8482}, {"alefsym", 8501}, {"larr", 8592}, {"uarr", 8593}, {"rarr", 8594}, {"darr", 8595}, {"harr", 8596}, {"crarr", 8629}, {"lArr", 8656}, {"uArr", 8657}, {"rArr", 8658}, {"dArr", 8659}, {"hArr", 8660}, {"forall", 8704}, {"part", 8706}, {"exist", 8707}, {"empty", 8709}, {"nabla", 8711}, {"isin", 8712}, {"notin", 8713}, {"ni", 8715}, {"prod", 8719}, {"sum", 8721}, {"minus", 8722}, {"lowast", 8727}, {"radic", 8730}, {"prop", 8733}, {"infin", 8734}, {"ang", 8736}, {"and", 8743}, {"or", 8744}, {"cap", 8745}, {"cup", 8746}, {"int", 8747}, {"there4", 8756}, {"sim", 8764}, {"cong", 8773}, {"asymp", 8776}, {"ne", 8800}, {"equiv", 8801}, {"le", 8804}, {"ge", 8805}, {"sub", 8834}, {"sup", 8835}, {"nsub", 8836}, {"sube", 8838}, {"supe", 8839}, {"oplus", 8853}, {"otimes", 8855}, {"perp", 8869}, {"sdot", 8901}, {"lceil", 8968}, {"rceil", 8969}, {"lfloor", 8970}, {"rfloor", 8971}, {"lang", 9001}, {"rang", 9002}, {"loz", 9674}, {"spades", 9824}, {"clubs", 9827}, {"hearts", 9829}, {"diams", 9830}, {"quot", 34}, {"amp", 38}, {"apos", 39}, {"lt", 60}, {"gt", 62}, {"OElig", 338}, {"oelig", 339}, {"Scaron", 352}, {"scaron", 353}, {"Yuml", 376}, {"circ", 710}, {"tilde", 732}, {"ensp", 8194}, {"emsp", 8195}, {"thinsp", 8201}, {"zwnj", 8204}, {"zwj", 8205}, {"lrm", 8206}, {"rlm", 8207}, {"ndash", 8211}, {"mdash", 8212}, {"lsquo", 8216}, {"rsquo", 8217}, {"sbquo", 8218}, {"ldquo", 8220}, {"rdquo", 8221}, {"bdquo", 8222}, {"dagger", 8224}, {"Dagger", 8225}, {"permil", 8240}, {"lsaquo", 8249}, {"rsaquo", 8250}, {"euro", 8364}, {null, 0} }; static unsigned hash(char *s) { uint hashval; for (hashval = 0; *s != '\0'; s++) hashval = *s + 31*hashval; return hashval % HASHSIZE; } static struct nlist *lookup(char *s) { struct nlist *np; for (np = hashtab[hash(s)]; np != null; np = np->next) if (wstrcmp(s, np->name) == 0) return np; return null; } static struct nlist *install(char *name, uint code) { struct nlist *np; uint hashval; if ((np = lookup(name)) == null) { np = (struct nlist *)MemAlloc(sizeof(*np)); if (np == null || (np->name = wstrdup(name)) == null) return null; hashval = hash(name); np->next = hashtab[hashval]; hashtab[hashval] = np; } np->code = code; return np; } /* entity starting with "&" returns zero on error */ uint EntityCode(char *name) { int c; struct nlist *np; /* numeric entitity: name = "&#" followed by number */ if (name[1] == '#') { c = 0; /* zero on missing/bad number */ /* 'x' prefix denotes hexadecimal number format */ if (name[2] == 'x' || (!XmlTags && name[2] == 'X')) /* #427833 - fix by Bjšrn Hšhrmann 05 Jun 01 */ sscanf(name+3, "%x", &c); else sscanf(name+2, "%d", &c); return c; } /* Named entity: name ="&" followed by a name */ if ((np = lookup(name+1))) return np->code; return 0; /* zero signifies unknown entity name */ } void InitEntities(void) { struct entity *ep; for(ep = entities; ep->name != null; ++ep) install(ep->name, ep->code); } void FreeEntities(void) { struct nlist *prev, *next; int i; for (i = 0; i < HASHSIZE; ++i) { prev = null; next = hashtab[i]; while(next) { prev = next->next; MemFree(next->name); MemFree(next); next = prev; } hashtab[i] = null; } } char *EntityName(uint n) { struct entity *ep; for(ep = entities; ep->name != null; ++ep) { if (ep->code == n) return ep->name; } return null; }