#include #include #include #include "dict.h" /* Dictionaries from the Germanic Lexicon Project: * "An Anglo-Saxon Dictionary" (Bosworth + Toller) * "An Icelandic English Dictionary" (Cleasby + Vigfusson) * http://lexicon.ff.cuni.cz/ */ enum { Buflen=1000, Maxaux=5, }; /* Possible tags */ enum { B, /* bold (and headword)*/ Header, /* page header */ I, /* italics */ Intro, /* introduction */ Letter, /* letter header */ Page, /* page */ Table, /* table */ Td, /* table data */ Th, /* table header */ Tr, /* table row */ Ntag /* end of tags */ }; /* Assoc tables must be sorted on first field */ static Assoc tagtab[] = { {"B", B}, {"b", B}, {"HEADER", Header}, {"I", I}, {"i", I}, {"INTRODUCTION", Intro}, {"letterheader", Letter}, {"PAGE", Page}, {"TABLE", Table}, {"TD", Td}, {"TH", Th}, {"TR", Tr}, }; static Assoc spectab[] = { {"AElig", L'Æ'}, {"AElig-acute", L'Ǽ'}, {"Aacute", L'Á'}, {"Aring", L'Å'}, {"Beta", L'Β'}, {"ETH", L'Ð'}, {"Eacute", L'É'}, {"FINGER", L'☞'}, {"Iacute", L'Í'}, {"OElig", L'Œ'}, {"Oacute", L'Ó'}, {"Ouml", L'Ö'}, {"THORN",L'Þ'}, {"Uacute", L'Ú'}, {"Uuml", L'Ü'}, {"Yacute", L'Ý'}, {"a-long", L'ā'}, {"a-short", L'ā'}, /* fixme */ {"aacute", L'á'}, {"acirc", L'â'}, {"aelig", L'æ'}, {"aelig-acute", L'ǽ'}, {"aelig-circ", L'æ'}, /* +circumflex */ {"aelig-long", L'ǣ'}, {"alpha", L'α'}, {"alpha-tonos", L'ά'}, {"amp", L'&'}, {"aolig-acute", L'ꜵ'}, /* +acute */ {"aring", L'å'}, {"auml", L'ä'}, {"b-bar", L'ƀ'}, /* b with bar U+0180 */ {"b-rune", L'b'}, /* fixme: should be rune */ {"beta", L'β'}, {"bull", L'•'}, {"c-rune", L'c'}, /* fixme: should be rune */ {"chi", L'χ'}, {"d-bar", L'ð'}, {"dash-uncertain", L'­'}, {"delta", L'δ'}, {"e-hook", L'ẻ'}, {"e-long", L'ē'}, {"e-rune", L'e'}, /* fixme: should be rune */ {"e-short",L'ē'}, /* fixme ? */ {"eacute", L'é'}, {"ecirc", L'ê'}, {"egrave", L'è'}, {"epsilon", L'ε'}, {"epsilon-tonos", L'έ'}, {"eta", L'η'}, {"eta-tonos", L'ή'}, {"eth", L'ð'}, {"euml", L'ë'}, {"f-rune", L'f'}, /* fixme: should be rune */ {"frac12", L'½'}, {"gamma", L'γ'}, {"hand", L'☞'}, {"i-long", L'ī'}, {"i-short", L'ī'}, /* fixme ? */ {"iacute", L'í'}, {"icirc", L'î'}, {"igrave", L'ì'}, {"iota", L'ι'}, {"iota-oxia", L'ί'}, {"iota-tonos", L'ί'}, {"iuml", L'ï'}, {"kappa", L'κ'}, {"l-bar", L'ł'}, {"l-rune", L'l'}, /* fixme: should be rune */ {"lambda", L'λ'}, {"mdash", L'—'}, {"mu", L'μ'}, {"n-long", L'n'}, /* um? */ {"n-rune", L'n'}, /* fixme: should be rune */ {"ntilde", L'ñ'}, {"nu", L'ν'}, {"o-long", L'ō'}, {"o-short",L'ō'}, /* fixme ? */ {"oacute", L'ó'}, {"obar", L'ø'}, {"ocirc", L'ô'}, {"oelig", L'œ'}, {"oelig-acute", L'œ'}, /* +acute */ {"ograve", L'ò'}, {"omega", L'ω'}, {"omega-tonos", L'ώ'}, {"omicron", L'ο'}, {"omicron-tonos", L'ό'}, {"oslash", L'ø'}, {"ouml", L'ö'}, {"para", L'¶'}, {"phi", L'φ'}, {"pi", L'π'}, {"pound", L'£'}, {"psi", L'ψ'}, {"r-long", L'r'}, /* with macron? or ɼ */ {"r-udot", L'·'}, /* fixme */ {"rho", L'ρ'}, {"sect", L'§'}, {"sigma", L'σ'}, {"sigmaf", L'ς'}, {"szlig", L'ß'}, /* fixme? */ {"tau", L'τ'}, {"theta", L'θ'}, {"thorn", L'þ'}, {"tilde", L'~'}, /* fixme, should these be x + tilde */ {"u-long", L'ū'}, {"u-rune", L'u'}, /* fixme: should be rune */ {"u-short", L'ū'}, /* fixme */ {"uacute", L'ú'}, {"ucirc", L'û'}, {"ugrave", L'ù'}, {"upsilon", L'υ'}, {"upsilon-tonos", L'ύ'}, {"uuml", L'ü'}, {"w-rune", L'w'}, /* fixme: should be rune */ {"xi", L'ξ'}, {"y-long", L'ÿ'}, /* fixme: y with macron? */ {"y-rune", L'y'}, /* fixme: should be rune */ {"yacute", L'ý'}, {"yogh", L'ʒ'}, {"zeta", L'ζ'}, }; static Rune normtab[128] = { /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /*20*/ L' ', L'!', L'"', L'#', L'$', L'%', SPCS, L'\'', L'(', L')', L'*', L'+', L',', L'-', L'.', L'/', /*30*/ L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7', L'8', L'9', L':', L';', TAGS, L'=', TAGE, L'?', /*40*/ L'@', L'A', L'B', L'C', L'D', L'E', L'F', L'G', L'H', L'I', L'J', L'K', L'L', L'M', L'N', L'O', /*50*/ L'P', L'Q', L'R', L'S', L'T', L'U', L'V', L'W', L'X', L'Y', L'Z', L'[', L'\\', L']', L'^', L'_', /*60*/ L'`', L'a', L'b', L'c', L'd', L'e', L'f', L'g', L'h', L'i', L'j', L'k', L'l', L'm', L'n', L'o', /*70*/ L'p', L'q', L'r', L's', L't', L'u', L'v', L'w', L'x', L'y', L'z', L'{', L'|', L'}', L'~', NONE, }; static int tagstarts; static char tag[Buflen]; static int naux; static char auxname[Maxaux][Buflen]; static char auxval[Maxaux][Buflen]; static char spec[Buflen]; static Entry curentry; #define cursize (curentry.end-curentry.start) static char *getspec(char *, char *); static char *gettag(char *, char *); /* * cmd is one of: * 'p': normal print * 'h': just print headwords * 'P': print raw */ void gerlexproprintentry(Entry e, int cmd) { char *p, *pe; int t, headword; long r; p = e.start; pe = e.end; changett(0, 0, 0); headword=1; curentry = e; if(cmd == 'r') while(p or \n */ long gerlexpronextoff(long fromoff) { long a, n; int c; a = Bseek(bdict, fromoff, 0); if(a < 0) return -1; n = 0; c = Bgetc(bdict); for(;;) { if(c < 0) break; if(c == '\n') { c = Bgetc(bdict); if(c == '<') { c = Bgetc(bdict); if(c == 'B' && (c=Bgetc(bdict)) == '>') n = 3; else if(c == 'l' && (c=Bgetc(bdict)) == 'e') n = 3; if(n) break; } } else c=Bgetc(bdict); } return (Boffset(bdict)-n); } static char *prkey = "KEY TO THE PRONUNCIATION\n" "\n" "I. CONSONANTS\n" "b, d, f, k, l, m, n, p, t, v, z: usual English values\n" "\n" "g as in go (gəʊ)\n" "h ... ho! (həʊ)\n" "r ... run (rʌn), terrier (ˈtɛriə(r))\n" "(r)... her (hɜː(r))\n" "s ... see (siː), success (səkˈsɜs)\n" "w ... wear (wɛə(r))\n" "hw ... when (hwɛn)\n" "j ... yes (jɛs)\n" "θ ... thin (θin), bath (bɑːθ)\n" "ð ... then (ðɛn), bathe (beɪð)\n" "ʃ ... shop (ʃɒp), dish (dɪʃ)\n" "tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n" "ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n" "dʒ ... judge (dʒʌdʒ)\n" "ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n" "ŋg ... finger (ˈfiŋgə(r))\n" "\n" "Foreign\n" "ʎ as in It. seraglio (serˈraʎo)\n" "ɲ ... Fr. cognac (kɔɲak)\n" "x ... Ger. ach (ax), Sc. loch (lɒx)\n" "ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n" "ɣ ... North Ger. sagen (ˈzaːɣən)\n" "c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n" "ɥ ... Fr. cuisine (kɥizin)\n" "\n" "II. VOWELS AND DIPTHONGS\n" "\n" "Short\n" "ɪ as in pit (pɪt), -ness (-nɪs)\n" "ɛ ... pet (pɛt), Fr. sept (sɛt)\n" "æ ... pat (pæt)\n" "ʌ ... putt (pʌt)\n" "ɒ ... pot (pɒt)\n" "ʊ ... put (pʊt)\n" "ə ... another (əˈnʌðə(r))\n" "(ə)... beaten (ˈbiːt(ə)n)\n" "i ... Fr. si (si)\n" "e ... Fr. bébé (bebe)\n" "a ... Fr. mari (mari)\n" "ɑ ... Fr. bâtiment (bɑtimã)\n" "ɔ ... Fr. homme (ɔm)\n" "o ... Fr. eau (o)\n" "ø ... Fr. peu (pø)\n" "œ ... Fr. boeuf (bœf), coeur (kœr)\n" "u ... Fr. douce (dus)\n" "ʏ ... Ger. Müller (ˈmʏlər)\n" "y ... Fr. du (dy)\n" "\n" "Long\n" "iː as in bean (biːn)\n" "ɑː ... barn (bɑːn)\n" "ɔː ... born (bɔːn)\n" "uː ... boon (buːn)\n" "ɜː ... burn (bɜːn)\n" "eː ... Ger. Schnee (ʃneː)\n" "ɛː ... Ger. Fähre (ˈfɛːrə)\n" "aː ... Ger. Tag (taːk)\n" "oː ... Ger. Sohn (zoːn)\n" "øː ... Ger. Goethe (gøːtə)\n" "yː ... Ger. grün (gryːn)\n" "\n" "Nasal\n" "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n" "ã ... Fr. franc (frã)\n" "ɔ˜ ... Fr. bon (bɔ˜n)\n" "œ˜ ... Fr. un (œ˜)\n" "\n" "Dipthongs, etc.\n" "eɪ as in bay (beɪ)\n" "aɪ ... buy (baɪ)\n" "ɔɪ ... boy (bɔɪ)\n" "əʊ ... no (nəʊ)\n" "aʊ ... now (naʊ)\n" "ɪə ... peer (pɪə(r))\n" "ɛə ... pair (pɛə(r))\n" "ʊə ... tour (tʊə(r))\n" "ɔə ... boar (bɔə(r))\n" "\n" "III. STRESS\n" "\n" "Main stress: ˈ preceding stressed syllable\n" "Secondary stress: ˌ preceding stressed syllable\n" "\n" "E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n"; /* TODO: find transcriptions of foreign consonents, œ, ʏ, nasals */ void gerlexproprintkey(void) { Bprint(bout, "%s", prkey); } /* * f points just after a '&', fe points at end of entry. * Accumulate the special name, starting after the & * and continuing until the next '.', in spec[]. * Return pointer to char after '.'. */ static char * getspec(char *f, char *fe) { char *t; int c, i; t = spec; i = sizeof spec; while(--i > 0) { c = *f++; if(c == ';' || f == fe) break; *t++ = c; } *t = 0; return f; } /* * f points just after '<'; fe points at end of entry. * Expect next characters from bin to match: * [/][^ >]+( [^>=]+=[^ >]+)*> * tag auxname auxval * Accumulate the tag and its auxilliary information in * tag[], auxname[][] and auxval[][]. * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0. * Set naux to the number of aux pairs found. * Return pointer to after final '>'. */ static char * gettag(char *f, char *fe) { char *t; int c, i; t = tag; c = *f++; if(c == '/') tagstarts = 0; else { tagstarts = 1; *t++ = c; } i = Buflen; naux = 0; while(--i > 0) { c = *f++; if(c == '>' || f == fe) break; if(c == ' ') { *t = 0; t = auxname[naux]; i = Buflen; if(naux < Maxaux-1) naux++; } else if(naux && c == '=') { *t = 0; t = auxval[naux-1]; i = Buflen; } else *t++ = c; } *t = 0; return f; }