#!/bin/rc
#
# Generate the C table __unfoldbase[] from a UnicodeData.txt file.
#
# For every letter (general category L?) that has a canonical
# decomposition, the decomposition chain is followed to its first code
# point (the base letter).  Each base letter then gets one wide-string
# entry consisting of the base itself followed by every letter that
# reduces to it.
#
# Usage: the last command-line argument, if any, names the data file
# (default UnicodeData.txt).
# Environment:
#	uconv	filter applied to the generated C text (default 8.uconv)
#	rune	pattern handed to grep to select input records
#		NOTE(review): $rune is not set in this script; it is
#		presumably supplied by the caller -- confirm.

# Fall back to the default post-processing filter when none is set.
if(~ $#uconv 0)
	uconv=8.uconv

# Default data file; the last argument on the command line wins.
unicodedata = UnicodeData.txt
for(i)
	unicodedata = $i

# Post-process the generated text with the $uconv filter.
fn Sprint {
	$uconv
}

# Emit the selected UnicodeData records with carriage returns removed.
fn Unicode {
	grep $rune < $unicodedata | tr -d '\015'
}

# Portable hex parser spliced into the awk program below.  It is kept
# as a strtonum-free alternative to hex(); hex() itself does not call it.
# NOTE: this text lives inside an rc single-quoted string, so it must
# not contain apostrophes.
hex='
# hex0(s): value of the leading hexadecimal digits of s.
# An optional 0x/0X prefix is skipped.  Parsing stops at the first
# character that is not a hex digit; 0 is returned when there is no
# leading hex digit at all.  Names after s are awk-style locals.
function hex0(s,	base, r, n, i, k)
{
	base = 16
	if(s ~ /^0[xX]/)
		s = substr(s, 3)
	if(s !~ /^[0-9a-fA-F]/)
		return 0
	n = length(s)
	r = 0
	for(i = 1; i <= n; i++){
		k = index("0123456789abcdef", tolower(substr(s, i, 1))) - 1
		if(k < 0)
			break
		r = r * base + k
	}
	return r
}
'

awk '-F;' '
' ^ $hex ^ '
# hex(x): numeric value of the hex string x.
# strtonum is a gawk extension, so this program needs an awk that has it.
function hex(x){
	return strtonum("0x" x);
}

# Letters only: field 3 is the general category, field 1 the code point
# and field 6 the decomposition mapping.
$3 ~ /^L.$/ {
	cp = hex($1);
	if(cp > last)
		last = cp
#	if(length($14)>=4)
#		lc[cp] = $14
	# Keep canonical decompositions only; compatibility mappings
	# carry a <tag> and are skipped.
	if(length($6) >= 4 && $6 !~ /<.*/)
		val[cp] = $6
}

END {
	# Reduce every code point to its base letter and record it under
	# that base.  The bound is inclusive so that the highest code
	# point seen is processed as well.
	for(i = 0; i <= last; i++){
		cp = i
		# Follow the decomposition chain; hex() takes the first
		# code point of a multi-code-point mapping.
		while(length(r = val[cp]) > 0){
			cp = hex(r)
			if(cp == 0)
				print "broke at [" $0 "] r= " r
		}
#		if(length(r = lc[cp]) > 0)
#			cp = hex(r)
		if(cp != i)
			tab[cp] = tab[cp] sprintf("\\u%04x", i)
	}

	print "static";
	print "Rune\t*__unfoldbase[] =";
	print "{";
	for(i = 0; i <= last; i++)
		if(length(tab[i]) > 0){
			str = sprintf("\\u%04x", i)
			print "/* " str " */\tL\"" str tab[i] "\","
#			print "\t\"" str tab[i] "\", /* " str " */"
		}
	print "};";
	print "";
}
' <{Unicode} | Sprint