#!/bin/rc
# Generate the C table __base2[] from a UnicodeData.txt style file.
#
# Each emitted row is a pair (code point, base code point): for every
# letter entry (field 3 matching L?) the awk program follows field 6
# (the decomposition mapping, tagged <...> mappings excluded) until it
# reaches a code point with no recorded mapping.
#
# usage: script [unicodedata-file]
#   With several arguments the last one wins; the default input is
#   UnicodeData.txt in the current directory.
#
# Variables read from the environment:
#   uconv  command that post-processes the generated text (default 8.uconv)
#   rune   pattern handed to grep to select input lines; it is not set
#          anywhere in this script -- NOTE(review): presumably supplied by
#          the caller, confirm before running standalone.

if(~ $#uconv 0)
	uconv=8.uconv

unicodedata = UnicodeData.txt
for(i)
	unicodedata = $i

# Filter stdin through the configured converter command.
fn Sprint {
	$uconv
}

# Emit the selected lines of the data file with carriage returns removed.
fn Unicode {
	grep $rune < $unicodedata | tr -d '\015'
}

# Portable hexadecimal parser for awks without strtonum.  It is spliced
# into the awk program below but the program currently calls hex()
# (strtonum based) instead.  Do not use apostrophes inside this text: it
# lives in an rc single-quoted string.
hex='
# hex0(s): numeric value of the hexadecimal string s, with an optional
# 0x or 0X prefix.  Returns 0 when s contains anything but hex digits
# (this includes fields holding several space separated code points).
# Only s is a real parameter; the remaining names are awk locals.
function hex0(s, base, r, n, i, k, c)
{
	base = 16;
	# Strip the prefix from s itself (the original referenced an
	# undefined variable here and always produced the empty string).
	if(s ~ /^0[xX]/)
		s = substr(s, 3);
	# Anchored at both ends so that stray characters are really rejected.
	if(s !~ /^[0-9a-fA-F]*$/)
		return 0;
	n = length(s)
	r = 0
	for (i = 1; i <= n; i++) {
		c = tolower(substr(s, i, 1))
		k = index("0123456789abcdef", c) - 1;
		r = r * base + k
	}
	return r
}
'

# The awk program is assembled by rc concatenation (^) so that hex0 is
# available inside it.  Fields are separated by semicolons.
awk '-F;' '
' ^ $hex ^ '
# hex(x): value of the hexadecimal field x.  Relies on strtonum, which is
# a gawk extension.
function hex(x){
	return strtonum("0x" x);
}
BEGIN {
}
# Letter entries only: remember the highest code point seen and the
# mappings found in fields 14 and 6.
$3 ~ /^L.$/ {
	cp = hex($1);
	if(cp > last)
		last = cp
	# lc[] is only consumed by the disabled code in END.
	if(length($14)>=4)
		lc[cp] = $14
	# Skip tagged mappings such as <compat> ...; keep plain ones.
	if(length($6) >= 4 && $6 !~ /<.*/)
		val[cp] = $6
}
END {
	print "static";
	print "Rune\t__base2[] =";
	print "{";
	# last is itself a recorded code point, so the scan is inclusive;
	# stopping one short would silently drop its mapping.
	for(i = 0; i <= last; i++){
		cp = i
		# Follow the chain of mappings down to the final base.
		while(length(r = val[cp]) > 0){
			cp = hex(r)
			if(cp == 0)
				print "broke at [" $0 "] r= " r
		}
#		if(length(r = lc[cp]) > 0)
#			cp = hex(r)
		if(cp != i)
			printf("\t0x%04x,\t0x%04x,\t/* \\u%04x \\u%04x */\n", i, cp, i, cp);
	}
	print "};";
	print "";
}
' <{Unicode} | Sprint