#include #include #include /* automatically generated; do not edit. */ typedef struct Fibhdr Fibhdr; struct Fibhdr { ushort wIdent; ushort nFib; ushort nProduct; ushort lid; short pnNext; uchar fDot; uchar fGlsy; uchar fComplex; uchar fHasPic; uchar cQuickSaves; uchar fEncrypted; uchar fWhichTblStm; uchar fReadOnlyRecommended; uchar fWriteReservation; uchar fExtChar; uchar fLoadOverride; uchar fFarEast; uchar fCrypto; ushort nFibBack; ulong lKey; uchar envr; uchar fMac; uchar fEmptySpecial; uchar fLoadOverridePage; uchar fFutureSavedUndo; uchar fWord97Saved; ushort chs; ushort chsTables; long fcMin; long fcMac; ushort csw; }; enum { bcFibhdr = 0x22 }; /* automatically generated; do not edit. */ void readFibhdr(Fibhdr *s, uchar *v, int nv) { if(nv < bcFibhdr) sysfatal("not enough data for Fibhdr"); s->wIdent = v[0x0] | (v[0x0+1] << 8); s->nFib = v[0x2] | (v[0x2+1] << 8); s->nProduct = v[0x4] | (v[0x4+1] << 8); s->lid = v[0x6] | (v[0x6+1] << 8); s->pnNext = v[0x8] | (v[0x8+1] << 8); s->fDot = ((v[0xA]) & 0x1) >> 0; s->fGlsy = ((v[0xA]) & 0x2) >> 1; s->fComplex = ((v[0xA]) & 0x4) >> 2; s->fHasPic = ((v[0xA]) & 0x8) >> 3; s->cQuickSaves = ((v[0xA]) & 0x240) >> 4; s->fEncrypted = ((v[0xB]) & 0x1) >> 0; s->fWhichTblStm = ((v[0xB]) & 0x2) >> 1; s->fReadOnlyRecommended = ((v[0xB]) & 0x4) >> 2; s->fWriteReservation = ((v[0xB]) & 0x8) >> 3; s->fExtChar = ((v[0xB]) & 0x16) >> 4; s->fLoadOverride = ((v[0xB]) & 0x32) >> 5; s->fFarEast = ((v[0xB]) & 0x64) >> 6; s->fCrypto = ((v[0xB]) & 0x128) >> 7; s->nFibBack = v[0xC] | (v[0xC+1] << 8); s->lKey = v[0xE] | (v[0xE+1] << 8)| (v[0xE+2] << 16) | (v[0xE+3] << 24); s->envr = v[0x12]; s->fMac = ((v[0x13]) & 0x1) >> 0; s->fEmptySpecial = ((v[0x13]) & 0x2) >> 1; s->fLoadOverridePage = ((v[0x13]) & 0x4) >> 2; s->fFutureSavedUndo = ((v[0x13]) & 0x8) >> 3; s->fWord97Saved = ((v[0x13]) & 0x16) >> 4; s->chs = v[0x14] | (v[0x14+1] << 8); s->chsTables = v[0x16] | (v[0x16+1] << 8); s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24); s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24); s->csw = v[0x20] | (v[0x20+1] << 8); } void usage(void) { fprint(2, "usage: mswordstrings /mnt/doc/WordDocument\n"); exits("usage"); } void main(int argc, char **argv) { Biobuf *b; Biobuf bout; uchar buf[512]; Fibhdr f; int i, c, n; ARGBEGIN{ default: usage(); }ARGEND if(argc != 1) usage(); Binit(&bout, 1, OWRITE); b = Bopen(argv[0], OREAD); if(b == nil) { fprint(2, "couldn't open file: %r\n"); exits("word"); } n = Bread(b, buf, sizeof buf); if(n < sizeof buf) { fprint(2, "short read: %r\n"); exits("read"); } readFibhdr(&f, buf, sizeof buf); // printFibhdr(&f); Bseek(b, f.fcMin, 0); n = f.fcMac - f.fcMin; for(i=0; i"); break; /* field end */ case 30: Bprint(&bout, "-"); break; /* non-breaking hyphen */ case 31: break; /* non-required hyphen */ /* case 45: Bprint(&bout, "-"); break; /* breaking hyphen */ case 160: Bprint(&bout, " "); break; /* non-breaking space */ /* * these are only supposed to get used when special is set, but we * never see these ascii values otherwise anyway. */ /* * Empirically, some documents have sections of text where * every character is followed by a zero byte. Some have sections * of text where there are no zero bytes. Still others have both * types and alternate between them. Until we parse which * characters are ``special'', page numbers lose out. */ case 0: /* Bprint(&bout, ""); */ break; case 1: Bprint(&bout, ""); break; case 2: Bprint(&bout, ""); break; case 3: Bprint(&bout, ""); break; case 4: Bprint(&bout, ""); break; case 5: Bprint(&bout, ""); break; case 6: Bprint(&bout, ""); break; /* case 7: Bprint(&bout, ""); break; */ case 8: Bprint(&bout, ""); break; case 10: Bprint(&bout, ""); break; /* case 11: Bprint(&bout, ""); break; */ /* case 12: Bprint(&bout, "
"); break; */ /* case 14: Bprint(&bout, ""); break; */ case 15: Bprint(&bout, ""); break; case 16: Bprint(&bout, ""); break; case 22: Bprint(&bout, ""); break; case 23: Bprint(&bout, ""); break; case 24: Bprint(&bout, ""); break; case 25: Bprint(&bout, ""); break; case 26: Bprint(&bout, ""); break; case 27: Bprint(&bout, ""); break; case 28: Bprint(&bout, ""); break; case 29: Bprint(&bout, ""); break; /* printable ascii begins hereish */ /* case 30: Bprint(&bout, ""); break; case 33: Bprint(&bout, ""); break; case 34: Bprint(&bout, ""); break; case 35: Bprint(&bout, ""); break; case 36: Bprint(&bout, ""); break; case 37: Bprint(&bout, ""); break; case 38: Bprint(&bout, ""); break; case 39: Bprint(&bout, ""); break; case 41: break; */ } } Bprint(&bout, "\n"); }