/* * Implements the RPCs for a Plan B volume * relying on fs.c, ctlfs.c, or nilfs.c. * * Locking order: * 1st. fids are locked by the RPCs using them. * 2nd. volslck. * 3rd. mvol locks or vol mntlck * In general: * do not lock fids while holding a vol/mvol lock. * do not lock volslck while holding a mvol/vol lock. */ #include #include #include #include #include #include #include "names.h" #include "vols.h" /* We use our own fids for servers, because we may * have to (re)attach to them and we don't know which fids * the client will use in the future. * All fids owned by us have 0 as their client nr. * The alloc routine returns a new fid each time we alloc 0. * * Regarding Rpcs used for our own bookkeeping, we try to * use the client supplied rpc. See the comment near the top * of main.c */ static QLock fidlck; static int fidgen; static Fid* fidhash[Fhashsz]; static Fid* freefids; static ulong nfids; static ulong nafids; int msglen; #define FIDHASH(s) fidhash[(s)%Fhashsz] int hdebug; void dbgprint(Fid* fid, char* msg, ...) { va_list arg; if (!debug && (!fid || !fid->debug)) return; va_start(arg, msg); vfprint(2, msg, arg); va_end(arg); } void Dbgprint(Fid* fid, char* msg, ...) { va_list arg; if (debug < 2 && (!fid || fid->debug < 2)) return; va_start(arg, msg); vfprint(2, msg, arg); va_end(arg); } int fidfmt(Fmt* fmt) { Fid* f; f = va_arg(fmt->args, Fid*); if (!f) return fmtprint(fmt, "nil fid"); return fmtprint(fmt, "0x%p: fid=%d sfid=%d %N" " mv=%p fs=%p e=%d Q=%llx %c %c", f, f->nr, f->snr, f->sname, f->mvol, f->fs, f->epoch, f->qid.path, ((f->qid.type&QTDIR) ? 'd' : ' '), (f->notimeout ? 'n' : 't')); } static Fid * getfid(int nr) { Fid *f; qlock(&fidlck); for(f = FIDHASH(nr); f; f = f->hnext) if(f->nr == nr) break; qunlock(&fidlck); return f; } int getfidnr(void) { int nr; qlock(&fidlck); nr = ++fidgen; qunlock(&fidlck); return nr; } void closefid(Fid* f, Frpc* fop) { /* We keep f->isopen as it was. * This is only to close the fid in the server. */ if (f->fs && f->fs != &ctlfs && f->fs->fd >= 0){ fop->f.type = Tclunk; fop->f.fid = f->snr; fsop(f->fs, fop); } putfs(f->fs); f->fs = nil; f->epoch = 0; } static int openfid(Fid* fid, Frpc* fop) { if (fid->fs && fid->fs != &ctlfs && fid->fs->fd >= 0){ fop->f.type = Topen; fop->f.fid = fid->snr; fop->f.mode = fid->omode; return fsop(fid->fs, fop); } else return -1; } int fidfree(Fid* fid, Frpc* fop) { Fid *f, **l; int nr; if (fid == nil) return 1; qlock(&fidlck); nfids--; l = &FIDHASH(fid->nr); for(f = *l; f; f = f->hnext) { if(fid == f) { *l = f->hnext; qunlock(&fidlck); closefid(f, fop); f->isopen = 0; if (f->mvol) mvoldelfid(f->mvol, f); putmvol(f->mvol, fop); putfs(f->fs); n_reset(f->sname); free(f->d); free(f->ureadbuf); qlock(&fidlck); f->hnext = freefids; freefids = f; nr = f->nr; Dbgprint(f, "fidfree: released fid=%d\n", nr); f->debug = 0; for(f = *l; f != nil; f = f->hnext) if (nr && f->nr == nr)fprint(2, "\n****\n**** BUG: dup fid in hash: %d\n****\n\n", nr); qunlock(&fidlck); return 1; } l = &f->hnext; } qunlock(&fidlck); fprint(2, "\n\nXXX fidfree: not found? %X\n\n", fid); return 0; } Fid * fidalloc(int nr) { Fid *new, **l; Name* sn; int snr; qlock(&fidlck); l = &FIDHASH(nr); for(new = *l; new; new = new->hnext){ // if nr is 0 it's a local fid used by this program // to issue requests to servers. We may have several // ones, and it's ok if they are dup. if(nr && new->nr == nr){ qunlock(&fidlck); return 0; } } nfids++; if(freefids == 0) { nafids++; freefids = emalloc(sizeof(Fid)); freefids->sname = n_new(); freefids->hnext = nil; freefids->snr = ++fidgen; } new = freefids; freefids = new->hnext; sn = new->sname; snr= new->snr; memset(new, 0, sizeof(Fid)); new->sname= sn; new->snr = snr; new->nr = nr; new->hnext = *l; *l = new; qunlock(&fidlck); return new; } void dumpfids(void) { int i; Fid* f; int nh; fprint(2, "fids: %ld used %ld allocated\n", nfids, nafids); qlock(&fidlck); for (i = 0; i < nelem(fidhash); i++){ nh = 0; for (f = fidhash[i]; f; f = f->hnext){ nh++; fprint(2, " %X\n", f); fprint(2, " %W\n", f->mvol); } if (nh > 5) fprint(2, "\t[%d fids in bucket]\n", nh); } qunlock(&fidlck); } /* * Version, Auth, and Attach are handled by us. * To the user, it seems that a volume is being mounted. * All auth is done by the factotum underlying this program, not by clients. */ static void fsversion(Frpc* fop) { if(strncmp(fop->f.version, "9P", 2) != 0){ fop->f.version = "unknown"; } fop->r.tag = fop->f.tag; fop->r.type = Rversion; fop->r.version = "9P2000"; /* * BUG: We don't know what the real file server msize will * be, we tried using iounit in open, but the kernel seems * to ignore that. */ if (fop->f.msize > 8*1024) fop->f.msize = 8*1024; msglen = fop->r.msize = fop->f.msize; } static void fserror(Frpc* fop, char* e) { fop->r.tag = fop->f.tag; fop->r.type = Rerror; fop->r.ename= e; } static void fsauth(Frpc* fop) { fserror(fop, Eauth); } static int fidbound(Fid* fid, Vol* v) { return !fid->stale && fid->fs && v && v->fs == fid->fs && fid->epoch == fid->fs->epoch; } /* Fid may be a brand new one, or an already bound one. * If bindfid succeeds, fid will be a valid fid in v * for its sname in v->fs. * This does not reopen the fid if it was open. */ static int bindfid(Fid* fid, Mvol* mv, Vol* v, Frpc* fop) { int unbound; if (fidbound(fid, v)) return 1; closefid(fid, fop); if (mv == nil){ // fid into our ctl fs. We're done. incref(&ctlfs); putfs(fid->fs); fid->fs = &ctlfs; fid->epoch = ctlfs.epoch; fid->stale = 0; return 1; } if (fid->mvol == nil){ assert(!fid->linked); unbound = 1; fid->mvol = mv; if (mv) { incref(mv); fid->notimeout = mv->notimeout; fid->debug = mv->debug; } } else { unbound = 0; assert(fid->mvol == mv); } if(v && v->fs) incref(v->fs); putfs(fid->fs); fid->fs = (v ? v->fs : nil); if (fid->fs != nil && fid->mvol != nil && fid->mvol->musthave) fid->fs->musthave = 1; fid->stale = 0; if (v && v->slash && v->fs){ if (walkfid(v->slash, fid, nil, 0, fop) <= 0){ if (unbound){ decref(fid->mvol); fid->mvol = nil; } return 0; } } else { fid->qid.type = QTDIR; fid->qid.path = 0; fid->qid.vers = 0; } if (unbound && mv) mvoladdfid(mv, fid); return 1; } static void fsattach(Frpc* fop) { Vol* v; Mvol* mv; Fid* fid; int nr; int tag; nr = fop->f.fid; tag= fop->f.tag; if (strcmp(getuser(), fop->f.uname)){ fserror(fop, Euser); return; } mv = newmvol(fop->f.aname); if (mv == nil && fop->f.aname[0] != 0){ fserror(fop, Espec); return; } updatemvol(mv, fop); fid = fidalloc(nr); if (fid == nil){ putmvol(mv, fop); fop->r.tag = tag; fserror(fop, Ebadfid); return; } if (!mv){ fid->qid.path = Ctldirqid; fid->qid.vers = 0; fid->qid.type = QTDIR; } for(;;){ // If all vols fail, mv ends up empty. // the rpc is guaranteed to work there. v = getmvolvol(mv, 0); if (bindfid(fid, mv, v, fop)) break; vdprint(2, "attach: deadvol: %s %s\n", v->addr, v->name); qlock(&volslck); deadvol(v, fop); qlock(mv); mvolunmount(mv, v, fop); qunlock(mv); qunlock(&volslck); } if (fid->fs == nil) newfsqid(&fid->qid); // unique qid for fake fs / fop->r.type = Rattach; fop->r.qid = fid->qid; fop->r.tag = tag; } /* We must ensure the system always gets the same qid for * a file we serve: * - The kernel can be using only those files that * have fids on us. Usually Chans for a name space. * - User programs may remember qids for closed files, * that do not have fids on us (e.g., acme for its files). * * The qids we use are the first ones seen, decorated * in the high long in path with nfid->fs->qid. To make them unique. * The decoration ensures that fids forgotten get the same qid if * their file stays the same. * We leave Qid.vers as the real qid.vers+qid.path, to report changes * for files whose path/vers changes. */ static void fixseenqid(Fid* nfid, Fs* fs, Qid* wqid, int i) { Fid* old; ulong vers; vers = wqid[i].vers + (ulong)wqid[i].path; old = mvolgetfid(nfid->mvol, nfid->sname, nfid); if (old){ nfid->qid = old->qid; } else { nfid->qid = wqid[i]; if (fs) nfid->qid.path |= fs->qid.path; } nfid->qid.vers = vers; wqid[i] = nfid->qid; wqid[i].vers = vers; } /* Walks (the unbound) nfid through elems in nfid->sname. * The start for the walk is fid. * The sname in nfid is updated to reflect the walk. * Qids in nfid is updated to: | * Qids in fop->r.wqid[] are updated as well. * * When elems is nil, we walk nfid->sname, and leave the sname * as it was. This is used while rebinding a fid into a new volume. * * Return 1 if could walk elems; 0 if couldn't; -1 if got an IO error. */ int walkfid(Fid* fid, Fid* nfid, char** elems, int nelems, Frpc* fop) { int i; int rebind; int ndotdot; int a,b; int r; rebind = ndotdot = 0; if (elems == nil){ rebind = 1; elems = nfid->sname->elems; nelems= nfid->sname->nelems; } else { /* Drop the part of "../../..." that might * go past the root in the volume. We assume * that we won't get "a/../../..." */ for (i = 0; i < nelems; i++) if (strcmp(elems[i], "..")) break; else ndotdot++; while(ndotdot > fid->sname->nelems){ elems++; nelems--; ndotdot--; } } fop->f.type = Twalk; fop->f.fid = fid->snr; fop->f.newfid = nfid->snr; fop->f.nwname = nelems; for (i = 0; i < nelems; i++) fop->f.wname[i] = elems[i]; r = fsop(fid->fs, fop); if (r < 0 && !strncmp(fop->r.ename, "io ", 3)){ dbgprint(fid, "walkfid: io err: %X\n", fid); return r; } if (nfid != fid && fid->fs) nfid->epoch = fid->fs->epoch; if (!rebind && fop->r.type == Rwalk && nelems == 0 && fid != nfid){ n_reset(nfid->sname); n_cat(nfid->sname, fid->sname); } if (fop->r.type == Rwalk && fop->r.nwqid > 0){ if (!rebind){ /* This is in the inner loop. * If this is improved, the user will notice. */ if (fid != nfid){ n_reset(nfid->sname); n_cat(nfid->sname, fid->sname); } if (fop->r.nwqid < nelems) n_getpos(nfid->sname, &a, &b); for (i = 0; i < nelems; i++){ if (!strcmp(elems[i], "..")) n_dotdot(nfid->sname); else n_append(nfid->sname, elems[i]); fixseenqid(nfid, fid->fs, fop->r.wqid, i); } Dbgprint(fid, "\tfixed fid: %X\n", nfid); if (fop->r.nwqid < nelems) n_setpos(nfid->sname, a, b); } } if (nelems == 0 && nfid != fid && fop->r.nwqid == nelems){ // nfid->qid = fid->qid; fixseenqid(nfid, fid->fs, &fid->qid, 0); } return fop->r.nwqid == nelems; } static Ref once; static void fswalk(Frpc* fop) { Fid* fid; Fid* nfid; Vol* v; Frpc* fwop; // Auxiliary used to keep fop untouched. int r, nvol; int isunion; int musthave; int d, vd; fid = fop->fid = getfid(fop->f.fid); if (fid == nil || fid->isopen){ fserror(fop, Ebadfid); return; } incref(&once); if (once.ref == 1 && hdebug){ d = debug; vd= vdebug; } else d = vd = -1; dbgprint(fid, "<- %F\n", &fop->f); Dbgprint(fid, "Walk %X\n", fid); qlock(fid); if(fop->f.newfid != fop->f.fid){ nfid = fidalloc(fop->f.newfid); if(!nfid) { fprint(2, "BUG: dup fid\n"); /* Try to recover. * The user already knows this happens. */ nfid = getfid(fop->f.newfid); assert(nfid); nfid->nr = 0; // forget it. nfid = fidalloc(fop->f.newfid); if (!nfid){ fprint(2, "BUG at BUG\n"); fserror(fop, Edupfid); qunlock(fid); decref(&once); return; } } assert(!nfid->linked); assert(nfid->mvol == nil); if (nfid->mvol = fid->mvol){ incref(fid->mvol); mvoladdfid(nfid->mvol, nfid); nfid->notimeout = fid->mvol->notimeout; nfid->debug = fid->mvol->debug; } } else nfid = fid; fserror(fop, Ebadfid); // by default fwop = rpcalloc(); fwop->f.tag = fop->f.tag; nvol = 0; isunion = (fid->mvol ? fid->mvol->isunion : 0); musthave= (fid->mvol ? fid->mvol->musthave : 0); do { /* Try the walk in the volume used. * When all volumes in the mvol fail, * an empty fs is used intead of the real thing. * For musthave mvols, we don't accept the empty one. */ r = -1; updatemvol(fid->mvol, fwop); v = getmvolvol(fid->mvol, nvol++); if (!bindfid(fid, fid->mvol, v, fwop)) continue; if (musthave && (v == nil || fid->fs == nil)){ fprint(2, "wait for %s\n", fid->mvol->name); if (d != -1 && vd != -1) debug = vdebug = 2; sleep(Tmout*1000); checkvols(fwop); nvol = 0; continue; } r = walkfid(fid, nfid, fop->f.wname, fop->f.nwname, fop); if (r > 0){ if (nfid != fid && nfid->fs != fid->fs){ putfs(nfid->fs); if (nfid->fs = fid->fs) incref(nfid->fs); } } } while ((r <= 0 && v && isunion) || (r < 0 && musthave)); if (d != -1 && vd != -1){ debug = d; vdebug = vd; } decref(&once); rpcfree(fwop); if (r <= 0 && nfid != fid) // couln't walk. Get rid of it. fop->freefid = nfid; free(nfid->d); nfid->d = nil; Dbgprint(fid, "Walked fid %X\nWalked nfid %X\n", fid, nfid); qunlock(fid); } /* To read all entries of a union at once. * We know that fid points to the root of a union. */ static void fidreadall(Fid *fid, Frpc* fop) { char* buf; long bufsz; long buflen, off; long nr; Vol* v; int nvol; buf = emalloc(16*1024); bufsz = 16*1024; buflen = 0; updatemvol(fid->mvol, fop); nvol = 0; do { v = getmvolvol(fid->mvol, nvol++); if (v == nil && buflen > 0) break; closefid(fid, fop); if (!bindfid(fid, fid->mvol, v, fop)) // ignore continue; nr = openfid(fid, fop); off = 0; if (nr >= 0){ for(;;){ if (buflen == bufsz){ buf = erealloc(buf, bufsz + 16*1024); bufsz += 16*1024; } fop->f.type = Tread; fop->f.count= bufsz - buflen; if (fid->iounit && fop->f.count>fid->iounit) fop->f.count = fid->iounit; fop->f.offset = off; nr = fsop(fid->fs, fop); if (nr < 0 || fop->r.count <= 0) break; // next volume memmove(buf+buflen, fop->r.data, fop->r.count); buflen += fop->r.count; off += fop->r.count; } //closefid(fid, fop); } } while (v); fid->ureadbuf = buf; fid->ureadlen = buflen; } static int mayretry(Frpc* fop) { if (fop->f.type == Tstat || fop->f.type == Tread) return 1; if (fop->f.type == Topen && fop->f.mode == OREAD) return 1; return 0; } /* Similar to fsop(fid, fop), but retries on other * volumes available for the fid's mvol on io errors. * Note that this always retries in the first vol. */ static void mvolop(Fid* fid, Frpc* fop) { Frpc* fwop; Vol* v; int r; int ntries; fwop = nil; ntries = 0; again: if (fid->stale || (fid->fs && fid->fs->fd < 0)){ if (fwop == nil) fwop = rpcalloc(); fwop->f.tag = fop->f.tag; closefid(fid, fwop); } /* Recover fids for volumes that were gone. * On unions we don't recover. The particular vol was gone. */ if (fid->fs == nil && !fid->mvol->isunion && mayretry(fop)){ dbgprint(fid, "rebind for RPC %d: %X\n", fop->f.type, fid); if (fwop == nil) fwop = rpcalloc(); fwop->f.tag = fop->f.tag; for(;;){ fwop->fid = fid; updatemvol(fid->mvol, fwop); v = getmvolvol(fid->mvol, 0); if(v || !fid->mvol || !fid->mvol->musthave) break; sleep(Tmout*1000); checkvols(fwop); dprint(2, "waiting for %s\n", fid->mvol->name); } bindfid(fid, fid->mvol, v, fwop); if (fid->isopen) openfid(fid, fwop); } r = fsop(fid->fs, fop); /* Retry if necessary. */ if (r < 0){ fserror(fop, fop->err); if (!fid->mvol->isunion) if (!strncmp(fop->r.ename, "io ", 3) && mayretry(fop)){ if (fwop == nil) fwop = rpcalloc(); fwop->f.tag = fop->f.tag; checkvols(fwop); ntries++; if (ntries > 2) fprint(2, "%d tries for fid %X\n", ntries, fid); goto again; } } rpcfree(fwop); } // Does not fix qids for dir reads. // Not needed to preserve client's binds. static void fsread(Frpc* fop) { long off, len, n, count; Dir d; Fid* fid; fid = fop->fid = getfid(fop->f.fid); if (fid == nil){ fserror(fop, Ebadfid); return; } dbgprint(fid, "<- %F\n", &fop->f); if (!fid->isopen){ fserror(fop, Enotopen); return; } qlock(fid); fop->f.fid = fid->snr; if (fop->f.count > fid->iounit && fid->iounit) fop->f.count = fid->iounit; off = fop->f.offset; count = fop->f.count; if (fid->sname->nelems > 0 || !fid->mvol || !fid->mvol->isunion){ mvolop(fid, fop); qunlock(fid); return; } /* Volume union reads */ if (fid->ureadbuf == nil){ fidreadall(fid, fop); fop->f.type = Tread; } fop->r.tag = fop->f.tag; fop->r.type= Rread; fop->r.count = 0; fop->r.data = (char*)fop->buf; len = fid->ureadlen; while(count > 0 && off < len){ n = convM2D((uchar*)fid->ureadbuf + off, len - off, &d, nil); if (n <= 0 || n > count) break; memmove(fop->r.data + fop->r.count, fid->ureadbuf + off, n); off += n; count -= n; fop->r.count += n; } qunlock(fid); } static void fsfwd(Frpc* fop) { Fid* fid; int op; fid = fop->fid = getfid(fop->f.fid); if (fid == nil){ fserror(fop, Ebadfid); return; } dbgprint(fid, "<- %F\n", &fop->f); op = fop->f.type; qlock(fid); fop->f.fid = fid->snr; if (fop->f.type == Twrite && fid->iounit && fid->iounit < fop->f.count) fop->f.count = fid->iounit; mvolop(fid, fop); switch(fop->r.type){ case Ropen: fop->r.qid = fid->qid; // comes from walkfid open: fid->isopen = 1; fid->omode = fop->f.mode; if (fop->r.iounit > msglen) fop->r.iounit = msglen; fid->iounit = fop->r.iounit; break; case Rremove: mvoldelotherfids(fid->mvol, fid); break; case Rcreate: n_append(fid->sname, fop->f.name); mvoldelotherfids(fid->mvol, fid); fixseenqid(fid, fid->fs, &fop->r.qid, 0); goto open; case Rstat: if (fop->d && fid->fs) fixseenqid(fid, fid->fs, &(fop->d->qid), 0); break; } qunlock(fid); if (op == Tremove || op == Tclunk){ putfs(fid->fs); fid->fs = nil; fop->freefid = fid; } } static void fsflush(Frpc* fop) { Fid* fid; fop->r.tag = fop->f.tag; fop->r.type= Rflush; fid = fop->fid = getfid(fop->f.fid); if (fid == nil) return; dbgprint(fid, "<- %F\m", &fop->f); /* RACE: We don't lock the fid. * The flushed request is probably holding the fid * lock, and that would block the flush. */ fop->f.fid = fid->snr; if (!fid->stale && fid->fs && fid->fs->fd >= 0) fsop(fid->fs, fop); } void (*fscalls[Tmax])(Frpc*) = { [Tversion] fsversion, [Tauth] fsauth, [Tattach] fsattach, [Tflush] fsflush, [Twalk] fswalk, [Tread] fsread, [Topen] fsfwd, [Tcreate] fsfwd, [Tclunk] fsfwd, [Twrite] fsfwd, [Tremove] fsfwd, [Tstat] fsfwd, [Twstat] fsfwd, };