#include #include #include #include "linuxsys.h" #include "linux.h" /* * this is a joke implementation of the epoll interface. * we get the events from buffd and deliver it to the * poll-sets. poll and select is based on this facility. */ typedef struct EPoll EPoll; typedef struct EPollEntry EPollEntry; struct EPollEntry { int fd; epoll_event e; epoll_event re; }; struct EPoll { EPoll *next; int *waiting; Rendez wakeup; int fd; // eventpoll filedescriptor vlong timeout; // time in nsec int maxentry; int nentry; EPollEntry entry[]; }; enum { Event, Timeout, Destroyed, }; extern void buffd(int fd); extern void buffdpoll(int fd); static int timerpid; static int timershutdown; static int timerinterrupted; EPoll *eplist; QLock eplistlock; static void destroyepolltag(void *tag) { EPoll *ep, **p; DPRINT("destroyepolltag()..."); ep = *fdtagp(tag); *fdtagp(tag) = nil; qlock(&eplistlock); for(p = &eplist; *p; p=&((*p)->next)){ if(*p != ep) continue; *p = ep->next; ep->next = nil; if(ep->waiting){ *ep->waiting = Destroyed; ep->waiting = nil; rwakeup(&ep->wakeup); } break; } qunlock(&eplistlock); free(ep); } static void forkepolltag(void *tag) { EPoll *ep, **p; ep = *fdtagp(tag); *fdtagp(tag) = nil; qlock(&eplistlock); for(p = &eplist; *p; p=&((*p)->next)){ if(*p != ep) continue; *p = ep->next; ep->next = nil; break; } qunlock(&eplistlock); free(ep); unlinkfdtag(tag); } int epoll_create(int size) { static int epfdn = 10000; EPoll *ep; int fd; void *tag; //fd = open("/dev/null", OREAD); fd = epfdn++; ep = malloc(sizeof(*ep) + ((size+1) * sizeof(EPollEntry))); memset(ep, 0, sizeof(*ep) + ((size+1) * sizeof(EPollEntry))); ep->maxentry = size; ep->nentry = 0; ep->fd = fd; ep->waiting = nil; ep->wakeup.l = &eplistlock; tag = openfdtag(ep->fd, TAG_EPOLL, 1); *fdtagp(tag) = ep; atdestroyfdtag(tag, destroyepolltag); atforkfdtag(tag, forkepolltag); qlock(&eplistlock); ep->next = eplist; eplist = ep; qunlock(&eplistlock); closefdtag(tag); return fd; } /* assumes eplistlock is aquired */ static int findepollentry(EPoll *ep, int fd) { int i; for(i=0; inentry; i++){ if(ep->entry[i].fd == fd) return i; } return -1; } void epollevent(int fd, ulong setevents, ulong resetevents) { EPoll *ep; restart: qlock(&eplistlock); for(ep = eplist; ep; ep=ep->next){ int i; int wakeup; wakeup = 0; for(i=0; inentry; i++){ EPollEntry *e; e = &ep->entry[i]; if(e->fd != fd) continue; if((e->e.events & (setevents|resetevents))==0) continue; e->re.events |= setevents; e->re.events &= ~resetevents; wakeup |= e->re.events; if(e->e.events & EPOLLONESHOT) e->e.events = 0; } if(wakeup && ep->waiting){ *ep->waiting = Event; ep->waiting = nil; rwakeup(&ep->wakeup); qunlock(&eplistlock); goto restart; } } qunlock(&eplistlock); } int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) { void *tag; EPoll *ep; int ret, i; int pollit; ret = 0; pollit = 0; tag = openfdtag(epfd, TAG_EPOLL, 0); if(tag == nil) return -EBADF; ep = *((EPoll**)fdtagp(tag)); assert(ep!=nil); qlock(&eplistlock); i = findepollentry(ep, fd); switch(op){ default: ret = -EINVAL; goto out; case EPOLL_CTL_ADD: if(i >= 0){ ret = -EEXIST; goto out; } assert(ep->nentry < ep->maxentry); pollit = 1; ep->entry[ep->nentry].fd = fd; ep->entry[ep->nentry].e.events = (event->events | POLLHUP | POLLERR); ep->entry[ep->nentry].e.data = event->data; ep->entry[ep->nentry].re.events = 0; ep->entry[ep->nentry].re.data = event->data; ep->nentry++; break; case EPOLL_CTL_DEL: if(i < 0){ ret = -ENOENT; goto out; } if(i+1 < ep->nentry){ memcpy( &ep->entry[i], &ep->entry[i+1], (ep->nentry - (i + 1))*sizeof(EPollEntry)); } ep->nentry--; break; case EPOLL_CTL_MOD: if(i < 0){ ret = -ENOENT; goto out; } ep->entry[i].e.events = event->events; ep->entry[i].e.data = event->data; ep->entry[i].re.events &= event->events; ep->entry[i].re.data = event->data; break; } out: qunlock(&eplistlock); closefdtag(tag); /* get current events from buffd */ if(pollit){ buffd(fd); buffdpoll(fd); } return ret; } static int timernote(void *, char *note) { if(threadp->pid!=0) return 0; if(strstr(note, "interrupt")){ timerinterrupted = 1; return 1; } if(strstr(note, "alarm")){ return 1; } else { return 0; } } static void settimer(void); static void killtimer(void) { DPRINT("epoll -> killing timer proc pid %d...", timerpid); if(timerpid <= 0) return; timershutdown = 1; postnote(PNPROC, timerpid, "interrupted"); } static void timerproc(void *) { threadp->pid = 0; atnotify(timernote, 1); for(;;){ long w; vlong acttime; vlong waittime; EPoll *ep; acttime = nsec(); waittime = 60LL * 1000000000LL; qlock(&eplistlock); for(ep = eplist; ep; ep = ep->next){ if(ep->waiting==nil) continue; if(timerinterrupted) goto wake; if(ep->timeout==0) continue; if(ep->timeout <= acttime){ wake: *ep->waiting = Timeout; ep->waiting = nil; rwakeup(&ep->wakeup); } else { if((ep->timeout - acttime) < waittime) waittime = (ep->timeout - acttime); } } w = (long)(waittime / 1000000LL); qunlock(&eplistlock); timerinterrupted = 0; if(timershutdown) break; /* * we send alarm if we add new pollsets and * interrupt this sleep() */ sleep(w); } timerpid = -1; DPRINT("timerproc: exit!\n"); } static void settimer(void) { if(timerpid <= 0){ timerinterrupted = 0; timershutdown = 0; timerpid = createxproc(timerproc, nil, RFMEM|RFPROC, 8 * 1024); atexit(killtimer); } else { int fd; char name[80]; if(timerpid < 0) return; /* interrupt the sleep() by sending alarm to timerpid */ snprint(name, sizeof(name), "/proc/%d/note", timerpid); fd = open(name, OWRITE); if(fd >= 0){ fprint(fd, "alarm"); close(fd); } } } int epoll_wait(int epfd, epoll_event *events, int maxevents, int timeout) { EPoll *ep; void *tag; int i, n; DPRINT("epoll_wait(%d, 0x%p, %d, %d)\n", epfd, events, maxevents, timeout); again: tag = openfdtag(epfd, TAG_EPOLL, 0); if(tag == nil) return -EBADF; ep = (EPoll*)*fdtagp(tag); qlock(&eplistlock); closefdtag(tag); ep->timeout = 0; /* scan the pollset and collect whats ready */ n = 0; for(i=0; (inentry) && (n < maxevents); i++){ if(ep->entry[i].re.events){ events[n].events = ep->entry[i].re.events; events[n].data = ep->entry[i].re.data; n++; } } /* no luck this time? */ if(n == 0 && timeout != 0){ int *x; x = malloc(sizeof(*x)); *x = ~0; if(timeout > 0){ /* set the time in nanoseconds to the future */ ep->timeout = nsec() + ((vlong)timeout * 1000000LL); ep->waiting = x; } else { /* wait infinite, dont sets a timeout. timerproc will ignore us */ ep->timeout = 0; ep->waiting = x; } settimer(); rsleep(&ep->wakeup); qunlock(&eplistlock); switch(*x){ default: abort(); case Event: free(x); goto again; case Destroyed: case Timeout: free(x); return 0; } } else { qunlock(&eplistlock); } return n; }