/*
 * xen ethernet driver derived from:
 * Etherlink III, Fast EtherLink and Fast EtherLink XL adapters.
 * and Linux Xen Front end driver
 * To do:
 * clean up and fix up by plan 9 experts
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "../port/netif.h"
#include "../xen/xennet.h"

/* Printable names for the NETIF_INTERFACE_STATUS_* codes, indexed by code. */
static char *status_name[] = {
	[NETIF_INTERFACE_STATUS_CLOSED] = "closed",
	[NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[NETIF_INTERFACE_STATUS_CONNECTED] = "connected",
	[NETIF_INTERFACE_STATUS_CHANGED] = "changed",
};

/* fix me later ... this xen ether breaks a lot of rules ... not my code ... RGM */
void queue_machphys_update(ulong mfn, ulong pfn);
#include "etherif.h"

/* LOG(a) expands to nothing; functions below re-#define it to toggle tracing. */
#define LOG(a)
#define DPRINTK dp
#define WPRINTK dp
#define IPRINTK dp
#define XCVRDEBUG if(1)print
#define DEBUG

/******************************************************************************
 * Virtual network driver for conversing with remote driver backends.
 *
 * Copyright (c) 2002-2004, K A Fraser
 */

#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */

static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];

/*
 * One packet buffer.  `page' is a BY2PG-aligned pointer into `data'
 * (set up by descalloc); `offset' and `size' locate the packet in it.
 */
struct Desc {
	/* can't do this until xspanalloc is fixed ... */
//	unsigned char page[BY2PG];
	ulong stat;
	ulong size;
	ulong offset;
	unsigned char *page; /* will point into a page-aligned place in data */
	/* just to mess around -- we're having free list corruption and
	 * it occurred in at least one case when two of these desc's
	 * butted up against each other
	 * so make it four not 2
	 * (later) turned out to be a long-standing bug in
	 * xspanalloc! But leave this here for now anyway.
	 */
	unsigned char data[4*BY2PG];
	struct Desc *next;
};

int desc_alloc_count = 0, desc_free_count = 0;

/* Debug table of live descriptors, scanned by checkit() and dumplist(). */
void *descarray[2048];

/*
 * Remember p in the first empty slot of descarray.
 * If the table is full the pointer is silently not recorded.
 */
void
recordalloc(void *p)
{
	int i, found;

	for(i = found = 0; (! found) && (i < 2048); i++) {
		if (descarray[i] == 0) {
			descarray[i] = p;
			found++;
		}
	}
}

/* Clear the first descarray slot that holds p, if any. */
void
recordfree(void *p)
{
	int i;

	for(i = 0; i < 2048; i++) {
		if (descarray[i] == p) {
			descarray[i] = 0;
			break;
		}
	}
}

/*
 * Print the two words preceding every recorded descriptor, then panic.
 * NOTE(review): the words at p-8 and p-4 are presumably the allocator's
 * size and magic header -- confirm against xalloc.
 */
void
dumplist(void)
{
	int i;

	for(i = 0; i < 2048; i++) {
		void *p = descarray[i];
		ulong *l;

		if (! p)
			continue;
		l = (ulong *) ((ulong) p - 8);
		dp("%p: size 0x%ulx magic 0x%ulx\n", p, *l, l[1]);
	}
	panic("shit");
}

/*
 * Check that the word just before every recorded descriptor still holds
 * 0x484f4c45; on a mismatch report it (tagged with s) and call dumplist(),
 * which panics.
 */
void
checkit(char *s)
{
	int i;

	for(i = 0; i < 2048; i++) {
		void *p = descarray[i];
		ulong *l;

		if (! p)
			continue;
		l = (ulong *) ((ulong) p - 4);
		if (*l != 0x484f4c45) {
			dp("%s: For %p hole is bad: 0x%ulx, mfn 0x%ulx\n", s, p, *l, xen_mm_mfn(l));
			dumplist();
		}
	}
}

/*
 * Allocate a zeroed Desc and point d->page at a BY2PG-aligned address
 * inside d->data.  The (unnamed) string argument is only a tag for the
 * disabled trace.  Returns 0 if xallocz fails.
 */
struct Desc *
descalloc(char *)
{
	struct Desc *d;
	unsigned long p;

	checkit("PREALLOC");
	d = xallocz(sizeof(*d), 0);
	/* I need to learn how to use waserror/poperror better */
	if (! d)
		return 0;
	/* round into the middle of data[] so a whole aligned page fits */
	p = (unsigned long) d->data;
	p = (p + 2*BY2PG) & ~(BY2PG-1);
	d->page = (unsigned char *) p;
	/* not atomic, sorry */
	desc_alloc_count++;
	/* dp("DESC: %s: alloc %p(0x%ulx), page %p(0x%ulx)\n", s, d, xen_mm_mfn(d), d->page, xen_mm_mfn(d->page)); */
	recordalloc(d);
	checkit("ALLOC");
	return d;
}

/* Forget d in the debug table and release it with xfree. */
void
descfree(struct Desc *d, char *)
{
	/* dp("DESC: %s: free %p(0x%ulx), page %p(0x%ulx)\n", type, d, xen_mm_mfn(d), d->page, xen_mm_mfn(d->page)); */
	checkit("FREE");
	recordfree(d);
	xfree(d);
	desc_free_count++;
}

typedef struct Desc Desc;

/* Per-interface driver state. */
struct Ctlr {
	Lock; /* consider a Qlock here a la the rhine */
	struct netif_st *next;
	Ether *dev;
	int attached;
	NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
	unsigned int tx_full, txbusy;
	Block *txbp;
	netif_tx_interface_t *tx;
	netif_rx_interface_t *rx;
	Lock tx_lock;
	Lock rx_lock;
	unsigned int handle;
	unsigned int evtchn;
	unsigned int irq;
	int interrupts;
	/* What is the status of our connection to the remote backend? */
#define BEST_CLOSED 0
#define BEST_DISCONNECTED 1
#define BEST_CONNECTED 2
	unsigned int backend_state;
	/* Is this interface open or closed (down or up)? */
#define UST_CLOSED 0
#define UST_OPEN 1
	unsigned int user_state;
	/*
	 * Descriptor tables doubling as id freelists: slot 0 is the list
	 * head and ids run 1..RING_SIZE, so each table needs RING_SIZE+1
	 * slots (create_netdev and network_connect index element RING_SIZE).
	 */
	Desc *txd[NETIF_TX_RING_SIZE+1];
	Desc *rxd[NETIF_RX_RING_SIZE+1];
};

/* Printable names for the BEST_* backend states, indexed by state. */
static char *be_state_name[] = {
	[BEST_CLOSED] = "closed",
	[BEST_DISCONNECTED] = "disconnected",
	[BEST_CONNECTED] = "connected",
};

/* keep it simple. Just statically allocate it for maxether. */
typedef struct Ctlr Ctlr;
Ctlr controllers[MaxEther];

/*
 * Access macros for acquiring freeing slots in {tx,rx}_skbs[].
 * Wrapped in do/while(0) so the macro is a single statement.
 */
#define ADD_ID_TO_FREELIST(_list, _id) \
	do { \
		(_list)[(_id)] = (_list)[0]; \
		(_list)[0] = (void *)(unsigned long)(_id); \
	} while(0)

/* I hate this kind of code anyway ...
#define GET_ID_FROM_FREELIST(_list) \
 ({ unsigned long _id = (unsigned long)(_list)[0]; \
    (_list)[0] = (_list)[_id]; \
    (unsigned short)_id; })
*/

/*
 * Pop the id at the head of the freelist kept in _list[0].
 * Panics if the id is 0 (the head slot itself) or greater than max,
 * either of which means the list is corrupt or exhausted.
 */
unsigned short
GET_ID_FROM_FREELIST(Desc *_list[], int max)
{
	unsigned short _id = (unsigned short)(_list)[0];

	if (_id == 0 || _id > max)
		panic("Bogus ID in GET_ID_FROM_FREELIST: 0x%x, max 0x%x\n", _id, max);
	_list[0] = _list[_id];
	/* LOG(dp("get id from freelist _list %p _id %d\n", _list, _id);)
	   LOG(dp("New list[0] is %d\n", _list[0]);) */
	return _id;
}

/* Hex-dump size bytes starting at x, 16 bytes per output line. */
void
dumpit(void *x, int size)
{
	int i, j;
	unsigned char *cp = x;

	dp("New packet: %p %d bytes\n", x, size);
	for(i = 0; i < size; i += 16) {
		dp("0x%x: ", i);
		for(j = 0; j < 16 && (i+j) < size; j++) {
			dp("%02x ", cp[i+j]);
		}
		dp("\n");
	}
	dp("end of packet\n");
}

/*
 * Return the Ether bound to controllers[handle], or nil if none is
 * bound or handle is outside the controllers table.
 */
static struct Ether *
find_dev_by_handle(unsigned int handle)
{
	struct Ctlr *np;

	if (handle >= MaxEther)
		return nil;
	np = &controllers[handle];
	print("XENFE:find_dev_by_handle %d np %p np->dev %p\n", handle, np, np->dev);
	return np->dev;
}

/** Network interface info. */
struct netif_ctrl {
	/** Number of interfaces. */
	int interface_n;
	/** Number of connected interfaces. */
	int connected_n;
	/** Error code. */
	int err;
	int iface_up;
};

static struct netif_ctrl netctrl;

/* Reset netctrl; interface_n starts at -1. */
static void
netctrl_init(void)
{
	memset(&netctrl, 0, sizeof(netctrl));
	netctrl.interface_n = -1;
}

/** Get or set a network interface error.
*/ static int netctrl_err(int err) { if(err < 0 && !netctrl.err){ netctrl.err = err; print("XENFE:netctrl_err err=%d\n", err); } return netctrl.err; } /** Test if all network interfaces are connected. * * @return 1 if all connected, 0 if not, negative error code otherwise */ static int netctrl_connected(void) { int ok; ok = (netctrl.err ? netctrl.err : (netctrl.connected_n == netctrl.interface_n)); return ok; } /** Count the connected network interfaces. * * @return connected count */ static int netctrl_connected_count(void) { struct Ctlr *np = controllers; unsigned int connected; int i; connected = 0; for(i = 0; i < MaxEther; i++, np++); { if ( np->backend_state == BEST_CONNECTED ) connected++; } LOG(dp("XENFE: connected count is now %d\n", connected);) netctrl.connected_n = connected; return connected; } static void network_tx_buf_gc(Ctlr *np) { #undef LOG #define LOG(a) NETIF_RING_IDX i, prod; unsigned short id; Desc *d; LOG(dp("network_tx_buf_gc backend state is %d, BEST_CONNECTED is %d\n", np->backend_state, BEST_CONNECTED);) if ( np->backend_state != BEST_CONNECTED ) return; LOG(dp("network_tx_buf_gc resp_prod %d resp_cons %d req_prd %d\n", np->tx->resp_prod, np->tx_resp_cons, np->tx->req_prod);) checkit("TX_BUF_GC START"); do { prod = np->tx->resp_prod; for ( i = np->tx_resp_cons; i != prod; i++ ) { #undef LOG #define LOG(a) id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id; LOG(dp("id. Index in ring %d, gc to free is %d\n", MASK_NETIF_TX_IDX(i), id);) d = np->txd[id]; LOG(dp(" d for that is %p\n", d);) ADD_ID_TO_FREELIST(np->txd, id); LOG(dp("so free %p\n", d);) descfree(d, "TX"); } #undef LOG #define LOG(a) np->tx_resp_cons = prod; /* * Set a new event, then check for race with update of tx_cons. Note * that it is essential to schedule a callback, no matter how few * buffers are pending. 
Even if there is space in the transmit ring, * higher layers may be blocked because too much data is outstanding: * in such cases notification from Xen is likely to be the only kick * that we'll get. */ np->tx->event = prod + ((np->tx->req_prod - prod) >> 1) + 1; mb(); } while ( prod != np->tx->resp_prod ); if ( np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) ) { np->tx_full = 0; // if ( np->user_state == UST_OPEN ) // netif_wake_queue(dev); } checkit("TX_BUF_GC_END"); #undef LOG #define LOG(a) } static void network_alloc_rx_buffers(Ctlr *np) { unsigned short id; NETIF_RING_IDX i = np->rx->req_prod; int nr_pfns = 0; Desc *d; int xen_mm_decrease_reservation(unsigned long *pfn_array, int npfn); #undef LOG #define LOG(a) LOG(dp("newtwork_alloc_rx_buffers\n");) /* Make sure the batch is large enough to be worthwhile (1/2 ring). */ if (((i - np->rx_resp_cons) > (NETIF_RX_RING_SIZE/2)) || (np->backend_state != BEST_CONNECTED) ) return; LOG(dp("network_alloc_rx_buffers and i is %d np->rx_resp_cons is %d\n", i, np->rx_resp_cons);) do { LOG(dp(" top of loop ...\n");) d = descalloc("RX"); LOG(dp("descalloc d returns %p\n", d);) if ( d== nil) break; /**/ id = GET_ID_FROM_FREELIST(np->rxd, NETIF_RX_RING_SIZE); LOG(dp("np->rxd is %p, i is %d, d is %p\n", np->rxd, id, d);) np->rxd[id] = d; LOG(dp("np->rx->ring is %p\n", np->rx->ring);) np->rx->ring[MASK_NETIF_RX_IDX(i)].req.id = id; LOG(dp("rx_pfn_array is %p, nr_pfns is %d, PADDR(d)) is 0x%ulx\n", rx_pfn_array, nr_pfns, PADDR(d->page));) /**/ /* the below is cute and all; it just doesn't work. * the error control is just too coarse-graind; which one of the * many ops failed? You don't really know. So we do it slower * but with more knowledge. * we can fix it later once we're more sure that this is all * working right. 
*/ if (set_va_mfn(d->page, 0, 0)) { dp("XENFE: Failed to set va %p to 0 and 0\n", d->page); } /**/ rx_pfn_array[nr_pfns] = xen_mm_mfn(d->page) >> PGSHIFT; /* * / rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping; rx_mcl[nr_pfns].args[0] = (unsigned long)d->page >> PGSHIFT; rx_mcl[nr_pfns].args[1] = 0; rx_mcl[nr_pfns].args[2] = 0; /* */ LOG(dp("bottom of loop ...\n");) nr_pfns++; } while ( (++i - np->rx_resp_cons) != NETIF_RX_RING_SIZE ); LOG(dp(" nr_pfns is %d\n", nr_pfns);) if ((nr_pfns == 0) ) return; /* * We may have allocated buffers which have entries outstanding in the page * update queue -- make sure we flush those first! */ _flush_page_update_queue(); #ifdef NOT /* After all PTEs have been zapped we blow away stale TLB entries. */ /* done above in non-multi-call rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB; */ /* Give away a batch of pages. */ rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op; rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation; rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array; rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns; rx_mcl[nr_pfns].args[3] = 0; rx_mcl[nr_pfns].args[4] = DOMID_SELF; /* Zap PTEs and give away pages in one big multicall. */ (void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1); /* Check return status of HYPERVISOR_dom_mem_op(). 
*/ if ( rx_mcl[nr_pfns].args[5] != nr_pfns ) panic("Unable to reduce memory reservation, err 0x%x\n", rx_mcl[nr_pfns].args[5]); #endif if (xen_mm_decrease_reservation(rx_pfn_array, nr_pfns) < 0) { dp("XENFE: decrease reservation failed\n"); } np->rx->req_prod = i; LOG(dp("ALL DONE i is %d\n", i);) #undef LOG #define LOG(a) } static int network_start_xmit(Ctlr *np, void *data, int size) { #undef LOG #define LOG(a) unsigned short id; Desc *d; netif_tx_request_t *tx; NETIF_RING_IDX i; extern int faultpanic; if (size > BY2PG) { panic("xen network_start_xmit: size %d > 4096\n", size); } if ((np->tx_full) ) { print("%s: full queue wasn't stopped!\n","ether"); return -1; } /* xspanalloc is broken, you can not free memory allocated with * xspanalloc! */ //d = xspanalloc(sizeof(*d), BY2PG, 0); /* so we allocate 2x the size of d, then take a page from the middle * for data. blech. */ d = descalloc("TX"); LOG(dp("XENFE: tx: allocated %p\n", d);) if (! d) { print("Trouble in network_start_xmit: descalloc failed\n"); return -1; } d->size = size; memmove(d->page, data, size); ilock(&np->tx_lock); if (np->backend_state != BEST_CONNECTED ) { iunlock(&np->tx_lock); return 1; } i = np->tx->req_prod; id = GET_ID_FROM_FREELIST(np->txd, NETIF_TX_RING_SIZE); np->txd[id] = d; tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req; LOG(dp("np->tx is %p, np->tx->ring is %p\n", np->tx, np->tx->ring);) LOG(dp("tx is %p, PADDR(d)) is 0x%lx, size is %d\n", tx, PADDR(d), d->size);) LOG(dp("offsets: &tx->addr is %p, &tx->id is %p, &tx->size is %p\n", &tx->addr, &tx->id, &tx->size);) faultpanic = 0; LOG(dp("tx->id BEFORE assign is %d, tx->addr is 0x%lx, size is 0x%lx\n", tx->id, tx->addr, tx->size);) LOG(dp("assign id %d\n", id);) tx->id = id; LOG(dp("E");) checkit("XMITMIDDLE"); tx->addr = xen_mm_mfn(d->page); tx->size = d->size; wmb(); LOG(dp("network_stat_xmit: id %d, addr %p, size %d, set req_prod to %d\n", id, d, d->size, i+1);) np->tx->req_prod = i + 1; network_tx_buf_gc(np); if ( (i - 
np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1) ) { np->tx_full = 1; // netif_stop_queue(dev); } iunlock(&np->tx_lock); /* np->stats.tx_bytes += d->size; np->stats.tx_packets++; */ /* Only notify Xen if there are no outstanding responses. */ mb(); // if ( np->tx->resp_prod == i ) { if (1) { LOG(dp("network_start_xmit: notify via evtchn %d\n", np->evtchn);) notify_via_evtchn(np->evtchn); /* LOG(dp(" TRY A GC\n");) network_tx_buf_gc(np); LOG(dp(" DONE THE GC\n");) */ } checkit("XMIT DONE"); LOG(dp("network_start_xmit: done\n");) return 0; #undef LOG #define LOG(a) } static int netif_poll(Ether *ether) { #undef LOG #define LOG(a) extern unsigned long *mfn; unsigned char *packet; Ctlr *np = ether->ctlr; Desc *d = 0, *newd = 0; netif_rx_response_t *rx; NETIF_RING_IDX i; /* mmu_update_t *mmu = rx_mmu;*/ multicall_entry_t *mcl = rx_mcl; LOG(dp("netif_poll\n");) np->interrupts++; ilock(&np->rx_lock); if ( np->backend_state != BEST_CONNECTED ) { LOG(dp("XENFE: poll on unconnected %p\n", ether);) iunlock(&np->rx_lock); return 0; } LOG(dp("i will be from rx_resp_cons(%d) to rx->resp_prod(%d))\n", np->rx_resp_cons, np->rx->resp_prod);) #undef LOG #define LOG(a) for ( i = np->rx_resp_cons; i != np->rx->resp_prod; i++ ) { rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp; LOG(dp("XENFE: poll: rx is %p, status %d, addr 0x%ulx\n", rx, rx->status, rx->addr);) /* * An error here is very odd. Usually indicates a backend bug, * low-memory condition, or that we didn't have reservation headroom. * Whatever - print an error and queue the id again straight away. */ if (rx->status <= 0) { LOG(dp("Status for el %d is <= 0 (0x%x)\n", i, rx->status);) /* Gate this error. We get a (valid) slew of them on suspend. */ if ( np->user_state == UST_OPEN ) { LOG(dp( "bad buffer on RX ring!(%d)\n", rx->status);) } np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id; wmb(); np->rx->req_prod++; continue; } LOG(dp("id is %d\n", rx->id);) LOG(dp("np %p np->rxd %p\n", np, np->rxd);) if (! 
d) { d = np->rxd[rx->id]; LOG(dp("d %p\n", d);) d->next = 0; newd = d; LOG(dp("newd %p\n", newd);) } else { newd->next = np->rxd[rx->id]; newd = newd->next; newd->next = 0; } LOG(dp("Collected Desc %p id %d\n", newd, rx->id);) ADD_ID_TO_FREELIST(np->rxd, rx->id); /* np->stats.rx_packets++; np->stats.rx_bytes += rx->status; */ LOG(dp("time to remap the page\n");) /* Remap the page. */ LOG(dp("remap rx->addr %p (PPN 0x%ulx) to be 0x%ulx (PADDR 0x%ulx))\n", rx->addr, PPN(rx->addr), newd->page, PADDR(newd->page));) #ifdef NOT mmu->ptr = PPN(rx->addr) | MMU_MACHPHYS_UPDATE; mmu->val = PADDR(newd->page) >> PGSHIFT; mmu++; mcl->op = __HYPERVISOR_update_va_mapping; mcl->args[0] = (unsigned long)newd->page >> PGSHIFT; mcl->args[1] = PPN(rx->addr) | KZERO; mcl->args[2] = 0; mcl++; #endif mfn[PADDR(newd->page) >> PGSHIFT] = rx->addr >> PGSHIFT; queue_machphys_update(PPN(rx->addr)>>PGSHIFT, PADDR(newd->page)>>PGSHIFT); _flush_page_update_queue(); set_va_mfn(newd->page, rx->addr>>PGSHIFT, PTEWRITE|PTEVALID); _flush_page_update_queue(); newd->offset = rx->addr & (BY2PG-1); newd->size = rx->status; } if ((mcl-rx_mcl ) > 0) { LOG(dp("mcl-rx_mcl is %d\n", mcl-rx_mcl);) } /* Do all the remapping work, and M->P updates, in one big hypercall. */ #ifdef NOT if ((mcl - rx_mcl) != 0) { mcl->op = __HYPERVISOR_mmu_update; mcl->args[0] = (unsigned long)rx_mmu; mcl->args[1] = mmu - rx_mmu; mcl->args[2] = 0; mcl++; (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); } LOG(dp("DONE multicall\n");) #endif while (d) { Block *bp; Desc *nextd; /* throw away the MAC header? no */ packet = &d->page[d->offset]; LOG(dp("packet is %p and offset is %d\n", packet, d->offset);) LOG(dp("Packet size is %d bytes\n", d->size);) bp = iallocb(d->size); if (! 
bp) { /* too bad, drop it */ nextd = d->next; descfree(d, "RXDROP"); d = nextd; continue; } LOG(dp("bp->rp is %p and packet is %p\n", bp->rp, packet);) // dp("and val of first byte of page is 0x%ux\n", // *(unsigned char *)d->page); // dumpit(packet, d->size); memmove(bp->rp, packet, d->size); bp->wp = bp->rp + d->size; nextd = d->next; descfree(d, "RXOK"); LOG(dp("etheriq bp %p size %d\n", bp, d->size);) d = nextd; etheriq(ether, bp, 1); } np->rx_resp_cons = i; // LOG(dp("Set rx_resp_cons to %d\n", i);) network_alloc_rx_buffers(np); // LOG(dp("Set np->rx->event to %d\n", i+1);) np->rx->event = i + 1; iunlock(&np->rx_lock); return 0; #undef LOG #define LOG(a) } static void interrupt(Ureg *, void *v) { Ether *ether; Ctlr *np; // LOG(dp("XENFE: xen fe interrupt v %p\n", v);) ether = (Ether *) v; np = ether->ctlr; netif_poll(ether); ilock(&np->tx_lock); // network_tx_buf_gc(np); iunlock(&np->tx_lock); // LOG(dp("XENFE: interrupt done\n");) } static void network_connect(struct Ether *dev, netif_fe_interface_status_changed_t *status) { struct Ctlr *np; int i, requeue_idx; // netif_tx_request_t *tx; np = dev->ctlr; ilock(&np->rx_lock); ilock(&np->tx_lock); /* Recovery procedure: */ /* Step 1: Reinitialise variables. */ np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; np->rx->event = 1; /* Step 2: Rebuild the RX and TX ring contents. * NB. We could just free the queued TX packets now but we hope * that sending them out might do some good. We have to rebuild * the RX ring because some of our pages are currently flipped out * so we can't just free the RX skbs. * NB2. Freelist index entries are always going to be less than * __PAGE_OFFSET, whereas pointers to skbs will always be equal or * greater than __PAGE_OFFSET: we use this property to distinguish * them. */ /* Rebuild the TX buffer freelist and the TX ring itself. * NB. This reorders packets. We could keep more private state * to avoid this but maybe it doesn't matter so much given the * interface has been down. 
*/ for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ ) { /* if ( (unsigned long)np->tx_skbs[i] >= KZERO ) { struct sk_buff *skb = np->tx_skbs[i]; tx = &np->tx->ring[requeue_idx++].req; tx->id = i; tx->addr = virt_to_machine(skb->data); tx->size = skb->len; np->stats.tx_bytes += skb->len; np->stats.tx_packets++; } */ } wmb(); np->tx->req_prod = requeue_idx; /* Rebuild the RX buffer freelist and the RX ring itself. */ for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ ) if ( (unsigned long)np->rxd[i] >= KZERO ) np->rx->ring[requeue_idx++].req.id = i; wmb(); np->rx->req_prod = requeue_idx; /* Step 3: All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. */ np->backend_state = BEST_CONNECTED; notify_via_evtchn(status->evtchn); /**/ network_tx_buf_gc(np); network_alloc_rx_buffers(np); /**/ /* IS there a plan 9 thing we should do here? if ( np->user_state == UST_OPEN ) netif_start_queue(dev); */ iunlock(&np->tx_lock); iunlock(&np->rx_lock); } static void vif_show(struct Ctlr *np) { #ifdef DEBUG if (np) { IPRINTK("<(%p):vif handle=%d %s(%s) evtchn=%d irq=%d tx=%p rx=%p>\n", np, np->handle, be_state_name[np->backend_state], np->user_state ? "open" : "closed", np->evtchn, np->irq, np->tx, np->rx); } else { IPRINTK("\n"); } #endif } /* Send a connect message to xend to tell it to bring up the interface. */ static void send_interface_connect(struct Ctlr *np) { ctrl_msg_t cmsg = { .type = CMSG_NETIF_FE, .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT, .length = sizeof(netif_fe_interface_connect_t), }; netif_fe_interface_connect_t *msg = (void*)cmsg.msg; DPRINTK(">\n"); vif_show(np); msg->handle = np->handle; msg->tx_shmem_frame = xen_mm_mfn(np->tx) >> PGSHIFT; msg->rx_shmem_frame = xen_mm_mfn(np->rx) >> PGSHIFT; /* Tell the controller to bring up the interface. 
*/ ctrl_if_send_message_block(&cmsg, nil, 0, 0); print("CONNECT: message sent. Set something to 0 just to see\n"); /* np->tx->ring[0].req.id = 0;*/ print("CONNECT: set it\n"); DPRINTK("<\n"); } /* Send a driver status notification to the domain controller. */ static int send_driver_status(int ok) { int err; ctrl_msg_t cmsg = { .type = CMSG_NETIF_FE, .subtype = CMSG_NETIF_FE_DRIVER_STATUS, .length = sizeof(netif_fe_driver_status_t), }; netif_fe_driver_status_t *msg = (void*)cmsg.msg; msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN); err = ctrl_if_send_message_block(&cmsg, nil, 0, 0); return err; } /* Stop network device and free tx/rx queues and irq. */ static void vif_release(struct Ctlr *np) { /* Stop old i/f to prevent errors whilst we rebuild the state. */ ilock(&np->tx_lock); ilock(&np->rx_lock); // netif_stop_queue(np->dev); /* np->backend_state = BEST_DISCONNECTED; */ iunlock(&np->rx_lock); iunlock(&np->tx_lock); /* Free resources. */ if(np->tx != nil){ /* leave leak here ... * this will matter at some point, I want plan 9 experts to * get all the ins/outs of release done right. free_irq(np->irq, np->dev); unbind_evtchn_from_irq(np->evtchn); free_page((unsigned long)np->tx); free_page((unsigned long)np->rx); */ np->irq = 0; np->evtchn = 0; np->tx = nil; np->rx = nil; } } /* Release vif resources and close it down completely. */ static void vif_close(struct Ctlr *np) { DPRINTK(">\n"); vif_show(np); WPRINTK("Unexpected netif-CLOSED message in state %s\n", be_state_name[np->backend_state]); vif_release(np); np->backend_state = BEST_CLOSED; /* todo: take dev down and free. */ vif_show(np); DPRINTK("<\n"); } /* Move the vif into disconnected state. * Allocates tx/rx pages. * Sends connect message to xend. * N.B. 
On plan 9 we don't expect this to get called (yet) */ static void vif_disconnect(struct Ctlr *np){ DPRINTK(">\n"); /* LEAK if(np->tx) free_page((unsigned long)np->tx); if(np->rx) free_page((unsigned long)np->rx); */ print("There's still a leak in vif_disconnect\n"); // Before this np->tx and np->rx had better be null. np->tx = (netif_tx_interface_t *)xspanalloc(BY2PG, BY2PG, 0); np->rx = (netif_rx_interface_t *)xspanalloc(BY2PG, BY2PG, 0); memset(np->tx, 0, BY2PG); memset(np->rx, 0, BY2PG); LOG(dp("CONNECT: np->tx is %p, np->rx is %p\n", np->tx, np->rx);) LOG(dp("CONNECT: MFN of tx is 0x%lx, PADDR is 0x%lx\n", xen_mm_mfn(np->tx), PADDR(np->tx));) np->backend_state = BEST_DISCONNECTED; send_interface_connect(np); vif_show(np); DPRINTK("<\n"); } /* Begin interface recovery. * * NB. Whilst we're recovering, we turn the carrier state off. We * take measures to ensure that this device isn't used for * anything. We also stop the queue for this device. Various * different approaches (e.g. continuing to buffer packets) have * been tested but don't appear to improve the overall impact on * TCP connections. * * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery * is initiated by a special "RESET" message - disconnect could * just mean we're not allowed to use this interface any more. */ static void vif_reset( struct Ctlr *np) { DPRINTK(">\n"); IPRINTK("Attempting to reconnect network interface: handle=%d\n", np->handle); vif_release(np); vif_disconnect(np); vif_show(np); DPRINTK("<\n"); } /* Move the vif into connected state. * Sets the mac and event channel from the message. * Binds the irq to the event channel. 
*/ static void vif_connect( struct Ctlr *np, netif_fe_interface_status_t *status) { static int create_netdev(int, struct Ether **); int xenfrontendreset(Ether *ether); struct Ether *dev = np->dev; DPRINTK(">\n"); memmove(dev->ea, status->mac, sizeof(dev->ea)); memmove(dev->addr, status->mac, sizeof(dev->addr)); create_netdev(status->handle, nil); network_connect(dev, status); np->evtchn = status->evtchn; np->irq = bind_evtchn_to_irq(np->evtchn, 0); intrenable(np->irq, interrupt, dev, 0, "xen network"); unmask_evtchn(np->evtchn); netctrl_connected_count(); netctrl_connected_count(); // vif_wake(dev); addethercard("xenfrontend", xenfrontendreset); vif_show(np); DPRINTK("<\n"); } /* Get the target interface for a status message. * Creates the interface when it makes sense. * The returned interface may be null when there is no error. * * @param status status message * @param np return parameter for interface state * @return 0 on success, error code otherwise */ static int target_vif( netif_fe_interface_status_t *status, struct Ctlr **np) { static int create_netdev(int, struct Ether **); int err = 0; struct Ether *dev; DPRINTK("> handle=%d\n", status->handle); if ( status->handle < 0 ) { err = -1; goto exit; } if ( (dev = find_dev_by_handle(status->handle)) != nil ) goto exit; if ( status->status == NETIF_INTERFACE_STATUS_CLOSED ) goto exit; if ( status->status == NETIF_INTERFACE_STATUS_CHANGED ) goto exit; /* It's a new interface in a good state - create it. */ DPRINTK("> create device...\n"); if ( (err = create_netdev(status->handle, &dev)) != 0 ) goto exit; netctrl.interface_n++; exit: if ( np != nil ) *np = ((dev && !err) ? dev->ctlr : nil); DPRINTK("< err=%d\n", err); return err; } /* Handle an interface status message. 
*/ static void netif_interface_status(netif_fe_interface_status_t *status) { int err; struct Ctlr *np = nil; DPRINTK(">\n"); DPRINTK("> status=%s handle=%ud\n", status_name[status->status], status->handle); if ( (err = target_vif(status, &np)) != 0 ) { WPRINTK("Invalid netif: handle=%ud, err %d\n", status->handle, err); return; } if ( np == nil ) { DPRINTK("> no vif\n"); return; } DPRINTK(">\n"); vif_show(np); switch ( status->status ) { case NETIF_INTERFACE_STATUS_CLOSED: switch ( np->backend_state ) { case BEST_CLOSED: case BEST_DISCONNECTED: case BEST_CONNECTED: vif_close(np); break; } break; case NETIF_INTERFACE_STATUS_DISCONNECTED: switch ( np->backend_state ) { case BEST_CLOSED: vif_disconnect(np); break; case BEST_DISCONNECTED: case BEST_CONNECTED: vif_reset(np); break; } break; case NETIF_INTERFACE_STATUS_CONNECTED: switch ( np->backend_state ) { case BEST_CLOSED: WPRINTK("Unexpected netif status %s in state %s\n", status_name[status->status], be_state_name[np->backend_state]); vif_disconnect(np); vif_connect(np, status); break; case BEST_DISCONNECTED: vif_connect(np, status); break; } break; case NETIF_INTERFACE_STATUS_CHANGED: /* * The domain controller is notifying us that a device has been * added or removed. */ break; default: WPRINTK("Invalid netif status code %d\n", status->status); break; } vif_show(np); DPRINTK("<\n"); } /* * Initialize the network control interface. */ static void netif_driver_status(netif_fe_driver_status_t *status) { DPRINTK("> status=%d\n", status->status); netctrl.iface_up = status->status; //netctrl.interface_n = status->max_handle; //netctrl.connected_n = 0; netctrl_connected_count(); } /** Create a network device. 
* @param handle device handle * @param val return parameter for created device * @return 0 on success, error code otherwise */ static int create_netdev(int handle, struct Ctlr ** /*val*/) { int i; struct Ctlr *np = &controllers[handle]; dp("XENFE: create_netdev %d\n", handle); np->backend_state = BEST_CLOSED; np->user_state = UST_CLOSED; np->handle = handle; // np->tx_lock = np->rx_lock = 0; /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ ) np->txd[i] = (void *)(i+1); for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ ) np->rxd[i] = (void *)(i+1); LOG(dp("XENFE: all done set up the rings\n");) return 0; } /* * Initialize the network control interface. Set the number of network devices * and create them. */ static void netif_driver_status_change( netif_fe_driver_status_changed_t *status) { int err; int i; LOG(dp("XENFE: netif_driverr_status_chnage # ifaces %d\n", netctrl.interface_n );) LOG(dp("XENFE: status says %d interfaces\n", status->nr_interfaces);) netctrl.interface_n = status->nr_interfaces; netctrl.connected_n = 0; netctrl.iface_up = status->status; /* leave this here? 
Probably not*/ for ( i = 0; i < netctrl.interface_n; i++ ) { if ( (err = create_netdev(i, nil)) != 0 ) { netctrl_err(err); LOG(dp("create netdev failed...\n");) break; } } /**/ netctrl_connected_count(); LOG(dp("XENFE DONE driver status change\n");) } static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long /*id*/) { int respond = 1; LOG(dp("XENFE: xenfe netif_ctrlif_rx subtype %d\n", msg->type);) switch ( msg->subtype ) { case CMSG_NETIF_FE_INTERFACE_STATUS: dp("XENFE: got an iface status changed message, length %d want %d\n", msg->length, sizeof(netif_fe_interface_status_changed_t)); if ( msg->length != 18 ) /* SHITsizeof(netif_fe_interface_status_changed_t) )*/ goto error; netif_interface_status((netif_fe_interface_status_t *) &msg->msg[0]); dp("Done iface status\n"); break; case CMSG_NETIF_FE_DRIVER_STATUS: dp("XENFE: got a driver status changed message, len %d, want %d\n", msg->length, sizeof(netif_fe_driver_status_changed_t)); if ( msg->length != sizeof(netif_fe_driver_status_changed_t) ) goto error; dp("Call netif_driver_status_change ...\n"); netif_driver_status((netif_fe_driver_status_t *) &msg->msg[0]); dp("Done driver status\n"); /* Message is a response */ respond = 0; break; error: default: msg->length = 0; break; } if ( respond ) ctrl_if_send_response(msg); } static int netif_init(void) { ctrl_msg_t cmsg; netif_fe_driver_status_changed_t st; int err = 0; //Rendez r; /* if ( (start_info.flags & SIF_INITDOMAIN) || (start_info.flags & SIF_NET_BE_DOMAIN) ) return 0; */ print("XENFE:Initialising Xen virtual ethernet frontend driver"); netctrl_init(); ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx, CALLBACK_IN_BLOCKING_CONTEXT); /* Send a driver-UP notification to the domain controller. 
*/ cmsg.type = CMSG_NETIF_FE; cmsg.subtype = CMSG_NETIF_FE_DRIVER_STATUS; cmsg.length = sizeof(netif_fe_driver_status_changed_t); st.status = NETIF_DRIVER_STATUS_UP; st.nr_interfaces = 0; memmove(cmsg.msg, &st, sizeof(st)); ctrl_if_send_message_block(&cmsg, nil, 0, 0); return err; } static void initxen(Ctlr* ctlr) { LOG(dp("XENFE: xenfe init %p\n", ctlr);) USED(ctlr); } static uchar* startdma(Ether* ether, ulong address) { print("XENFE:xenfe startdma %p %ulx\n", ether, address); return 0; } static void promiscuous(void* arg, int on) { print("XENFE:xenfe promisc %p %d\n", arg, on); } static void multicast(void* arg, uchar *addr, int on) { print("XENFE:xenfe mc %p %p %d\n", arg, addr, on); } static void attach(Ether* ether) { Ctlr *ctlr; // int wait_i, wait_n = 20; // int err; dp("XENFE: etherxenfrontend: attach ether port 0x%lux ctrl %p\n", ether->port, ether->ctlr); /* Wait for all interfaces to be connected. */ while (netctrl_connected() < 0) { LOG(dp("XENFE: waiting for one connect\n");) HYPERVISOR_yield(); } ctlr = ether->ctlr; ilock(ctlr); if(ctlr->attached){ iunlock(ctlr); return; } ctlr->attached = 1; iunlock(ctlr); LOG(dp("XENFE: attach done for port %d\n", ether->port);) } static void statistics(Ether* ether) { LOG(dp("XENFE: xenfe statistics %p\n", ether);) USED(ether); } static void txstart(Ether* ether) { #undef LOG #define LOG(a) int len; Ctlr *ctlr; Block *bp; LOG(dp("XENFE: xenfe txstart %p\n", ether);) ctlr = ether->ctlr; /* * Attempt to top-up the transmit FIFO. If there's room simply * stuff in the packet length (unpadded to a dword boundary), the * packet data (padded) and remove the packet from the queue. * If there's no room post an interrupt for when there is. * This routine is called both from the top level and from interrupt * level and expects to be called with ctlr->wlock already locked * and the correct register window (Wop) in place. 
*/ for(;;){ if(ctlr->txbp){ bp = ctlr->txbp; ctlr->txbp = 0; } else{ bp = qget(ether->oq); if(bp == nil) break; } len = ROUNDUP(BLEN(bp), 2); if(! ctlr->tx_full){ // int i; // for(i = 0; i < 16; i++) // LOG(dp("0x%x ", bp->rp[i]);) // LOG(dp("\n");) memmove(&bp->rp[6], ether->ea, sizeof(ether->ea)); network_start_xmit(ctlr, bp->rp, len); freeb(bp); ether->outpackets++; } else{ ctlr->txbp = bp; if(ctlr->txbusy == 0){ ctlr->txbusy = 1; } break; } } LOG(dp("txstart: done\n");) #undef LOG #define LOG(a) } static void transmit(Ether* ether) { #undef LOG #define LOG(a) Ctlr *ctlr; ctlr = ether->ctlr; ilock(ctlr); LOG(dp("XENFE: xenfe xmit port %d ctlr %p\n", ether->port, ctlr);) txstart(ether); iunlock(ctlr); LOG(dp("XENFE: transmit done\n");) #undef LOG #define LOG(a) } static long ifstat(Ether* ether, void* a, long n, ulong offset) { char *p; int len; Ctlr *ctlr; LOG(dp("XENFE: ifstat %p, %p, %ld, %ld\n", ether, a, n, offset);) if(n == 0) return 0; ctlr = ether->ctlr; ilock(ctlr); statistics(ether); iunlock(ctlr); p = malloc(READSTR); len = snprint(p, READSTR, "interrupts: %d\n", ctlr->interrupts); // len += snprint(p+len, READSTR-len, "bogusinterrupts: %lud\n", ctlr->bogusinterrupts); USED(len); #ifdef NOT if(ctlr->upenabled){ if(ctlr->upqmax > ctlr->upqmaxhw) ctlr->upqmaxhw = ctlr->upqmax; len += snprint(p+len, READSTR-len, "up: q %lud i %lud m %d h %d s %lud\n", ctlr->upqueued, ctlr->upinterrupts, ctlr->upqmax, ctlr->upqmaxhw, ctlr->upstalls); ctlr->upqmax = 0; } if(ctlr->dnenabled){ if(ctlr->dnqmax > ctlr->dnqmaxhw) ctlr->dnqmaxhw = ctlr->dnqmax; len += snprint(p+len, READSTR-len, "dn: q %lud i %lud m %d h %d\n", ctlr->dnqueued, ctlr->dninterrupts, ctlr->dnqmax, ctlr->dnqmaxhw); ctlr->dnqmax = 0; } snprint(p+len, READSTR-len, "badssd: %lud\n", ctlr->stats[BytesRcvdOk+2]); #endif n = readstr(offset, a, n, p); free(p); return n; } static void txrxreset(int port) { LOG(dp("XENFE: txrxreset %d\n", port);) USED(port); } static void shutdown(Ether *ether) { 
LOG(dp("XENFE: xenfrontend shutting down %p\n", ether);) // resetctlr(ether->ctlr); USED(ether); } /* this is an experiment ... */ Ether *theEther = nil; void xenpoll(void) { dp("X"); if (theEther) netif_poll(theEther); } int xenfrontendreset(Ether* ether) { // char *p; Ctlr *ctlr; // uchar ea[Eaddrlen]; static int scandone; LOG(dp("XENFE: xenfrontendreset!\n");) if (ether->ctlrno > 0) return -1; ctlr = &controllers[ether->ctlrno]; /* * Clear out the * adapter statistics, clear the statistics logged into ctlr * and enable statistics collection. */ ilock(ctlr); /* * Linkage to the generic ethernet driver. */ ether->attach = attach; ether->transmit = transmit; ether->interrupt = interrupt; ether->ifstat = ifstat; ether->promiscuous = promiscuous; ether->multicast = multicast; ether->shutdown = shutdown; ether->arg = ether; ether->ctlr = ctlr; ctlr->dev = ether; ether->maxmtu = 1514; iunlock(ctlr); theEther = ether; LOG(dp("XENFE: xenfrontendreset: OK\n");) return 0; } void etherxenfrontendlink(void) { LOG(dp("XENFE: etherxenfrontendlink!\n");) netif_init(); addethercard("xenfrontend", xenfrontendreset); }