#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "ureg.h"
#include "../port/error.h"
#include "../port/sd.h"
#include "../xen/xenblk.h"

/* Debug tracing is compiled out; define LOG(a) as `a' to enable it. */
#define LOG(a)

Lock io_request_lock;

void blkif_control_send(blkif_request_t*, blkif_response_t*);

/******************************************************************************
 * arch/xen/drivers/blkif/frontend/vbd.c
 *
 * Xenolinux virtual block-device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 */

/* Information about our VBDs. */
#define MAX_VBDS 64
static int nr_vbds;
static vdisk_t *vbd_info;

/*
 * Hex-dump `size' bytes at x, 16 bytes per line, for debugging.
 */
static void
dumpit(void *x, int size)
{
	int i, j;
	unsigned char *cp = x;

	print("New packet: %p %d bytes\n", x, size);
	for(i = 0; i < size; i += 16){
		print("0x%x: ", i);
		for(j = 0; j < 16 && (i+j) < size; j++)
			print("%02x ", cp[i+j]);
		print("\n");
	}
	print("end of packet\n");
}

/*
 * Probe the backend for the virtual disks visible to this domain.
 * Fills in at most MAX_VBDS entries of disk_info and returns the
 * number of disks found, or -1 on failure.
 */
static int
xlvbd_get_vbd_info(vdisk_t *disk_info)
{
	int i;
	vdisk_t *buf;
	blkif_request_t req;
	blkif_response_t rsp;
	int nr;

	/* probe reply is written by the backend into a whole shared page */
	buf = mallocalign(BY2PG, BY2PG, 0, 0);
	if(buf == nil)
		return -1;

	memset(&req, 0, sizeof(req));
	req.operation = BLKIF_OP_PROBE;
	req.nr_segments = 1;
	/* low 3 bits encode the last sector in the page: 7 == whole page */
	req.frame_and_sects[0] = xen_mm_mfn(buf) | 7;

	blkif_control_send(&req, &rsp);
	LOG(dp("===> blkif_control_send returns %d\n", rsp.status);)
	dumpit(&rsp, sizeof(rsp));

	if(rsp.status <= 0){
		dp("Could not probe disks (%d)\n", rsp.status);
		free(buf);
		return -1;
	}
	if((nr = rsp.status) > MAX_VBDS)
		nr = MAX_VBDS;

	for(i = 0; i < nr; i++){
		/*
		 * The reply packs 12-byte records: 8-byte capacity,
		 * 2-byte device, 2-byte info.  Unpack by hand since the
		 * compiler may pad vdisk_t differently from the wire format.
		 */
		unsigned char *c = (unsigned char *)buf;

		c = &c[i*12];
		print("Pointer is %p\n", c);
		disk_info[i].capacity = *(blkif_sector_t *)c;
		disk_info[i].device = *(blkif_vdev_t *)&c[8];
		disk_info[i].info = *(u16 *)&c[10];
		print("Disk %d cap %lld dev %d info 0x%x\n",
			i, disk_info[i].capacity, disk_info[i].device,
			disk_info[i].info);
	}

	free(buf);
	return nr;
}

/*
 * xlvbd_init_device - initialise a VBD device
 * @xd: a vdisk_t describing the VBD
 *
 * Takes a vdisk_t * that describes a VBD the domain has access to.
 * Performs appropriate initialisation and registration of the device.
 *
 * Care needs to be taken when making re-entrant calls to ensure that
 * corruption does not occur.  Also, devices that are in use should not
 * have their details updated.  This is the caller's responsibility.
 */
static int
xlvbd_init_device(vdisk_t *xd)
{
	int rc = 0;
	unsigned long capacity;

	/* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
	capacity = (unsigned long)xd->capacity;
	print("capacity is %lud\n", capacity);

	return rc;
}

/*
 * Set up the driver state for the virtual block devices (vbd's) that
 * we know about.  Note that although from the backend driver's p.o.v.
 * VBDs are addressed simply as an opaque 16-bit device number, the domain
 * creation tools conventionally allocate these numbers to correspond to
 * those used by 'real' linux -- this is just for convenience as it means
 * e.g. that the same /etc/fstab can be used when booting with or without
 * Xen.
 */
int
xlvbd_init(void)
{
	int i;

	vbd_info = malloc(MAX_VBDS * sizeof(vdisk_t));
	if(vbd_info == nil)
		return 0;
	nr_vbds = xlvbd_get_vbd_info(vbd_info);

	if(nr_vbds < 0){
		/* probe failed; report the error, not "zero disks" */
		dp("============> disk probe failed (%d)\n", nr_vbds);
		free(vbd_info);
		vbd_info = nil;
		nr_vbds = 0;
	}
	else{
		for(i = 0; i < nr_vbds; i++){
			print("======> init device %d\n", i);
			xlvbd_init_device(&vbd_info[i]);
		}
	}
	return 0;
}

/******************************************************************************
 * arch/xen/drivers/blkif/frontend/main.c
 *
 * Xenolinux virtual block-device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 */

typedef unsigned char byte;	/* from linux/ide.h */

#define BLKIF_STATE_CLOSED		0
#define BLKIF_STATE_DISCONNECTED	1
#define BLKIF_STATE_CONNECTED		2

static unsigned int blkif_state = BLKIF_STATE_CLOSED;
static unsigned int blkif_evtchn, blkif_irq;

static int blkif_control_rsp_valid;
static blkif_response_t blkif_control_rsp;

static blkif_ring_t *blk_ring;
static BLKIF_RING_IDX resp_cons;	/* Response consumer for comms ring. */
static BLKIF_RING_IDX req_prod;		/* Private request producer. */

static blkif_ring_t *blk_ring_rec;	/* Private copy of requests, used for
					 * recovery.  Responses not stored here. */
static BLKIF_RING_IDX resp_cons_rec;	/* Copy of response consumer, used for
					 * recovery */
static int recovery = 0;		/* "Recovery in progress" flag.  Protected
					 * by the io_request_lock */

/* We plug the I/O ring if the driver is suspended or if the ring is full. */
#define RING_PLUGGED (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
			(blkif_state != BLKIF_STATE_CONNECTED))

/*
 * Request queues with outstanding work, but ring is currently full.
 * We need no special lock here, as we always access this with the
 * io_request_lock held.  We only need a small maximum list.
 */
#define MAX_PENDING 8
/* find a plan 9 equivalent
static request_queue_t *pending_queues[MAX_PENDING];
static int nr_pending;
*/

static int sg_operation = -1;
static unsigned long sg_next_sect;
#define DISABLE_SCATTERGATHER()	(sg_operation = -1)

/*
 * Publish our private request producer index to the shared ring and
 * kick the backend via its event channel.
 */
static void
flush_requests(void)
{
	DISABLE_SCATTERGATHER();
	blk_ring->req_prod = req_prod;
	notify_via_evtchn(blkif_evtchn);
}

/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only; here it is the address of the issuer's
 *     completion flag, set to 1 by blkif_int when the I/O finishes.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into.  this should be a
 *     virtual address in the guest os.
 *
 * Returns 0 if the request was queued, 1 if the interface is not yet
 * connected (the caller must retry).
 */
static int
blkif_queue_request(unsigned long id, int operation, unsigned char *buffer,
	unsigned long sector_number, unsigned short nr_sectors,
	unsigned long device)
{
	unsigned long buffer_ma = xen_va_to_ma(buffer);
	blkif_request_t *req;
	unsigned int fsect, lsect;

	/* first and last 512-byte sector touched within the page */
	fsect = (buffer_ma & (BY2PG-1)) >> 9;
	lsect = fsect + nr_sectors - 1;

	/* Buffer must be sector-aligned.  Extent mustn't cross a page boundary. */
	if(((buffer_ma & ((1<<9)-1)) != 0))
		panic("buffer not sector-aligned\n");
	if(lsect > 7)
		panic("lsect > 7 in blkif_queue_request\n");

	buffer_ma = PPN(buffer_ma);
	LOG(dp("buffer_ma is 0x%ulx, fsect 0x%x, lsect 0x%x\n",
		buffer_ma, fsect, lsect);)

	if((blkif_state != BLKIF_STATE_CONNECTED))
		return 1;

	switch(operation){
	case BLKIF_OP_WRITE:
	case BLKIF_OP_READ:
#ifdef NOT
		/* scatter_gather */
		if((sg_operation == operation) &&
		   (sg_dev == device) &&
		   (sg_next_sect == sector_number)){
			req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req;
			req->id = id;
			req->frame_and_sects[req->nr_segments] =
				buffer_ma | (fsect<<3) | lsect;
			if(++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
				sg_next_sect += nr_sectors;
			else
				DISABLE_SCATTERGATHER();

			/* Update the copy of the request in the recovery ring. */
			blk_ring_rec->ring[MASK_BLKIF_IDX(
				blk_ring_rec->req_prod - 1)].req = *req;
			return 0;
		}
		else
#endif
		/* ring plugged, eh? I don't think so in Plan 9 ...
		if ( RING_PLUGGED )
		{
			return 1;
		}
		*/
#ifdef NOT
		else
		{
			sg_operation = operation;
			sg_dev = device;
			sg_next_sect = sector_number + nr_sectors;
		}
#endif
		break;
	default:
		panic("unknown op %d\n", operation);
	}

	/* Fill out a communications ring structure. */
	LOG(dp("Fill out the ring ...\n");)
	req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
	req->id = id;
	req->operation = operation;
	req->sector_number = (blkif_sector_t)sector_number;
	req->device = device;
	req->nr_segments = 1;
	req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
	req_prod++;
	LOG(dp("req_f_a_s is 0x%ulx\n", req->frame_and_sects[0]);)
	blk_ring->req_prod = req_prod;
	notify_via_evtchn(blkif_evtchn);
	LOG(dp("req_prod is now ... %d\n", req_prod);)

	/* Keep a private copy so we can reissue requests when recovering. */
	/* eh? That can come later!
	blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req = *req;
	blk_ring_rec->req_prod++;
	*/
	return 0;
}

/*
 * Interrupt handler: consume responses from the shared ring.
 * READ/WRITE completions set the issuer's completion flag (whose
 * address was passed as the request id); PROBE completions are
 * copied into blkif_control_rsp for blkif_control_send.
 */
static void
blkif_int(Ureg *, void *)
{
	BLKIF_RING_IDX i;

	ilock(&io_request_lock);

	if((blkif_state == BLKIF_STATE_CLOSED || recovery)){
		LOG(dp("Bailed out\n");)
		iunlock(&io_request_lock);
		return;
	}

	LOG(dp(" blkif_int before for loop\n");)
	for(i = resp_cons; i != blk_ring->resp_prod; i++){
		blkif_response_t bret;
		unsigned char *c = (void *)&blk_ring->ring[MASK_BLKIF_IDX(i)].resp;

		/*
		 * Unpack the response by hand: 4-byte id, 1-byte op,
		 * 2-byte status -- the wire format is not naturally aligned.
		 */
		bret.id = *(ulong *)c;
		bret.operation = *(uchar *)&c[4];
		bret.status = *(ushort *)&c[5];
		LOG(dp("bret id %lud op %d status %d\n",
			bret.id, bret.operation, bret.status);)
		switch(bret.operation){
		case BLKIF_OP_READ:
		case BLKIF_OP_WRITE:
			/* bret.id is the address of the issuer's done flag */
			LOG(dp("interrupt for read or write, ...\n");)
			*(unsigned long *)bret.id = 1;
			break;
		case BLKIF_OP_PROBE:
			memmove(&blkif_control_rsp, &bret, sizeof(bret));
			blkif_control_rsp_valid = 1;
			break;
		default:
			LOG(dp("blkif_int: unknown op %d\n", bret.operation);)
			break;
		}
	}
	resp_cons = i;
	resp_cons_rec = i;

	// kick_pending_request_queues();
	iunlock(&io_request_lock);
}

/*
 * Send a control request (e.g. PROBE) and spin-sched until blkif_int
 * flags the matching response.  Blocks while the ring is full.
 */
void
blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
retry:
	while((req_prod - resp_cons) == BLKIF_RING_SIZE)
		sched();

	ilock(&io_request_lock);
	if((req_prod - resp_cons) == BLKIF_RING_SIZE){
		/* ring filled up again before we got the lock; try again */
		iunlock(&io_request_lock);
		goto retry;
	}

	DISABLE_SCATTERGATHER();
	memmove(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req,
		req, sizeof(*req));
	memmove(&blk_ring_rec->ring[MASK_BLKIF_IDX(
		blk_ring_rec->req_prod++)].req, req, sizeof(*req));
	req_prod++;
	flush_requests();

	iunlock(&io_request_lock);

	while(!blkif_control_rsp_valid)
		sched();

	memmove(rsp, &blkif_control_rsp, sizeof(*rsp));
	blkif_control_rsp_valid = 0;
}

/*
 * Handle an interface-status message from the domain controller:
 * allocate and share the I/O ring on DISCONNECTED, bind the event
 * channel and probe (or recover outstanding requests) on CONNECTED.
 */
static void
blkif_status_change(blkif_fe_interface_status_changed_t *status)
{
	ctrl_msg_t cmsg;
	blkif_fe_interface_connect_t diskup;

	if(status->handle != 0){
		print("Status change on unsupported blkif %d\n",
			status->handle);
		dp("=============> bad status change\n");
		return;
	}

	print("===========> blkif_status_change to %d\n", status->status);
	switch(status->status){
	case BLKIF_INTERFACE_STATUS_DESTROYED:
		print("Unexpected blkif-DESTROYED message in state %d\n",
			blkif_state);
		break;

	case BLKIF_INTERFACE_STATUS_DISCONNECTED:
		if(blkif_state != BLKIF_STATE_CLOSED){
			print("Unexpected blkif-DISCONNECTED message"
				" in state %d\n", blkif_state);
			print("VBD driver recovery in progress\n");

			/* Prevent new requests being issued until we fix things up. */
			ilock(&io_request_lock);
			recovery = 1;
			blkif_state = BLKIF_STATE_DISCONNECTED;
			iunlock(&io_request_lock);

			/* Free resources associated with old device channel. */
			// free_page((unsigned long)blk_ring);
			// free_irq(blkif_irq, NULL);
			// unbind_evtchn_from_irq(blkif_evtchn);
		}

		/* Move from CLOSED to DISCONNECTED state. */
		blk_ring = (blkif_ring_t *)xspanalloc(BY2PG, BY2PG, 0);
		/* avoid xspanalloc bug */
		memset(blk_ring, 0, BY2PG);
		blk_ring->req_prod = blk_ring->resp_prod =
			resp_cons = req_prod = 0;
		blkif_state = BLKIF_STATE_DISCONNECTED;

		/* Construct an interface-CONNECT message for the domain controller. */
		cmsg.type = CMSG_BLKIF_FE;
		cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT;
		cmsg.length = sizeof(blkif_fe_interface_connect_t);
		diskup.handle = 0;
		diskup.shmem_frame = xen_mm_mfn(blk_ring) >> PGSHIFT;
		memmove(cmsg.msg, &diskup, sizeof(diskup));

		/* Tell the controller to bring up the interface. */
		ctrl_if_send_message_block(&cmsg, nil, 0, 0);
		break;

	case BLKIF_INTERFACE_STATUS_CONNECTED:
		if(blkif_state == BLKIF_STATE_CLOSED){
			print("Unexpected blkif-CONNECTED message"
				" in state %d\n", blkif_state);
			break;
		}

		blkif_evtchn = status->evtchn;
		print("===========> evtchn for blkif is %d\n", blkif_evtchn);
		blkif_irq = bind_evtchn_to_irq(blkif_evtchn, 0);
		intrenable(blkif_irq, blkif_int, 0, 0, "blkif");
		print("====> enable disk interrupt\n");

		if(recovery){
			int i;

			/* Shouldn't need the io_request_lock here - the device is
			 * plugged and the recovery flag prevents the interrupt handler
			 * changing anything. */

			/* Reissue requests from the private block ring. */
			for(i = 0; resp_cons_rec < blk_ring_rec->req_prod;
			    resp_cons_rec++, i++){
				blk_ring->ring[i].req =
					blk_ring_rec->ring[
					MASK_BLKIF_IDX(resp_cons_rec)].req;
			}

			/* Reset the private block ring to match the new ring. */
			memmove(blk_ring_rec, blk_ring, sizeof(*blk_ring));
			resp_cons_rec = 0;

			/* blk_ring->req_prod will be set when we flush_requests().*/
			blk_ring_rec->req_prod = req_prod = i;
			wmb();

			/* Switch off recovery mode, using a memory barrier to ensure
			 * that it's seen before we flush requests - we don't want to
			 * miss any interrupts. */
			recovery = 0;
			wmb();

			/* Kicks things back into life. */
			flush_requests();
		}
		else{
			/* Probe for discs that are attached to the interface. */
			dp("======> PROBE\n");
			xlvbd_init();
		}

		blkif_state = BLKIF_STATE_CONNECTED;

		/* Kick pending requests.
		 * ilock(&io_request_lock);
		 * kick_pending_request_queues();
		 * iunlock(&io_request_lock);
		 */
		print("=========> controller connected\n");
		break;

	default:
		print("Status change to unknown value %d\n", status->status);
		break;
	}
}

/*
 * Dispatch a control-interface message from the domain controller.
 * Only INTERFACE_STATUS_CHANGED is handled; everything else is a
 * parse error reflected back with length 0.
 */
static void
blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long)
{
	dp("================>blkif_ctrlif_rx subtype %d\n", msg->subtype);
	switch(msg->subtype){
	case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
		print("===========> status changed! msg length %d want %d\n",
			msg->length,
			sizeof(blkif_fe_interface_status_changed_t));
		/*
		 * NOTE(review): the wire message is 12 bytes even though
		 * sizeof(blkif_fe_interface_status_changed_t) may differ
		 * because of padding -- hence the literal check.
		 */
		if(msg->length != 12)
			goto parse_error;
		blkif_status_change((blkif_fe_interface_status_changed_t *)
			&msg->msg[0]);
		break;
#ifdef NOTNOWAY
	case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
		update_tq.routine = update_vbds_task;
		schedule_task(&update_tq);
		break;
#endif
	default:
		dp("==========> PARSE ERROR!\n");
		goto parse_error;
	}

	ctrl_if_send_response(msg);
	return;

parse_error:
	dp("============> PARSE ERROR!\n");
	msg->length = 0;
	ctrl_if_send_response(msg);
}

/*
 * Driver start-up: allocate the recovery ring, register the control
 * message receiver, and send a driver-UP notification.  The actual
 * ring/event-channel setup happens in blkif_status_change when the
 * controller replies.
 */
int
xlblk_init(void)
{
	ctrl_msg_t cmsg;
	blkif_fe_driver_status_changed_t st;

	LOG(dp("xlblk_init\n");)

	print("Initialising Xen virtual block device\n");

	blk_ring_rec = mallocalign(BY2PG, BY2PG, 0, 0);
	if(blk_ring_rec == nil)
		panic("xlblk_init: no memory for recovery ring");
	memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));

	ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
		CALLBACK_IN_BLOCKING_CONTEXT);

	/* Send a driver-UP notification to the domain controller. */
	cmsg.type = CMSG_BLKIF_FE;
	cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
	cmsg.length = sizeof(blkif_fe_driver_status_changed_t);
	st.status = BLKIF_DRIVER_STATUS_UP;
	memmove(cmsg.msg, &st, sizeof(st));
	ctrl_if_send_message_block(&cmsg, nil, 0, 0);
	LOG(dp("============>Sent a message to fire it up\n");)

	/*
	 * We should read 'nr_interfaces' from response message and wait
	 * for notifications before proceeding.  For now we assume that we
	 * will be notified of exactly one interface.
	 */
	dp("============> DONE xlblk_init\n");
	return 0;
}

/* end of the Xen code */

extern SDifc sdxenifc;

typedef struct Ctlr Ctlr;
struct Ctlr {
	int	readonly;
	SDev*	sdev;
};

/*
 * Block I/O entry point for the sd layer.  Transfers nb 512-byte
 * blocks starting at bno through a page-aligned bounce buffer,
 * because each ring request may not cross a page boundary.
 * Returns the number of bytes transferred.
 */
static long
sdxenbio(SDunit* unit, int, int write, void* data, long nb, long bno)
{
	long rlen;
	unsigned char *pages, *datap = data;
	int offset, nsects;

	pages = mallocalign(BY2PG, BY2PG, 0, 0);
	if(waserror()){
		dp("free pages for error\n");
		free(pages);
		nexterror();
	}
	LOG(dp("sdxenbio, pages %p\n", pages);)
	LOG(dp("sdxenbio: unit %p, write %d, data %p, nb %lud, bno 0x%lux\n",
		unit, write, data, nb, bno);)

	rlen = 0;
	while(nb){
		int done;

		/* figure out the nsects given the offset into the page */
		offset = bno & 7;
		nsects = 8 - offset;
		if(nsects > nb)
			nsects = nb;

		if(write)
			memmove(pages + offset*512, datap, nsects * 512);

		/* queue a request for this page, for nsects sectors */
		LOG(dp("queue page %p, nsects %d, offset 0x%ux, bno 0x%lux\n",
			pages, nsects, offset, bno);)
		done = 0;
		/*
		 * blkif_queue_request returns non-zero while the interface
		 * is not yet connected; retry rather than spin forever on
		 * a `done' flag that would never be set.
		 */
		while(blkif_queue_request((unsigned long)&done,
		    write ? BLKIF_OP_WRITE : BLKIF_OP_READ,
		    (void *)((unsigned long)pages + offset*512),
		    bno, nsects, vbd_info[unit->subno].device) != 0)
			sched();
		/* blkif_int sets done when the transfer completes */
		while(!done)
			sched();

		nb -= nsects;
		if(!write)
			memmove(datap, pages + offset * 512, nsects * 512);
		datap += nsects * 512;
		rlen += nsects * 512;
		bno += nsects;
	}

	LOG(dp("ALL DONE sdxen bio!\n");)
	poperror();
	free(pages);
	return rlen;
}

/* Raw SCSI-style requests are not supported on a Xen VBD. */
static int
sdxenrio(SDreq*)
{
	dp("sdxenrio will return -1\n");
	return -1;
}

/*
 * Report unit geometry from the probe results.  Returns 1 when the
 * unit exists, 0 otherwise.
 */
static int
sdxenonline(SDunit* unit)
{
	LOG(dp("sdxenonline\n");)

	if(nr_vbds <= 0 || unit->subno >= nr_vbds)
		return 0;
	unit->sectors = vbd_info[unit->subno].capacity;
	unit->secsize = 512;
	// if (VDISK_READONLY(vbd_info[0].info))
	//	unit->wpunit = 1;
	return 1;
}

/* Nothing to do: interrupts are wired up in blkif_status_change. */
static int
sdxenenable(SDev*)
{
	dp("sdxenenable\n");
	return 1;
}

/*
 * Wait for the backend interface to connect, then report whether
 * the unit exists.
 */
int
sdxenverify(SDunit* unit)
{
	print("sdxenverify+%d: nr %d\n", unit->subno, nr_vbds);
	while(blkif_state != BLKIF_STATE_CONNECTED){
		dp("========> sched until bllkif is ready\n");
		sched();
	}
	dp("=========> BLKIF ready\n");
	print("sdxenverify-%d: nr %d\n", unit->subno, nr_vbds);
	if(nr_vbds <= 0 || unit->subno >= nr_vbds)
		return 0;
	return 1;
}

/* Assign a unit letter via the shared scsiid helper. */
static SDev*
sdxenid(SDev* sdev)
{
	dp("sdxenid\n");
	/*
	 * scsiid restricts the max. number of units
	 * to 16 (see below in sdxenpnp).
	 */
	return scsiid(sdev, &sdxenifc);
}

/*
 * Plug-and-play entry: allocate the controller and SDev, then kick
 * off the Xen block-interface handshake via xlblk_init.
 */
static SDev*
sdxenpnp(void)
{
	Ctlr *ctlr;
	SDev *sdev;

	print("sdxenpnp\n");

	if((ctlr = malloc(sizeof(Ctlr))) == nil)
		return nil;
	if((sdev = malloc(sizeof(SDev))) == nil){
		free(ctlr);
		return nil;
	}

	sdev->ifc = &sdxenifc;
	sdev->ctlr = ctlr;
	sdev->nunit = 16;
	xlblk_init();
	if(sdev->nunit > 16)
		sdev->nunit = 16;
	ctlr->sdev = sdev;
	print("sdxenpnp returns %p, %d\n", sdev, sdev->nunit);
	return sdev;
}

SDifc sdxenifc = {
	"sdxen",		/* name */

	sdxenpnp,		/* pnp */
	nil,			/* legacy */
	sdxenid,		/* id */
	sdxenenable,		/* enable */
	nil,			/* disable */

	sdxenverify,		/* verify */
	sdxenonline,		/* online */
	sdxenrio,		/* rio */
	nil,			/* rctl */
	nil,			/* wctl */

	sdxenbio,		/* bio */
	nil,			/* probe */
	nil,			/* clear */
	nil,			/* stat */
};