/******************************************************************************
 * xen/mm/hypervisor.c
 *
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002, K A Fraser
 */

/* NOTE(review): the header names of these Linux includes were lost when this
 * file was reflowed.  The bare #include lines are never processed in the
 * Plan 9 build because the Plan 9 compilers do not predefine "linux", so the
 * whole group is skipped -- but the names should be recovered from the
 * original Linux-side hypervisor.c if this is ever built under Linux. */
#ifdef linux
#include
#include
#include
#include
#include
#include
#include
#include
#include
#endif

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../xen/xen.h"

/* Debug logging is compiled out: LOG(...) expands to nothing. */
#define LOG(a)

/* wmb is a write memory barrier. I will leave it in here for now just as a
 * marker if we have troubles that might be cured by such a thing.
#define wmb()
*/
/* NOTE(review): the "#define wmb()" above sits inside the comment, yet wmb()
 * is called below -- presumably wmb is defined in one of the included
 * headers; confirm, or move the define out of the comment. */

/* pfn -> machine frame, and physical address -> machine address helpers. */
#define physpfn_to_mfn(a) (xen_mm_mfn((void *) (a)))
#define xen_pa_to_ma(a) (xen_va_to_ma((void *)(a)))

/*
 * This suffices to protect us if we ever move to SMP domains.
 * Further, it protects us against interrupts. At the very least, this is
 * required for the network driver which flushes the update queue before
 * pushing new receive buffers.
 */
static Lock update_lock;
//static spinlock_t update_lock = SPIN_LOCK_UNLOCKED;

/* Batch of pending page-table updates, pushed to Xen in one hypercall. */
#define QUEUE_SIZE 2048
static mmu_update_t update_queue[QUEUE_SIZE];
unsigned int mmu_update_queue_idx = 0;	/* next free slot in update_queue */
#define idx mmu_update_queue_idx

/* Both debug modes are forced off; the blocks below are dead code. */
#undef MMU_UPDATE_DEBUG
#undef MMU_UPDATE_DEBUG_RECORD

#ifdef MMU_UPDATE_DEBUG_RECORD
/* this ain't define anywhere
page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}};
*/
#undef queue_l1_entry_update
#undef queue_l2_entry_update
/* plan 9 doesn't go overboard with this pte_t stuff. We're in the arch
 * directory here. PTEs are unsigned longs and that's that.
*/ static void DEBUG_allow_pt_reads(void) { unsigned long *pte; mmu_update_t update; int i; for ( i = idx-1; i >= 0; i-- ) { pte = update_debug_queue[i].ptep; if ( pte == NULL ) continue; update_debug_queue[i].ptep = NULL; update.ptr = PADDR(pte); update.val = update_debug_queue[i].pteval; HYPERVISOR_mmu_update(&update, 1, NULL); } } static void DEBUG_disallow_pt_read(unsigned long va) { ulong *pte; unsigned long pteval; /* * We may fault because of an already outstanding update. * That's okay -- it'll get fixed up in the fault handler. */ mmu_update_t update; pte = mmuwalk(mach0->pdb, va, 0); update.ptr = virt_to_machine(pte); pteval = *(unsigned long *)pte; update.val = pteval & ~_PAGE_PRESENT; HYPERVISOR_mmu_update(&update, 1, NULL); update_debug_queue[idx].ptep = pte; update_debug_queue[idx].pteval = pteval; } #endif #ifdef MMU_UPDATE_DEBUG_RECORD #undef queue_pt_switch #undef queue_tlb_flush #undef queue_invlpg #undef queue_pgd_pin #undef queue_pgd_unpin #undef queue_pte_pin #undef queue_pte_unpin #endif #ifdef NOTYET /* * MULTICALL_flush_page_update_queue: * This is a version of the flush which queues as part of a multicall. */ void MULTICALL_flush_page_update_queue(void) { unsigned int _idx; ilock(&update_lock); if ( (_idx = idx) != 0 ) { #ifdef MMU_UPDATE_DEBUG dp("Flushing %d entries from pt update queue\n", idx); #endif #ifdef MMU_UPDATE_DEBUG_RECORD DEBUG_allow_pt_reads(); #endif idx = 0; wmb(); /* Make sure index is cleared first to avoid double updates. */ queue_multicall3(__HYPERVISOR_mmu_update, (unsigned long)update_queue, (unsigned long)_idx, (unsigned long)0); } iunlock(&update_lock); } #endif static void __flush_page_update_queue(void) { unsigned int _idx = idx; #ifdef MMU_UPDATE_DEBUG dp("Flushing %d entries from pt update queue\n", idx); #endif #ifdef MMU_UPDATE_DEBUG_RECORD DEBUG_allow_pt_reads(); #endif idx = 0; wmb(); /* Make sure index is cleared first to avoid double updates. 
*/ if ((HYPERVISOR_mmu_update(update_queue, _idx, 0) < 0) ) panic("Failed to execute MMU updates"); } void _flush_page_update_queue(void) { ilock(&update_lock); if ( idx != 0 ) __flush_page_update_queue(); iunlock(&update_lock); } static void increment_index(void) { idx++; if ((idx == QUEUE_SIZE) ) __flush_page_update_queue(); } /* the 'val' here is a pfn with permission bits. We need to turn it into an MFN */ void queue_l1_entry_update(unsigned long *pteptr, unsigned long pval) { unsigned long mval; ilock(&update_lock); #ifdef MMU_UPDATE_DEBUG_RECORD DEBUG_disallow_pt_read((unsigned long)ptr); #endif mval = xen_pa_to_ma(pval); // dp("ql1ue: P 0x%ulx xmfn 0x%ulx mval 0x%ulx\n", // PADDR(pteptr), xen_va_to_ma(pteptr), mval); update_queue[idx].ptr = xen_va_to_ma(pteptr); update_queue[idx].val = mval; increment_index(); iunlock(&update_lock); } int set_va_mfn(void *va, unsigned long mfn, unsigned long perm) { unsigned long *pte; Mach *mach0 = (Mach *) MACHADDR; // dp("set_va_mfn: mach0 is %p\n", mach0); // dp("Try to mmuwalk ... probably will fail\n"); pte = mmuwalk(mach0->pdb, (unsigned long) va, 2, 0); // dp("pte for %p is %p\n", va, pte); if (! pte) return -1; // dp("queue request for va %p to be 0x%ulx\n", // (va), mfn<>PGSHIFT, (mfn<pte_low >> PAGE_SHIFT; queue_l1_entry_update(pte, 0); phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = 0xdeadbeef; } flush_page_update_queue(); dom_mem_op.op = MEMOP_RESERVATION_DECREASE; dom_mem_op.u.decrease.size = 1<>PAGE_SHIFT); phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = pfn_array[i]; } flush_page_update_queue(); vfree(pfn_array); free_pages(vstart, order); } #endif /* CONFIG_XEN_PHYSDEV_ACCESS */ /* startup stuff, it is here because we don't want to reference the mfn outside of this file */ extended_start_info_t xen_start_info; volatile shared_info_t *HYPERVISOR_shared_info = 0; unsigned long *mfn; void xen_mm_startup(void) { /* start_info is at first page. 
*/ xen_start_info = *((extended_start_info_t *) KZERO); mfn = (unsigned long *) xen_start_info.mfn_list; } void xen_mm_shared_info(void) { #ifdef NOT int i, j; volatile unsigned char *cp; #endif HYPERVISOR_shared_info = (shared_info_t *)0x80002000; /* set by xc_plan9_build */ #ifdef NOT /**/ set_va_mfn(HYPERVISOR_shared_info, xen_start_info.shared_info, PTEWRITE|PTEVALID); /**/ #endif #ifdef NOT cp = (unsigned char *)HYPERVISOR_shared_info; for(i = 0; i < 4096; i += 16) { dp("0x%x: ", i); for(j = 0; j < 16; j++) { volatile unsigned char dpv; // if (((i+j)<8) && ((i+j)>0)) // cp[i+j] = 0; if (cp[i+j]) dp("%02x ", cp[i+j]); dpv = cp[i+j]; cp[i+j] = dpv; } dp("\n"); } #endif } static unsigned long ma_to_pa_map[1<<20]; extern void xen_meminit(unsigned long, unsigned long, unsigned long, unsigned long); void xen_mm_meminit(void) { int i; xen_meminit( xen_start_info.pt_base, xen_start_info.nr_pt_frames, xen_start_info.mfn_list, xen_start_info.nr_pages); for(i = 0; i < xen_start_info.nr_pages; i++) ma_to_pa_map[mfn[i]] = i; } void xen_mm_info(void){ extended_start_info_t *x = &xen_start_info; dp("xen_start_info\n"); dp(" nr_pages %uld\n", x->nr_pages); dp(" shared_info 0x%ulx\n", x->shared_info); dp(" flags 0x%ux\n", x->flags); dp(" pt_base 0x%ulx\n", x->pt_base); dp(" nr_pt_frames %uld\n", x->nr_pt_frames); dp(" mfn_list 0x%ulx\n", x->mfn_list); dp(" shared info %p\n", HYPERVISOR_shared_info); dp(" mfn %p\n", mfn); dp(" mfn[0] 0x%ulx\n", mfn[0]); } /* note that because of the Plan 9 KADDR/PADDR scheme, this function actually * works fine for BOTH kernel virtual address and physical addresses */ /* this one should get the frame, but you need a VA to MA function, idiot! 
*/ extern unsigned long *mfn; unsigned long xen_mm_mfn(void *va) { unsigned long pmfn; pmfn = mfn[PADDR(va)>>PGSHIFT]; // LOG(dp("PMFN: 0x%ulx\n", pmfn)); pmfn <<= PGSHIFT; // LOG(dp("PMFN: return 0x%lx\n", pmfn)); return pmfn; } /* well, this sucks, but you can't really build a table as things can change * out from under you */ unsigned long xen_ma_to_pa(unsigned long ma) { unsigned long offset = ((unsigned long)ma) & (BY2PG-1); unsigned long pfn, pa; ma >>= PGSHIFT; pfn = ma_to_pa_map[ma]; pa = pfn << PGSHIFT; pa |= offset; // dp("xen_ma_to_pa: ma 0x%ulx, pa 0x%ulx\n", ma, pa); return pa; } unsigned long xen_va_to_ma(void *va) { unsigned long frame = xen_mm_mfn(va); unsigned long offset = ((unsigned long)va) & (BY2PG -1); unsigned long retval = frame | offset; return retval; } void xen_mm_readonly(void *vva) { unsigned long va = (unsigned long) vva; /* dp("xen_readonly: 0x%ulx set to 0x%ulx flags 0x%x\n", ((unsigned long)va)>>PGSHIFT, (xen_mm_mfn(vva))|PTEVALID|PTERONLY, UVMF_INVLPG); */ HYPERVISOR_update_va_mapping(((unsigned long)va)>>PGSHIFT, (xen_mm_mfn(vva))|PTEVALID|PTERONLY, UVMF_INVLPG); } void xen_mm_readwrite(void *vva) { unsigned long va = (unsigned long) vva; /* dp("xen_readwrite: 0x%ulx set to 0x%ulx flags 0x%x\n", ((unsigned long)va)>>PGSHIFT, (xen_mm_mfn(vva))|PTEVALID|PTEWRITE, UVMF_INVLPG); */ HYPERVISOR_update_va_mapping(((unsigned long)va)>>PGSHIFT, (xen_mm_mfn(vva))|PTEVALID|PTEWRITE, UVMF_INVLPG); } void xen_mm_setl2(void *l2, unsigned long *pteptr) { LOG(dp(" quee l2 entry update for 0x%ulx\n", pteptr)); LOG(dp("0x%ulx set to 0x%ulx flags 0x%x\n", ((unsigned long)l2)>>PGSHIFT, xen_mm_mfn(l2)|PTEVALID|PTEWRITE, UVMF_INVLPG)); HYPERVISOR_update_va_mapping(((unsigned long)l2)>>PGSHIFT, xen_mm_mfn(l2)|PTEVALID|PTERONLY, UVMF_INVLPG); queue_l2_entry_update(pteptr, xen_mm_mfn(l2)|PTEUSER|PTEWRITE|PTEVALID); /* have to do this here! */ /* could be fancy and do tricks but won't. 
*/ _flush_page_update_queue(); } int xen_mm_decrease_reservation(unsigned long *pfn_array, int npfn) { int ret; ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, pfn_array, npfn, 0 ); if (ret < 0) { dp( "Unable to reduce memory reservation (%d)\n", ret); } return ret; }