#include "all.h" #include "mem.h" #include "io.h" #include "ureg.h" /* * task state segment. Plan 9 ignores all the task switching goo and just * uses the tss for esp0 and ss0 on gate's into the kernel, interrupts, * and exceptions. The rest is completely ignored. * * This means that we only need one tss in the whole system. */ typedef struct Tss Tss; struct Tss { ulong backlink; /* unused */ ulong sp0; /* pl0 stack pointer */ ulong ss0; /* pl0 stack selector */ ulong sp1; /* pl1 stack pointer */ ulong ss1; /* pl1 stack selector */ ulong sp2; /* pl2 stack pointer */ ulong ss2; /* pl2 stack selector */ ulong cr3; /* page table descriptor */ ulong eip; /* instruction pointer */ ulong eflags; /* processor flags */ ulong eax; /* general (hah?) registers */ ulong ecx; ulong edx; ulong ebx; ulong esp; ulong ebp; ulong esi; ulong edi; ulong es; /* segment selectors */ ulong cs; ulong ss; ulong ds; ulong fs; ulong gs; ulong ldt; /* local descriptor table */ ulong iomap; /* io map base */ }; Tss tss; /* * segment descriptor initializers */ #define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW } #define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR } #define CALLGATE(s,o,p) { ((o)&0xFFFF)|((s)<<16), (o)&0xFFFF0000|SEGP|SEGPL(p)|SEGCG } #define D16SEGM(p) { 0xFFFF, (0x0<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW } #define E16SEGM(p) { 0xFFFF, (0x0<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR } #define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\ ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP } /* * global descriptor table describing all segments */ Segdesc gdt[] = { [NULLSEG] { 0, 0}, /* null descriptor */ [KDSEG] DATASEGM(0), /* kernel data/stack */ [KESEG] EXECSEGM(0), /* kernel code */ [UDSEG] DATASEGM(3), /* user data/stack */ [UESEG] EXECSEGM(3), /* user code */ [TSSSEG] TSSSEGM(0,0), /* tss segment */ }; static struct { ulong va; ulong pa; } ktoppg; /* prototype top level page table * containing kernel mappings */ static ulong *kpt; /* 2nd level page tables for kernel mem */ #define ROUNDUP(s,v) (((s)+(v-1))&~(v-1)) /* * offset of virtual address into * top level page table */ #define TOPOFF(v) (((ulong)(v))>>(2*PGSHIFT-2)) /* * offset of virtual address into * bottom level page table */ #define BTMOFF(v) ((((ulong)(v))>>(PGSHIFT))&(WD2PG-1)) /* * Change current page table and the stack to use for exceptions * (traps & interrupts). The exception stack comes from the tss. * Since we use only one tss, (we hope) there's no need for a * puttr(). */ static void taskswitch(ulong pagetbl, ulong stack) { tss.ss0 = KDSEL; tss.sp0 = stack; tss.ss1 = KDSEL; tss.sp1 = stack; tss.ss2 = KDSEL; tss.sp2 = stack; tss.cr3 = pagetbl; putcr3(pagetbl); } /* * Create a prototype page map that maps all of memory into * kernel (KZERO) space. This is the default map. It is used * whenever the processor is not running a process or whenever running * a process which does not yet have its own map. */ void mmuinit(void) { int i, nkpt, npage, nbytes; ulong x; ulong y; ulong *top; /* * set up the global descriptor table. we make the tss entry here * since it requires arithmetic on an address and hence cannot * be a compile or link time constant. */ x = (ulong)&tss; gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss); gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP; putgdt(gdt, sizeof gdt); /* * set up system page tables. * map all of physical memory to start at KZERO. * leave a map entry for a user area. 
	/*
	 * allocate top level table
	 */
	top = ialloc(BY2PG, BY2PG);
	ktoppg.va = (ulong)top;
	ktoppg.pa = ktoppg.va & ~KZERO;

	/* map all memory to KZERO */
	npage = mconf.topofmem/BY2PG;
	nbytes = PGROUND(npage*BY2WD);		/* bytes of page map */
	nkpt = nbytes/BY2PG;			/* pages of page map */
	kpt = ialloc(nbytes, BY2PG);
	for(i = 0; i < npage; i++)
		kpt[i] = (0+i*BY2PG) | PTEVALID | PTEKERNEL | PTEWRITE;
	x = TOPOFF(KZERO);
	y = ((ulong)kpt)&~KZERO;
	for(i = 0; i < nkpt; i++)
		top[x+i] = (y+i*BY2PG) | PTEVALID | PTEKERNEL | PTEWRITE;

	/*
	 * set up the task segment
	 */
	memset(&tss, 0, sizeof(tss));
	taskswitch(ktoppg.pa, BY2PG + (ulong)m);
	puttr(TSSSEL);	/**/
}

/*
 * used to map a page into 16 meg - BY2PG for confinit().  tpt is the
 * temporary page table set up by l.s.
 */
long*
mapaddr(ulong addr)
{
	ulong base;
	ulong off;
	static ulong *pte, top;
	extern ulong tpt[];

	if(pte == 0){
		top = (((ulong)tpt)+(BY2PG-1))&~(BY2PG-1);
		pte = (ulong*)top;
		top &= ~KZERO;
		top += BY2PG;
		pte += (4*1024*1024-BY2PG)>>PGSHIFT;
	}

	base = off = addr;
	base &= ~(KZERO|(BY2PG-1));
	off &= BY2PG-1;

	*pte = base|PTEVALID|PTEKERNEL|PTEWRITE;	/**/
	putcr3((ulong)top);

	return (long*)(KZERO | (4*1024*1024-BY2PG) | off);
}

#define	PDX(va)		((((ulong)(va))>>22) & 0x03FF)
#define	PTX(va)		((((ulong)(va))>>12) & 0x03FF)
#define	PPN(x)		((x)&~(BY2PG-1))
#define	KADDR(a)	((void*)((ulong)(a)|KZERO))

ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong pa, *table;

	/*
	 * Walk the page-table pointed to by pdb and return a pointer
	 * to the entry for virtual address va at the requested level.
	 * If the entry is invalid and create isn't requested then bail
	 * out early.  Otherwise, for the 2nd level walk, allocate a new
	 * page-table page and register it in the 1st level.
	 */
	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){
	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va 0x%ux entry 0x%ux\n", va, *table);
		if(!(*table & PTEVALID)){
			pa = PADDR(ialloc(BY2PG, BY2PG));
			*table = pa|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}

static Lock mmukmaplock;

ulong
mmukmap(ulong pa, ulong va, int size)
{
	ulong pae, *table, *pdb, pgsz, *pte, x;
	int pse, sync;
	extern int cpuidax, cpuiddx;

	pdb = (ulong*)ktoppg.va;
	if((cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;
	sync = 0;

	pa = PPN(pa);
	if(va == 0)
		va = (ulong)KADDR(pa);
	else
		va = PPN(va);
	pae = pa + size;

	lock(&mmukmaplock);
	while(pa < pae){
		table = &pdb[PDX(va)];

		/*
		 * Possibly already mapped.
		 */
		if(*table & PTEVALID){
			if(*table & PTESIZE){
				/*
				 * Big page.  Does it fit within?
				 * If it does, adjust pgsz so the correct end can be
				 * returned and get out.
				 * If not, adjust pgsz up to the next 4MB boundary
				 * and continue.
				 */
				x = PPN(*table);
				if(x != pa)
					panic("mmukmap1: pa 0x%ux entry 0x%ux\n",
						pa, *table);
				x += 4*MB;
				if(pae <= x){
					pa = pae;
					break;
				}
				pgsz = x - pa;
				pa += pgsz;
				va += pgsz;
				continue;
			}
			else{
				/*
				 * Little page.  Walk to the entry.
				 * If the entry is valid, set pgsz and continue.
				 * If not, make it so, set pgsz, sync and continue.
				 */
				pte = mmuwalk(pdb, va, 2, 0);
				if(pte && *pte & PTEVALID){
					x = PPN(*pte);
					if(x != pa)
						panic("mmukmap2: pa 0x%ux entry 0x%ux\n",
							pa, *pte);
					pgsz = BY2PG;
					pa += pgsz;
					va += pgsz;
					sync++;
					continue;
				}
			}
		}

		/*
		 * Not mapped.  Check if it can be mapped using a big page -
		 * starts on a 4MB boundary, size >= 4MB and processor can do it.
		 * If not a big page, walk the walk, talk the talk.
		 * Sync is set.
		 */
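		/*
		 * Added commentary: pse was computed at the top of the
		 * function from CPUID EDX bit 3 (the processor supports
		 * the 4MB page size extension) and CR4 bit 4 (PSE has
		 * been enabled), so the PTESIZE path below is taken only
		 * when the hardware is known to honour 4MB entries;
		 * otherwise everything is mapped with 4KB second-level
		 * entries via mmuwalk.
		 */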
		if(pse && (pa % (4*MB)) == 0 && (pae >= pa+4*MB)){
			*table = pa|PTESIZE|PTEWRITE|PTEUNCACHED|PTEVALID;
			pgsz = 4*MB;
		}
		else{
			pte = mmuwalk(pdb, va, 2, 1);
			*pte = pa|PTEWRITE|PTEUNCACHED|PTEVALID;
			pgsz = BY2PG;
		}
		pa += pgsz;
		va += pgsz;
		sync++;
	}
	unlock(&mmukmaplock);

	/*
	 * If something was added
	 * then need to sync up.
	 */
	if(sync)
		putcr3(ktoppg.pa);

	return pa;
}

ulong
upamalloc(ulong addr, int size, int align)
{
	ulong ae;

	/*
	 * Another horrible hack because
	 * I CAN'T BE BOTHERED WITH THIS FILESERVER BEING
	 * COMPLETELY INCOMPATIBLE ANYMORE.
	 */
	if((addr < mconf.topofmem) || align)
		panic("upamalloc: (0x%lux < 0x%lux) || %d\n",
			addr, mconf.topofmem, align);

	ae = mmukmap(addr, 0, size);

	/*
	 * Should check here that it was all delivered
	 * and put it back and barf if not.
	 */
	USED(ae);

	/*
	 * Be very careful: this returns a PHYSICAL address.
	 */
	return addr;
}
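/*
 * Usage sketch (added; not part of the original source, and the
 * device address and size below are hypothetical).  A driver wanting
 * an uncached view of a frame buffer at physical address 0xFD000000
 * might do:
 *
 *	ulong pa;
 *	uchar *fb;
 *
 *	pa = upamalloc(0xFD000000, 4*MB, 0);
 *	fb = (uchar*)KADDR(pa);
 *
 * upamalloc hands back the PHYSICAL address, so the caller converts
 * it with KADDR before touching the memory; because va == 0 is passed
 * to mmukmap, the mapping was installed at exactly KADDR(pa).
 */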