diff options
-rw-r--r-- | sys/i386/i386/busdma_machdep.c | 5 | ||||
-rw-r--r-- | sys/i386/i386/genassym.c | 6 | ||||
-rw-r--r-- | sys/i386/i386/machdep.c | 314 | ||||
-rw-r--r-- | sys/i386/i386/support.s | 4 | ||||
-rw-r--r-- | sys/i386/i386/sys_machdep.c | 96 | ||||
-rw-r--r-- | sys/i386/i386/trap.c | 15 | ||||
-rw-r--r-- | sys/i386/i386/vm_machdep.c | 22 | ||||
-rw-r--r-- | sys/i386/include/asmacros.h | 57 | ||||
-rw-r--r-- | sys/i386/include/cpufunc.h | 44 | ||||
-rw-r--r-- | sys/i386/include/pcpu.h | 24 | ||||
-rw-r--r-- | sys/i386/include/pmap.h | 71 | ||||
-rw-r--r-- | sys/i386/include/segments.h | 15 | ||||
-rw-r--r-- | sys/i386/include/trap.h | 3 | ||||
-rw-r--r-- | sys/i386/include/vmparam.h | 9 | ||||
-rw-r--r-- | sys/i386/pci/pci_cfgreg.c | 10 | ||||
-rw-r--r-- | sys/i386/pci/pci_pir.c | 9 |
16 files changed, 687 insertions, 17 deletions
diff --git a/sys/i386/i386/busdma_machdep.c b/sys/i386/i386/busdma_machdep.c index 7069d2d5ac78..4ff4f920abb3 100644 --- a/sys/i386/i386/busdma_machdep.c +++ b/sys/i386/i386/busdma_machdep.c @@ -144,6 +144,11 @@ int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); int _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, int flags); +#ifdef XEN +#undef pmap_kextract +#define pmap_kextract pmap_kextract_ma +#endif + /* * Return true if a match is made. * diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index c1df8db791af..4a76e0ae8b21 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -228,3 +228,9 @@ ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame)); ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base)); ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat)); #endif + +#ifdef XEN +#include <machine/xen/hypervisor.h> +ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3)); +ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START); +#endif diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index e38015f35dd4..d2d3e7c6a933 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -141,6 +141,25 @@ int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif +#ifdef XEN +/* XEN includes */ +#include <machine/xen/xen-os.h> +#include <machine/xen/hypervisor.h> +#include <machine/xen/xen-os.h> +#include <machine/xen/xenvar.h> +#include <machine/xen/xenfunc.h> +#include <machine/xen/xen_intr.h> + +void Xhypervisor_callback(void); +void failsafe_callback(void); + +extern trap_info_t trap_table[]; +struct proc_ldt default_proc_ldt; +extern int init_first; +int running_xen = 1; +extern unsigned long physfree; +#endif /* XEN */ + /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); @@ -287,8 +306,9 @@ cpu_startup(dummy) */ bufinit(); vm_pager_bufferinit(); - +#ifndef XEN cpu_setregs(); +#endif } /* @@ -1118,6 +1138,24 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) return (0); } + +void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ + +#ifdef XEN + +void +cpu_halt(void) +{ + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +} + +static void +cpu_idle_hlt(int busy) +{ + idle_block(); +} + +#else /* * Shutdown the CPU as much as possible */ @@ -1128,8 +1166,6 @@ cpu_halt(void) __asm__ ("hlt"); } -void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ - static void cpu_idle_hlt(int busy) { @@ -1143,6 +1179,7 @@ cpu_idle_hlt(int busy) else __asm __volatile("sti; hlt"); } +#endif static void cpu_idle_acpi(int busy) @@ -1437,10 +1474,16 @@ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, */ int _default_ldt; + +#ifdef XEN +union descriptor *gdt; +union descriptor *ldt; +#else union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ +union descriptor ldt[NLDT]; /* local descriptor table */ +#endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ -union descriptor ldt[NLDT]; /* local descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ @@ -1542,6 +1585,7 @@ struct soft_segment_descriptor gdt_segs[] = { 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, +#ifndef XEN /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ @@ -1633,6 +1677,7 @@ struct soft_segment_descriptor gdt_segs[] = { 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, +#endif /* !XEN */ }; static struct soft_segment_descriptor ldt_segs[] = { @@ -1870,7 +1915,16 @@ getmemsize(int first) goto physmap_done; } #endif - +#if defined(XEN) + has_smap = 0; + Maxmem = xen_start_info->nr_pages - init_first; + physmem = Maxmem; + basemem = 0; + physmap[0] = init_first << PAGE_SHIFT; + physmap[1] = ptoa(Maxmem) - round_page(MSGBUF_SIZE); + physmap_idx = 0; + goto physmap_done; +#endif hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(vmf)); @@ -2032,7 +2086,7 @@ int15e820: vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; -#else +#elif !defined(XEN) /* * Prefer the RTC value for extended memory. */ @@ -2123,6 +2177,7 @@ physmap_done: getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; +#ifndef XEN /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. @@ -2240,7 +2295,11 @@ do_next: } *pte = 0; invltlb(); - +#else + phys_avail[0] = physfree; + phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; +#endif + /* * XXX * The last chunk must contain at least one page plus the message @@ -2265,6 +2324,246 @@ do_next: off); } +#ifdef XEN +#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) + +void +init386(first) + int first; +{ + struct gate_descriptor *gdp; + unsigned long gdtmachpfn; + int error, gsel_tss, metadata_missing, x; + struct pcpu *pc; + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback }, + }; + struct callback_register failsafe = { + .type = CALLBACKTYPE_failsafe, + .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback }, + }; + + thread0.td_kstack = proc0kstack; + thread0.td_pcb = (struct pcb *) + (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + + /* + * This may be done better later if it gets more high level + * components in it. If so just link td->td_proc here. + */ + proc_linkup0(&proc0, &thread0); + + metadata_missing = 0; + if (xen_start_info->mod_start) { + preload_metadata = (caddr_t)xen_start_info->mod_start; + preload_bootstrap_relocate(KERNBASE); + } else { + metadata_missing = 1; + } + if (envmode == 1) + kern_envp = static_env; + else if (bootinfo.bi_envp) + kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; + + /* Init basic tunables, hz etc */ + init_param1(); + + /* + * XEN occupies a portion of the upper virtual address space + * At its base it manages an array mapping machine page frames + * to physical page frames - hence we need to be able to + * access 4GB - (64MB - 4MB + 64k) + */ + gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); + gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); + gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); + gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); + gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); + gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); + + pc = &__pcpu[0]; + gdt_segs[GPRIV_SEL].ssd_base = (int) pc; + gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; + + PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW); + bzero(gdt, PAGE_SIZE); + for (x = 0; x < NGDT; x++) + ssdtosd(&gdt_segs[x], &gdt[x].sd); + + mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); + + gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; + PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~(PG_RW|PG_M|PG_A)); + PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); + lgdt(&r_gdt); + gdtset = 1; + + if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { + panic("set_trap_table failed - error %d\n", error); + } + + error = HYPERVISOR_callback_op(CALLBACKOP_register, &event); + if (error == 0) + error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); +#if CONFIG_XEN_COMPAT <= 0x030002 + if (error == -ENOXENSYS) + HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), + (unsigned long)Xhypervisor_callback, + GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); +#endif + pcpu_init(pc, 0, sizeof(struct pcpu)); + PCPU_SET(prvspace, pc); + PCPU_SET(curthread, &thread0); + PCPU_SET(curpcb, thread0.td_pcb); + + /* + * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. + */ + mutex_init(); + mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); + + /* make ldt memory segments */ + ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); + ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); + for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) + ssdtosd(&ldt_segs[x], &ldt[x].sd); + + default_proc_ldt.ldt_base = (caddr_t)ldt; + default_proc_ldt.ldt_len = 6; + _default_ldt = (int)&default_proc_ldt; + PCPU_SET(currentldt, _default_ldt) + PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); + xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); + +#if defined(XEN_PRIVILEGED) + /* + * Initialize the i8254 before the console so that console + * initialization can use DELAY(). + */ + i8254_init(); +#endif + + /* + * Initialize the console before we print anything out. + */ + cninit(); + + if (metadata_missing) + printf("WARNING: loader(8) metadata is missing!\n"); + +#ifdef DEV_ISA + elcr_probe(); + atpic_startup(); +#endif + +#ifdef DDB + ksym_start = bootinfo.bi_symtab; + ksym_end = bootinfo.bi_esymtab; +#endif + + kdb_init(); + +#ifdef KDB + if (boothowto & RB_KDB) + kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); +#endif + + finishidentcpu(); /* Final stage of CPU initialization */ + setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + initializecpu(); /* Initialize CPU registers */ + + /* make an initial tss so cpu can get interrupt stack on syscall! */ + /* Note: -16 is so we can grow the trapframe if we came from vm86 */ + PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); + PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); + PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); + PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); + PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); + ltr(gsel_tss); + + /* pointer to selector slot for %fs/%gs */ + PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); + + dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = + dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; + dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = + dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else + dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif + dblfault_tss.tss_eip = (int)dblfault_handler; + dblfault_tss.tss_eflags = PSL_KERNEL; + dblfault_tss.tss_ds = dblfault_tss.tss_es = + dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); + dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); + dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); + dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); + + vm86_initialize(); + getmemsize(first); + init_param2(physmem); + + /* now running on new page tables, configured,and u/iom is accessible */ + + msgbufinit(msgbufp, MSGBUF_SIZE); + + /* make a call gate to reenter kernel with */ + gdp = &ldt[LSYS5CALLS_SEL].gd; + + x = (int) &IDTVEC(lcall_syscall); + gdp->gd_looffset = x; + gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); + gdp->gd_stkcpy = 1; + gdp->gd_type = SDT_SYS386CGT; + gdp->gd_dpl = SEL_UPL; + gdp->gd_p = 1; + gdp->gd_hioffset = x >> 16; + + /* XXX does this work? */ + /* XXX yes! */ + ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; + ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; + + /* transfer to user mode */ + + _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); + _udatasel = GSEL(GUDATA_SEL, SEL_UPL); + + /* setup proc 0's pcb */ + thread0.td_pcb->pcb_flags = 0; +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else + thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif + thread0.td_pcb->pcb_ext = 0; + thread0.td_frame = &proc0_tf; + thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0]; + thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; +} + +#else void init386(first) int first; @@ -2527,6 +2826,7 @@ init386(first) thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } +#endif void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s index 5c8cd857c37d..cbf0d8280739 100644 --- a/sys/i386/i386/support.s +++ b/sys/i386/i386/support.s @@ -1459,10 +1459,12 @@ END(bcmp) */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) +#ifndef XEN /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) - +#endif + /* flush the prefetch q */ jmp 1f nop diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c index 97ebe3477be4..f792f2a5d2a6 100644 --- a/sys/i386/i386/sys_machdep.c +++ b/sys/i386/i386/sys_machdep.c @@ -58,6 +58,20 @@ __FBSDID("$FreeBSD$"); #include <security/audit/audit.h> +#ifdef XEN +#include <machine/xen/xenfunc.h> + +void i386_reset_ldt(struct proc_ldt *pldt); + +void +i386_reset_ldt(struct proc_ldt *pldt) +{ + xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len); +} +#else +#define i386_reset_ldt(x) +#endif + #include <vm/vm_kern.h> /* for kernel_map */ #define MAX_LD 8192 @@ -164,7 +178,12 @@ sysarch(td, uap) */ sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; +#ifdef XEN + /* need to do nosegneg like Linux */ + sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff; +#else sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ +#endif sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; @@ -174,7 +193,12 @@ sysarch(td, uap) sd.sd_gran = 1; critical_enter(); td->td_pcb->pcb_fsd = sd; +#ifdef XEN + HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]), + *(uint64_t *)&sd); +#else PCPU_GET(fsgs_gdt)[0] = sd; +#endif critical_exit(); td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL); } @@ -194,7 +218,13 @@ sysarch(td, uap) */ sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; + +#ifdef XEN + /* need to do nosegneg like Linux */ + sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff; +#else sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ +#endif sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; @@ -204,7 +234,12 @@ sysarch(td, uap) sd.sd_gran = 1; critical_enter(); td->td_pcb->pcb_gsd = sd; +#ifdef XEN + HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]), + *(uint64_t *)&sd); +#else PCPU_GET(fsgs_gdt)[1] = sd; +#endif critical_exit(); load_gs(GSEL(GUGS_SEL, SEL_UPL)); } @@ -360,6 +395,10 @@ set_user_ldt(struct mdproc *mdp) } pldt = mdp->md_ldt; +#ifdef XEN + i386_reset_ldt(pldt); + PCPU_SET(currentldt, (int)pldt); +#else #ifdef SMP gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd; #else @@ -367,6 +406,7 @@ set_user_ldt(struct mdproc *mdp) #endif lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL)); +#endif /* XEN */ if (dtlocked) mtx_unlock_spin(&dt_lock); } @@ -385,6 +425,44 @@ set_user_ldt_rv(struct vmspace *vmsp) } #endif +#ifdef XEN + +/* + * dt_lock must be held. Returns with dt_lock held. + */ +struct proc_ldt * +user_ldt_alloc(struct mdproc *mdp, int len) +{ + struct proc_ldt *pldt, *new_ldt; + + mtx_assert(&dt_lock, MA_OWNED); + mtx_unlock_spin(&dt_lock); + MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt), + M_SUBPROC, M_WAITOK); + + new_ldt->ldt_len = len = NEW_MAX_LD(len); + new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, + round_page(len * sizeof(union descriptor))); + if (new_ldt->ldt_base == NULL) { + FREE(new_ldt, M_SUBPROC); + return NULL; + } + new_ldt->ldt_refcnt = 1; + new_ldt->ldt_active = 0; + + if ((pldt = mdp->md_ldt)) { + if (len > pldt->ldt_len) + len = pldt->ldt_len; + bcopy(pldt->ldt_base, new_ldt->ldt_base, + len * sizeof(union descriptor)); + } else { + bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE); + } + pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base, + new_ldt->ldt_len*sizeof(union descriptor)); + return new_ldt; +} +#else /* * dt_lock must be held. Returns with dt_lock held. */ @@ -423,6 +501,7 @@ user_ldt_alloc(struct mdproc *mdp, int len) return (new_ldt); } +#endif /* !XEN */ /* * Must be called with dt_lock held. Returns with dt_lock unheld. @@ -667,7 +746,23 @@ again: td->td_retval[0] = uap->start; return (error); } +#ifdef XEN +static int +i386_set_ldt_data(struct thread *td, int start, int num, + union descriptor *descs) +{ + struct mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt = mdp->md_ldt; + mtx_assert(&dt_lock, MA_OWNED); + + /* Fill in range */ + bcopy(descs, + &((union descriptor *)(pldt->ldt_base))[start], + num * sizeof(union descriptor)); + return (0); +} +#else static int i386_set_ldt_data(struct thread *td, int start, int num, union descriptor *descs) @@ -683,6 +778,7 @@ i386_set_ldt_data(struct thread *td, int start, int num, num * sizeof(union descriptor)); return (0); } +#endif /* !XEN */ static int i386_ldt_grow(struct thread *td, int len) diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 3306e7c8dbb8..999e73d389cb 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -123,6 +123,14 @@ dtrace_doubletrap_func_t dtrace_doubletrap_func; systrace_probe_func_t systrace_probe_func; #endif +#ifdef XEN +#include <sys/syslog.h> +#include <machine/xen/xenfunc.h> +#include <machine/xen/hypervisor.h> +#include <machine/xen/xenvar.h> +#include <machine/xen/evtchn.h> +#endif + extern void trap(struct trapframe *frame); extern void syscall(struct trapframe *frame); @@ -151,7 +159,11 @@ static char *trap_msg[] = { "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ +#ifdef XEN + "hypervisor callback", /* 17 T_HYPCALLBACK*/ +#else "", /* 17 unused */ +#endif "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ @@ -258,6 +270,7 @@ trap(struct trapframe *frame) goto out; #endif +#ifndef XEN if ((frame->tf_eflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled @@ -288,7 +301,7 @@ trap(struct trapframe *frame) enable_intr(); } } - +#endif /* !XEN */ eva = 0; code = frame->tf_err; if (type == T_PAGEFLT) { diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index c5d381c42aa0..a4b1c5c70bd2 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -88,6 +88,9 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_map.h> #include <vm/vm_param.h> +#ifdef XEN +#include <machine/xen/hypervisor.h> +#endif #ifdef PC98 #include <pc98/cbus/cbus.h> #else @@ -264,6 +267,9 @@ cpu_fork(td1, p2, td2, flags) /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; + /* + * XXX XEN need to check on PSL_USER is handled + */ td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; /* @@ -594,6 +600,9 @@ cpu_reset_real() #endif disable_intr(); +#ifdef XEN + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +#endif #ifdef CPU_ELAN if (elan_mmcr != NULL) elan_mmcr->RESCFG = 1; @@ -759,7 +768,12 @@ sf_buf_alloc(struct vm_page *m, int flags) */ ptep = vtopte(sf->kva); opte = *ptep; +#ifdef XEN + PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag + | PG_RW | PG_V); +#else *ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V; +#endif /* * Avoid unnecessary TLB invalidations: If the sf_buf's old @@ -809,6 +823,14 @@ sf_buf_free(struct sf_buf *sf) if (sf->ref_count == 0) { TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); nsfbufsused--; +#ifdef XEN +/* + * Xen doesn't like having dangling R/W mappings + */ + pmap_qremove(sf->kva, 1); + sf->m = NULL; + LIST_REMOVE(sf, list_entry); +#endif if (sf_buf_alloc_want > 0) wakeup_one(&sf_buf_freelist); } diff --git a/sys/i386/include/asmacros.h b/sys/i386/include/asmacros.h index fbc93b7d69d5..c1c3f645cbf5 100644 --- a/sys/i386/include/asmacros.h +++ b/sys/i386/include/asmacros.h @@ -176,6 +176,63 @@ movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \ movl %eax, %fs +#ifdef XEN +#define LOAD_CR3(reg) \ + movl reg,PCPU(CR3); \ + pushl %ecx ; \ + pushl %edx ; \ + pushl %esi ; \ + pushl reg ; \ + call xen_load_cr3 ; \ + addl $4,%esp ; \ + popl %esi ; \ + popl %edx ; \ + popl %ecx ; \ + +#define READ_CR3(reg) movl PCPU(CR3),reg; +#define LLDT(arg) \ + pushl %edx ; \ + pushl %eax ; \ + xorl %eax,%eax ; \ + movl %eax,%gs ; \ + call i386_reset_ldt ; \ + popl %eax ; \ + popl %edx +#define CLI call ni_cli +#else +#define LOAD_CR3(reg) movl reg,%cr3; +#define READ_CR3(reg) movl %cr3,reg; +#define LLDT(arg) lldt arg; +#define CLI cli +#endif /* !XEN */ + + #endif /* LOCORE */ +#ifdef __STDC__ +#define ELFNOTE(name, type, desctype, descdata...) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz #name ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection +#else /* !__STDC__, i.e. -traditional */ +#define ELFNOTE(name, type, desctype, descdata) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz "name" ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection +#endif /* __STDC__ */ + #endif /* !_MACHINE_ASMACROS_H_ */ diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index fc61eab9b5f5..948bde1c3079 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -42,6 +42,16 @@ #error this file needs sys/cdefs.h as a prerequisite #endif +#ifdef XEN +extern void xen_cli(void); +extern void xen_sti(void); +extern void xen_load_cr3(u_int data); +extern void xen_tlb_flush(void); +extern void xen_invlpg(u_int addr); +extern void xen_save_and_cli(u_int *eflags); +extern void xen_restore_flags(u_int eflags); +#endif + struct region_descriptor; #define readb(va) (*(volatile u_int8_t *) (va)) @@ -81,7 +91,11 @@ bsrl(u_int mask) static __inline void disable_intr(void) { - __asm __volatile("cli" : : : "memory"); +#ifdef XEN + xen_cli(); +#else + __asm __volatile("cli" : : : "memory"); +#endif } static __inline void @@ -103,7 +117,11 @@ cpuid_count(u_int ax, u_int cx, u_int *p) static __inline void enable_intr(void) { +#ifdef XEN + xen_sti(); +#else __asm __volatile("sti"); +#endif } static inline void @@ -412,8 +430,11 @@ rcr2(void) static __inline void load_cr3(u_int data) { - +#ifdef XEN + xen_load_cr3(data); +#else __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); +#endif } static __inline u_int @@ -446,8 +467,11 @@ rcr4(void) static __inline void invltlb(void) { - +#ifdef XEN + xen_tlb_flush(); +#else load_cr3(rcr3()); +#endif } /* @@ -458,7 +482,11 @@ static __inline void invlpg(u_int addr) { +#ifdef XEN + xen_invlpg(addr); +#else __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +#endif } static __inline u_int @@ -662,17 +690,25 @@ load_dr7(u_int dr7) static __inline register_t intr_disable(void) { - register_t eflags; + register_t eflags = 0; +#ifdef XEN + xen_save_and_cli(&eflags); +#else eflags = read_eflags(); disable_intr(); +#endif return (eflags); } static __inline void intr_restore(register_t eflags) { +#ifdef XEN + xen_restore_flags(eflags); +#else write_eflags(eflags); +#endif } #else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */ diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index c28ae64ca8ca..ea8ff4667758 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -45,6 +45,29 @@ * to each CPU's data can be set up for things like "check curproc on all * other processors" */ + +#ifdef XEN +#define PCPU_MD_FIELDS \ + char pc_monitorbuf[128] __aligned(128); /* cache line */ \ + struct pcpu *pc_prvspace; /* Self-reference */ \ + struct pmap *pc_curpmap; \ + struct i386tss pc_common_tss; \ + struct segment_descriptor pc_common_tssd; \ + struct segment_descriptor *pc_tss_gdt; \ + struct segment_descriptor *pc_fsgs_gdt; \ + vm_paddr_t *pc_pdir_shadow; \ + int pc_currentldt; \ + u_int pc_acpi_id; /* ACPI CPU id */ \ + u_int pc_apic_id; \ + int pc_private_tss; /* Flag indicating private tss*/\ + u_int pc_cr3; /* track cr3 for R1/R3*/ \ + u_int pc_pdir; \ + u_int pc_lazypmap; \ + u_int pc_rendezvous; \ + u_int pc_cpuast + + +#else #define PCPU_MD_FIELDS \ char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ @@ -58,6 +81,7 @@ u_int pc_apic_id; \ int pc_private_tss /* Flag indicating private tss */ +#endif #ifdef lint extern struct pcpu *pcpup; diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index 1cd2bded596d..66e17a96424b 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -198,6 +198,72 @@ extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ #define vtopte(va) (PTmap + i386_btop(va)) #define vtophys(va) pmap_kextract((vm_offset_t)(va)) +#ifdef XEN +#include <sys/param.h> +#include <machine/xen/xen-os.h> +#include <machine/xen/xenvar.h> +#include <machine/xen/xenpmap.h> + +extern pt_entry_t pg_nx; + +#define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M) + +#define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma))) +#define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m))) + +static __inline vm_paddr_t +pmap_kextract_ma(vm_offset_t va) +{ + vm_paddr_t ma; + if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) { + ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1)); + } else { + ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK); + } + return ma; +} + +static __inline vm_paddr_t +pmap_kextract(vm_offset_t va) +{ + return xpmap_mtop(pmap_kextract_ma(va)); +} +#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va))) + +vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va); + +void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa); +void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len); +void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len); + +static __inline pt_entry_t +pte_load_store(pt_entry_t *ptep, pt_entry_t v) +{ + pt_entry_t r; + + v = xpmap_ptom(v); + r = *ptep; + PT_SET_VA(ptep, v, TRUE); + return (r); +} + +static __inline pt_entry_t +pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v) +{ + pt_entry_t r; + + r = *ptep; + PT_SET_VA_MA(ptep, v, TRUE); + return (r); +} + +#define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL) + +#define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte) +#define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) +#define pde_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) + +#elif !defined(XEN) /* * Routine: pmap_kextract * Function: @@ -217,8 +283,9 @@ pmap_kextract(vm_offset_t va) } return pa; } +#endif -#ifdef PAE +#if defined(PAE) && !defined(XEN) #define pde_cmpset(pdep, old, new) \ atomic_cmpset_64((pdep), (old), (new)) @@ -277,7 +344,7 @@ atomic_cmpset_64(volatile uint64_t *dst, uint64_t exp, uint64_t src) extern pt_entry_t pg_nx; -#else /* PAE */ +#elif !defined(PAE) && !defined (XEN) #define pde_cmpset(pdep, old, new) \ atomic_cmpset_int((pdep), (old), (new)) diff --git a/sys/i386/include/segments.h b/sys/i386/include/segments.h index 351ff5dff8fe..2edcc59073b4 100644 --- a/sys/i386/include/segments.h +++ b/sys/i386/include/segments.h @@ -47,7 +47,11 @@ */ #define ISPL(s) ((s)&3) /* what is the priority level of a selector */ +#ifdef XEN +#define SEL_KPL 1 /* kernel priority level */ +#else #define SEL_KPL 0 /* kernel priority level */ +#endif #define SEL_UPL 3 /* user priority level */ #define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */ #define SEL_LDT 4 /* local descriptor table */ @@ -222,7 +226,11 @@ struct region_descriptor { #define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */ #define GNDIS_SEL 18 /* For the NDIS layer */ +#ifdef XEN +#define NGDT 9 +#else #define NGDT 19 +#endif /* * Entries in the Local Descriptor Table (LDT) @@ -240,10 +248,15 @@ struct region_descriptor { #ifdef _KERNEL extern int _default_ldt; +#ifdef XEN +extern union descriptor *gdt; +extern union descriptor *ldt; +#else extern union descriptor gdt[]; +extern union descriptor ldt[NLDT]; +#endif extern struct soft_segment_descriptor gdt_segs[]; extern struct gate_descriptor *idt; -extern union descriptor ldt[NLDT]; extern struct region_descriptor r_gdt, r_idt; void lgdt(struct region_descriptor *rdp); diff --git a/sys/i386/include/trap.h b/sys/i386/include/trap.h index f0176b2c97e7..82208d2a39ff 100644 --- a/sys/i386/include/trap.h +++ b/sys/i386/include/trap.h @@ -49,6 +49,9 @@ #define T_PAGEFLT 12 /* page fault */ #define T_ALIGNFLT 14 /* alignment fault */ +#ifdef XEN +#define T_HYPCALLBACK 17 /* hypervisor upcall */ +#endif #define T_DIVIDE 18 /* integer divide fault */ #define T_NMI 19 /* non-maskable trap */ #define T_OFLOW 20 /* overflow trap */ diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h index 3d34419b9844..f114232e90bb 100644 --- a/sys/i386/include/vmparam.h +++ b/sys/i386/include/vmparam.h @@ -145,8 +145,12 @@ * Kernel physical load address. */ #ifndef KERNLOAD +#if defined(XEN) && !defined(XEN_PRIVILEGED_GUEST) +#define KERNLOAD 0 +#else #define KERNLOAD (1 << PDRSHIFT) #endif +#endif /* !defined(KERNLOAD) */ /* * Virtual addresses of things. Derived from the page directory and @@ -155,7 +159,12 @@ * messy at times, but hey, we'll do anything to save a page :-) */ +#ifdef XEN +#define VM_MAX_KERNEL_ADDRESS HYPERVISOR_VIRT_START +#else #define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1) +#endif + #define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI) #define KERNBASE VADDR(KPTDI, 0) diff --git a/sys/i386/pci/pci_cfgreg.c b/sys/i386/pci/pci_cfgreg.c index e8be30547ec1..6c78fd5488de 100644 --- a/sys/i386/pci/pci_cfgreg.c +++ b/sys/i386/pci/pci_cfgreg.c @@ -82,9 +82,11 @@ static struct mtx pcicfg_mtx; static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes); static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes); +#ifndef XEN static int pcireg_cfgopen(void); static int pciereg_cfgopen(void); +#endif static int pciereg_cfgread(int bus, int slot, int func, int reg, int bytes); static void pciereg_cfgwrite(int bus, int slot, int func, int reg, @@ -105,6 +107,7 @@ pci_i386_map_intline(int line) return (line); } +#ifndef XEN static u_int16_t pcibios_get_version(void) { @@ -125,6 +128,7 @@ pcibios_get_version(void) } return (args.ebx & 0xffff); } +#endif /* * Initialise access to PCI configuration space @@ -132,6 +136,9 @@ pcibios_get_version(void) int pci_cfgregopen(void) { +#ifdef XEN + return (0); +#else static int opened = 0; u_int16_t vid, did; u_int16_t v; @@ -175,6 +182,7 @@ pci_cfgregopen(void) } return(1); +#endif } /* @@ -353,6 +361,7 @@ pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes) mtx_unlock_spin(&pcicfg_mtx); } +#ifndef XEN /* check whether the configuration mechanism has been correctly identified */ static int pci_cfgcheck(int maxdev) @@ -530,6 +539,7 @@ pciereg_cfgopen(void) devmax = 32; return (1); } +#endif /* !XEN */ #define PCIE_PADDR(bar, reg, bus, slot, func) \ ((bar) | \ diff --git a/sys/i386/pci/pci_pir.c b/sys/i386/pci/pci_pir.c index 52fb62fbb6ec..c275de094f5c 100644 --- a/sys/i386/pci/pci_pir.c +++ b/sys/i386/pci/pci_pir.c @@ -138,6 +138,9 @@ pci_pir_open(void) int i; uint8_t ck, *cv; +#ifdef XEN + return; +#else /* Don't try if we've already found a table. */ if (pci_route_table != NULL) return; @@ -148,7 +151,7 @@ pci_pir_open(void) sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0); if (sigaddr == 0) return; - +#endif /* If we found something, check the checksum and length. */ /* XXX - Use pmap_mapdev()? */ pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); @@ -479,7 +482,11 @@ pci_pir_biosroute(int bus, int device, int func, int pin, int irq) args.eax = PCIBIOS_ROUTE_INTERRUPT; args.ebx = (bus << 8) | (device << 3) | func; args.ecx = (irq << 8) | (0xa + pin); +#ifdef XEN + return (0); +#else return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))); +#endif } |