aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/i386/i386/busdma_machdep.c5
-rw-r--r--sys/i386/i386/genassym.c6
-rw-r--r--sys/i386/i386/machdep.c314
-rw-r--r--sys/i386/i386/support.s4
-rw-r--r--sys/i386/i386/sys_machdep.c96
-rw-r--r--sys/i386/i386/trap.c15
-rw-r--r--sys/i386/i386/vm_machdep.c22
-rw-r--r--sys/i386/include/asmacros.h57
-rw-r--r--sys/i386/include/cpufunc.h44
-rw-r--r--sys/i386/include/pcpu.h24
-rw-r--r--sys/i386/include/pmap.h71
-rw-r--r--sys/i386/include/segments.h15
-rw-r--r--sys/i386/include/trap.h3
-rw-r--r--sys/i386/include/vmparam.h9
-rw-r--r--sys/i386/pci/pci_cfgreg.c10
-rw-r--r--sys/i386/pci/pci_pir.c9
16 files changed, 687 insertions, 17 deletions
diff --git a/sys/i386/i386/busdma_machdep.c b/sys/i386/i386/busdma_machdep.c
index 7069d2d5ac78..4ff4f920abb3 100644
--- a/sys/i386/i386/busdma_machdep.c
+++ b/sys/i386/i386/busdma_machdep.c
@@ -144,6 +144,11 @@ int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
int _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
bus_size_t buflen, int flags);
+#ifdef XEN
+/*
+ * Under XEN, redirect every later use of pmap_kextract in this file to
+ * pmap_kextract_ma.
+ * NOTE(review): presumably because bus DMA must work with machine rather
+ * than pseudo-physical addresses -- confirm.
+ */
+#undef pmap_kextract
+#define pmap_kextract pmap_kextract_ma
+#endif
+
/*
* Return true if a match is made.
*
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index c1df8db791af..4a76e0ae8b21 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -228,3 +228,9 @@ ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base));
ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat));
#endif
+
+#ifdef XEN
+#include <machine/xen/hypervisor.h>
+ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3));
+ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START);
+#endif
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index e38015f35dd4..d2d3e7c6a933 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -141,6 +141,25 @@ int arch_i386_is_xbox = 0;
uint32_t arch_i386_xbox_memsize = 0;
#endif
+#ifdef XEN
+/* XEN includes (each header listed once). */
+#include <machine/xen/xen-os.h>
+#include <machine/xen/hypervisor.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+#include <machine/xen/xen_intr.h>
+
+/* Entry points registered with the hypervisor by the XEN init386(). */
+void Xhypervisor_callback(void);
+void failsafe_callback(void);
+
+/* Table handed to HYPERVISOR_set_trap_table() by the XEN init386(). */
+extern trap_info_t trap_table[];
+/* Backing object for _default_ldt; filled in by the XEN init386(). */
+struct proc_ldt default_proc_ldt;
+/* Used by getmemsize() to compute Maxmem and physmap[0] under XEN. */
+extern int init_first;
+int running_xen = 1;
+/* Becomes phys_avail[0] in getmemsize() under XEN. */
+extern unsigned long physfree;
+#endif /* XEN */
+
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
@@ -287,8 +306,9 @@ cpu_startup(dummy)
*/
bufinit();
vm_pager_bufferinit();
-
+#ifndef XEN
cpu_setregs();
+#endif
}
/*
@@ -1118,6 +1138,24 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
return (0);
}
+
+void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
+
+#ifdef XEN
+
+/*
+ * XEN cpu_halt(): issue a SHUTDOWN_poweroff shutdown request to the
+ * hypervisor instead of executing a halt instruction.
+ */
+void
+cpu_halt(void)
+{
+
+	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+}
+
+/*
+ * XEN idle method: hand control to idle_block().  The 'busy' hint is not
+ * consulted.
+ */
+static void
+cpu_idle_hlt(int busy)
+{
+
+	(void)busy;
+	idle_block();
+}
+
+#else
/*
* Shutdown the CPU as much as possible
*/
@@ -1128,8 +1166,6 @@ cpu_halt(void)
__asm__ ("hlt");
}
-void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
-
static void
cpu_idle_hlt(int busy)
{
@@ -1143,6 +1179,7 @@ cpu_idle_hlt(int busy)
else
__asm __volatile("sti; hlt");
}
+#endif
static void
cpu_idle_acpi(int busy)
@@ -1437,10 +1474,16 @@ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
*/
int _default_ldt;
+
+#ifdef XEN
+union descriptor *gdt;
+union descriptor *ldt;
+#else
union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */
+union descriptor ldt[NLDT]; /* local descriptor table */
+#endif
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
-union descriptor ldt[NLDT]; /* local descriptor table */
struct region_descriptor r_gdt, r_idt; /* table descriptors */
struct mtx dt_lock; /* lock for GDT and LDT */
@@ -1542,6 +1585,7 @@ struct soft_segment_descriptor gdt_segs[] = {
0, 0,
1, /* default 32 vs 16 bit size */
1 /* limit granularity (byte/page units)*/ },
+#ifndef XEN
/* GPROC0_SEL 9 Proc 0 Tss Descriptor */
{
0x0, /* segment base address */
@@ -1633,6 +1677,7 @@ struct soft_segment_descriptor gdt_segs[] = {
0, 0,
0, /* default 32 vs 16 bit size */
0 /* limit granularity (byte/page units)*/ },
+#endif /* !XEN */
};
static struct soft_segment_descriptor ldt_segs[] = {
@@ -1870,7 +1915,16 @@ getmemsize(int first)
goto physmap_done;
}
#endif
-
+#if defined(XEN)
+ has_smap = 0;
+ Maxmem = xen_start_info->nr_pages - init_first;
+ physmem = Maxmem;
+ basemem = 0;
+ physmap[0] = init_first << PAGE_SHIFT;
+ physmap[1] = ptoa(Maxmem) - round_page(MSGBUF_SIZE);
+ physmap_idx = 0;
+ goto physmap_done;
+#endif
hasbrokenint12 = 0;
TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
bzero(&vmf, sizeof(vmf));
@@ -2032,7 +2086,7 @@ int15e820:
vmf.vmf_ah = 0x88;
vm86_intcall(0x15, &vmf);
extmem = vmf.vmf_ax;
-#else
+#elif !defined(XEN)
/*
* Prefer the RTC value for extended memory.
*/
@@ -2123,6 +2177,7 @@ physmap_done:
getenv_quad("dcons.size", &dcons_size) == 0)
dcons_addr = 0;
+#ifndef XEN
/*
* physmap is in bytes, so when converting to page boundaries,
* round up the start address and round down the end address.
@@ -2240,7 +2295,11 @@ do_next:
}
*pte = 0;
invltlb();
-
+#else
+ phys_avail[0] = physfree;
+ phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
+#endif
+
/*
* XXX
* The last chunk must contain at least one page plus the message
@@ -2265,6 +2324,246 @@ do_next:
off);
}
+#ifdef XEN
+#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
+
+/*
+ * XEN variant of init386(): early machine-dependent bootstrap.
+ *
+ * In order, it links thread0/proc0, locates the preload metadata through
+ * xen_start_info, builds the GDT and hands it to the hypervisor, installs
+ * the trap table and the event/failsafe callbacks, sets up per-CPU data and
+ * the LDT, brings up the console and debugger hooks, fills in the initial
+ * and double-fault TSS, sizes memory via getmemsize(), and finally
+ * initialises proc0's pcb.
+ *
+ * 'first' is passed through unchanged to getmemsize().
+ */
+void
+init386(first)
+ int first;
+{
+ struct gate_descriptor *gdp;
+ unsigned long gdtmachpfn;
+ int error, gsel_tss, metadata_missing, x;
+ struct pcpu *pc;
+ struct callback_register event = {
+ .type = CALLBACKTYPE_event,
+ .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
+ };
+ struct callback_register failsafe = {
+ .type = CALLBACKTYPE_failsafe,
+ .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback },
+ };
+
+ thread0.td_kstack = proc0kstack;
+ thread0.td_pcb = (struct pcb *)
+ (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
+
+ /*
+ * This may be done better later if it gets more high level
+ * components in it. If so just link td->td_proc here.
+ */
+ proc_linkup0(&proc0, &thread0);
+
+ metadata_missing = 0;
+ if (xen_start_info->mod_start) {
+ preload_metadata = (caddr_t)xen_start_info->mod_start;
+ preload_bootstrap_relocate(KERNBASE);
+ } else {
+ metadata_missing = 1;
+ }
+ if (envmode == 1)
+ kern_envp = static_env;
+ else if (bootinfo.bi_envp)
+ kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
+
+ /* Init basic tunables, hz etc */
+ init_param1();
+
+ /*
+ * XEN occupies a portion of the upper virtual address space
+ * At its base it manages an array mapping machine page frames
+ * to physical page frames - hence we need to be able to
+ * access 4GB - (64MB - 4MB + 64k)
+ */
+ gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+ gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+ gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+ gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+ gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+ gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+ gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+ gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+ /*
+ * NOTE(review): the atop(0 - 1) stores below overwrite the limits just
+ * written above for the code, data and %fs/%gs selectors; only
+ * GPRIV_SEL and GBIOSLOWMEM_SEL keep the reduced limit. Confirm
+ * which set of limits is intended.
+ */
+ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
+ gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
+ gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1);
+ gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1);
+ gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1);
+ gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1);
+
+ pc = &__pcpu[0];
+ gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
+ /*
+ * NOTE(review): under XEN the gdt_segs[] initialisers from GPROC0_SEL
+ * onward are compiled out and NGDT is 9, so this index looks out of
+ * range for the array -- confirm.
+ */
+ gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
+
+ /* Make the gdt page writable while it is being filled in. */
+ PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW);
+ bzero(gdt, PAGE_SIZE);
+ for (x = 0; x < NGDT; x++)
+ ssdtosd(&gdt_segs[x], &gdt[x].sd);
+
+ mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
+
+ /* Drop write access again before handing the page to the hypervisor. */
+ gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
+ PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~(PG_RW|PG_M|PG_A));
+ PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
+ lgdt(&r_gdt);
+ gdtset = 1;
+
+ if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
+ panic("set_trap_table failed - error %d\n", error);
+ }
+
+ /*
+ * Register the event and failsafe callbacks. Only -ENOXENSYS is
+ * acted on (by falling back to HYPERVISOR_set_callbacks); any other
+ * error value is not examined further.
+ */
+ error = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
+ if (error == 0)
+ error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
+#if CONFIG_XEN_COMPAT <= 0x030002
+ if (error == -ENOXENSYS)
+ HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL),
+ (unsigned long)Xhypervisor_callback,
+ GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
+#endif
+ pcpu_init(pc, 0, sizeof(struct pcpu));
+ PCPU_SET(prvspace, pc);
+ PCPU_SET(curthread, &thread0);
+ PCPU_SET(curpcb, thread0.td_pcb);
+
+ /*
+ * Initialize mutexes.
+ *
+ * icu_lock: in order to allow an interrupt to occur in a critical
+ * section, to set pcpu->ipending (etc...) properly, we
+ * must be able to get the icu lock, so it can't be
+ * under witness.
+ */
+ mutex_init();
+ mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
+
+ /* make ldt memory segments */
+ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
+ ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
+ for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
+ ssdtosd(&ldt_segs[x], &ldt[x].sd);
+
+ default_proc_ldt.ldt_base = (caddr_t)ldt;
+ /* NOTE(review): literal 6 here vs. the ldt_segs element count below. */
+ default_proc_ldt.ldt_len = 6;
+ _default_ldt = (int)&default_proc_ldt;
+ PCPU_SET(currentldt, _default_ldt);
+ /* The ldt is made read-only before being passed to xen_set_ldt(). */
+ PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW);
+ xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
+
+#if defined(XEN_PRIVILEGED)
+ /*
+ * Initialize the i8254 before the console so that console
+ * initialization can use DELAY().
+ */
+ i8254_init();
+#endif
+
+ /*
+ * Initialize the console before we print anything out.
+ */
+ cninit();
+
+ if (metadata_missing)
+ printf("WARNING: loader(8) metadata is missing!\n");
+
+#ifdef DEV_ISA
+ elcr_probe();
+ atpic_startup();
+#endif
+
+#ifdef DDB
+ ksym_start = bootinfo.bi_symtab;
+ ksym_end = bootinfo.bi_esymtab;
+#endif
+
+ kdb_init();
+
+#ifdef KDB
+ if (boothowto & RB_KDB)
+ kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
+#endif
+
+ finishidentcpu(); /* Final stage of CPU initialization */
+ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ initializecpu(); /* Initialize CPU registers */
+
+ /* make an initial tss so cpu can get interrupt stack on syscall! */
+ /* Note: -16 is so we can grow the trapframe if we came from vm86 */
+ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
+ KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
+ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+ PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
+ PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
+ PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+ ltr(gsel_tss);
+
+ /* pointer to selector slot for %fs/%gs */
+ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
+
+ dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
+ dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
+ dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
+ dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
+#ifdef PAE
+ dblfault_tss.tss_cr3 = (int)IdlePDPT;
+#else
+ dblfault_tss.tss_cr3 = (int)IdlePTD;
+#endif
+ dblfault_tss.tss_eip = (int)dblfault_handler;
+ dblfault_tss.tss_eflags = PSL_KERNEL;
+ dblfault_tss.tss_ds = dblfault_tss.tss_es =
+ dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
+ dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
+ dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
+ dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+
+ vm86_initialize();
+ getmemsize(first);
+ init_param2(physmem);
+
+ /* now running on new page tables, configured,and u/iom is accessible */
+
+ msgbufinit(msgbufp, MSGBUF_SIZE);
+
+ /* make a call gate to reenter kernel with */
+ gdp = &ldt[LSYS5CALLS_SEL].gd;
+
+ x = (int) &IDTVEC(lcall_syscall);
+ gdp->gd_looffset = x;
+ gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
+ gdp->gd_stkcpy = 1;
+ gdp->gd_type = SDT_SYS386CGT;
+ gdp->gd_dpl = SEL_UPL;
+ gdp->gd_p = 1;
+ gdp->gd_hioffset = x >> 16;
+
+ /* XXX does this work? */
+ /* XXX yes! */
+ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
+ ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
+
+ /* transfer to user mode */
+
+ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
+ _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
+
+ /* setup proc 0's pcb */
+ thread0.td_pcb->pcb_flags = 0;
+#ifdef PAE
+ thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
+#else
+ thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
+#endif
+ thread0.td_pcb->pcb_ext = 0;
+ thread0.td_frame = &proc0_tf;
+ /* Seed the saved %fs/%gs descriptors from the per-CPU GDT slots. */
+ thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
+ thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
+}
+
+#else
void
init386(first)
int first;
@@ -2527,6 +2826,7 @@ init386(first)
thread0.td_pcb->pcb_ext = 0;
thread0.td_frame = &proc0_tf;
}
+#endif
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 5c8cd857c37d..cbf0d8280739 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -1459,10 +1459,12 @@ END(bcmp)
*/
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
+#ifndef XEN
/* reload the descriptor table */
movl 4(%esp),%eax
lgdt (%eax)
-
+#endif
+
/* flush the prefetch q */
jmp 1f
nop
diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c
index 97ebe3477be4..f792f2a5d2a6 100644
--- a/sys/i386/i386/sys_machdep.c
+++ b/sys/i386/i386/sys_machdep.c
@@ -58,6 +58,20 @@ __FBSDID("$FreeBSD$");
#include <security/audit/audit.h>
+#ifdef XEN
+#include <machine/xen/xenfunc.h>
+
+void i386_reset_ldt(struct proc_ldt *pldt);
+
+/*
+ * Pass the base address and length recorded in 'pldt' to xen_set_ldt().
+ */
+void
+i386_reset_ldt(struct proc_ldt *pldt)
+{
+	vm_offset_t ldt_va;
+
+	ldt_va = (vm_offset_t)pldt->ldt_base;
+	xen_set_ldt(ldt_va, pldt->ldt_len);
+}
+#else
+#define i386_reset_ldt(x)
+#endif
+
#include <vm/vm_kern.h> /* for kernel_map */
#define MAX_LD 8192
@@ -164,7 +178,12 @@ sysarch(td, uap)
*/
sd.sd_lobase = base & 0xffffff;
sd.sd_hibase = (base >> 24) & 0xff;
+#ifdef XEN
+ /* need to do nosegneg like Linux */
+ sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
+#else
sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
+#endif
sd.sd_hilimit = 0xf;
sd.sd_type = SDT_MEMRWA;
sd.sd_dpl = SEL_UPL;
@@ -174,7 +193,12 @@ sysarch(td, uap)
sd.sd_gran = 1;
critical_enter();
td->td_pcb->pcb_fsd = sd;
+#ifdef XEN
+ HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]),
+ *(uint64_t *)&sd);
+#else
PCPU_GET(fsgs_gdt)[0] = sd;
+#endif
critical_exit();
td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
}
@@ -194,7 +218,13 @@ sysarch(td, uap)
*/
sd.sd_lobase = base & 0xffffff;
sd.sd_hibase = (base >> 24) & 0xff;
+
+#ifdef XEN
+ /* need to do nosegneg like Linux */
+ sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
+#else
sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
+#endif
sd.sd_hilimit = 0xf;
sd.sd_type = SDT_MEMRWA;
sd.sd_dpl = SEL_UPL;
@@ -204,7 +234,12 @@ sysarch(td, uap)
sd.sd_gran = 1;
critical_enter();
td->td_pcb->pcb_gsd = sd;
+#ifdef XEN
+ HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]),
+ *(uint64_t *)&sd);
+#else
PCPU_GET(fsgs_gdt)[1] = sd;
+#endif
critical_exit();
load_gs(GSEL(GUGS_SEL, SEL_UPL));
}
@@ -360,6 +395,10 @@ set_user_ldt(struct mdproc *mdp)
}
pldt = mdp->md_ldt;
+#ifdef XEN
+ i386_reset_ldt(pldt);
+ PCPU_SET(currentldt, (int)pldt);
+#else
#ifdef SMP
gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
@@ -367,6 +406,7 @@ set_user_ldt(struct mdproc *mdp)
#endif
lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
+#endif /* XEN */
if (dtlocked)
mtx_unlock_spin(&dt_lock);
}
@@ -385,6 +425,44 @@ set_user_ldt_rv(struct vmspace *vmsp)
}
#endif
+#ifdef XEN
+
+/*
+ * XEN: allocate a new LDT sized by NEW_MAX_LD(len) for the process
+ * described by 'mdp'.  The new table is seeded from the process's current
+ * LDT when it has one, otherwise from the first page of the default ldt,
+ * and is then passed to pmap_map_readonly().
+ *
+ * dt_lock must be held on entry.  It is dropped around the allocations
+ * and around pmap_map_readonly(), and is always held again on return,
+ * including the NULL return taken when kmem_alloc() fails.
+ */
+struct proc_ldt *
+user_ldt_alloc(struct mdproc *mdp, int len)
+{
+	struct proc_ldt *pldt, *new_ldt;
+
+	mtx_assert(&dt_lock, MA_OWNED);
+	/* M_WAITOK allocations may sleep; not permitted under a spin mutex. */
+	mtx_unlock_spin(&dt_lock);
+	MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt),
+	    M_SUBPROC, M_WAITOK);
+
+	new_ldt->ldt_len = len = NEW_MAX_LD(len);
+	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
+	    round_page(len * sizeof(union descriptor)));
+	if (new_ldt->ldt_base == NULL) {
+		FREE(new_ldt, M_SUBPROC);
+		/* Keep the "returns with dt_lock held" contract on failure. */
+		mtx_lock_spin(&dt_lock);
+		return (NULL);
+	}
+	new_ldt->ldt_refcnt = 1;
+	new_ldt->ldt_active = 0;
+
+	/* Retake dt_lock before looking at the process's current LDT. */
+	mtx_lock_spin(&dt_lock);
+	if ((pldt = mdp->md_ldt) != NULL) {
+		/* Copy no more than the old table actually holds. */
+		if (len > pldt->ldt_len)
+			len = pldt->ldt_len;
+		bcopy(pldt->ldt_base, new_ldt->ldt_base,
+		    len * sizeof(union descriptor));
+	} else {
+		bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
+	}
+
+	/* As in the original code, remap with dt_lock dropped. */
+	mtx_unlock_spin(&dt_lock);
+	pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
+	    new_ldt->ldt_len * sizeof(union descriptor));
+	mtx_lock_spin(&dt_lock);
+	return (new_ldt);
+}
+#else
/*
* dt_lock must be held. Returns with dt_lock held.
*/
@@ -423,6 +501,7 @@ user_ldt_alloc(struct mdproc *mdp, int len)
return (new_ldt);
}
+#endif /* !XEN */
/*
* Must be called with dt_lock held. Returns with dt_lock unheld.
@@ -667,7 +746,23 @@ again:
td->td_retval[0] = uap->start;
return (error);
}
+#ifdef XEN
+/*
+ * XEN: store 'num' descriptors from 'descs' into the current process's
+ * LDT, starting at slot 'start'.  dt_lock must be held.
+ *
+ * user_ldt_alloc() maps the LDT with pmap_map_readonly(), so the entries
+ * are written one at a time through HYPERVISOR_update_descriptor() (the
+ * same call sysarch() uses for the %fs/%gs GDT slots) instead of a direct
+ * bcopy() into the table.
+ *
+ * Returns 0 on success, or EINVAL if the hypervisor rejects an update.
+ */
+static int
+i386_set_ldt_data(struct thread *td, int start, int num,
+    union descriptor *descs)
+{
+	struct mdproc *mdp = &td->td_proc->p_md;
+	struct proc_ldt *pldt = mdp->md_ldt;
+	union descriptor *slot;
+	int i;
+
+	mtx_assert(&dt_lock, MA_OWNED);
+
+	/* Fill in range */
+	slot = &((union descriptor *)(pldt->ldt_base))[start];
+	for (i = 0; i < num; i++) {
+		if (HYPERVISOR_update_descriptor(vtomach(&slot[i]),
+		    *(uint64_t *)&descs[i]) != 0)
+			return (EINVAL);
+	}
+	return (0);
+}
+#else
static int
i386_set_ldt_data(struct thread *td, int start, int num,
union descriptor *descs)
@@ -683,6 +778,7 @@ i386_set_ldt_data(struct thread *td, int start, int num,
num * sizeof(union descriptor));
return (0);
}
+#endif /* !XEN */
static int
i386_ldt_grow(struct thread *td, int len)
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 3306e7c8dbb8..999e73d389cb 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -123,6 +123,14 @@ dtrace_doubletrap_func_t dtrace_doubletrap_func;
systrace_probe_func_t systrace_probe_func;
#endif
+#ifdef XEN
+#include <sys/syslog.h>
+#include <machine/xen/xenfunc.h>
+#include <machine/xen/hypervisor.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/evtchn.h>
+#endif
+
extern void trap(struct trapframe *frame);
extern void syscall(struct trapframe *frame);
@@ -151,7 +159,11 @@ static char *trap_msg[] = {
"alignment fault", /* 14 T_ALIGNFLT */
"", /* 15 unused */
"", /* 16 unused */
+#ifdef XEN
+ "hypervisor callback", /* 17 T_HYPCALLBACK*/
+#else
"", /* 17 unused */
+#endif
"integer divide fault", /* 18 T_DIVIDE */
"non-maskable interrupt trap", /* 19 T_NMI */
"overflow trap", /* 20 T_OFLOW */
@@ -258,6 +270,7 @@ trap(struct trapframe *frame)
goto out;
#endif
+#ifndef XEN
if ((frame->tf_eflags & PSL_I) == 0) {
/*
* Buggy application or kernel code has disabled
@@ -288,7 +301,7 @@ trap(struct trapframe *frame)
enable_intr();
}
}
-
+#endif /* !XEN */
eva = 0;
code = frame->tf_err;
if (type == T_PAGEFLT) {
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index c5d381c42aa0..a4b1c5c70bd2 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -88,6 +88,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_map.h>
#include <vm/vm_param.h>
+#ifdef XEN
+#include <machine/xen/hypervisor.h>
+#endif
#ifdef PC98
#include <pc98/cbus/cbus.h>
#else
@@ -264,6 +267,9 @@ cpu_fork(td1, p2, td2, flags)
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
+ /*
+ * XXX XEN: need to check how PSL_USER is handled
+ */
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
/*
@@ -594,6 +600,9 @@ cpu_reset_real()
#endif
disable_intr();
+#ifdef XEN
+ /* A reset should restart the domain, not power it off. */
+ HYPERVISOR_shutdown(SHUTDOWN_reboot);
+#endif
#ifdef CPU_ELAN
if (elan_mmcr != NULL)
elan_mmcr->RESCFG = 1;
@@ -759,7 +768,12 @@ sf_buf_alloc(struct vm_page *m, int flags)
*/
ptep = vtopte(sf->kva);
opte = *ptep;
+#ifdef XEN
+ PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
+ | PG_RW | PG_V);
+#else
*ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V;
+#endif
/*
* Avoid unnecessary TLB invalidations: If the sf_buf's old
@@ -809,6 +823,14 @@ sf_buf_free(struct sf_buf *sf)
if (sf->ref_count == 0) {
TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
nsfbufsused--;
+#ifdef XEN
+/*
+ * Xen doesn't like having dangling R/W mappings
+ */
+ pmap_qremove(sf->kva, 1);
+ sf->m = NULL;
+ LIST_REMOVE(sf, list_entry);
+#endif
if (sf_buf_alloc_want > 0)
wakeup_one(&sf_buf_freelist);
}
diff --git a/sys/i386/include/asmacros.h b/sys/i386/include/asmacros.h
index fbc93b7d69d5..c1c3f645cbf5 100644
--- a/sys/i386/include/asmacros.h
+++ b/sys/i386/include/asmacros.h
@@ -176,6 +176,63 @@
movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \
movl %eax, %fs
+#ifdef XEN
+/*
+ * Replacements for privileged instructions when built for XEN.
+ *
+ * LOAD_CR3(reg): stores reg in the per-CPU CR3 slot, then calls
+ *	xen_load_cr3 with reg as its stack argument; %ecx, %edx and %esi
+ *	are saved and restored around the call.
+ * READ_CR3(reg): reads the per-CPU CR3 slot back into reg.
+ * LLDT(arg): loads zero into %gs and calls i386_reset_ldt, preserving
+ *	%edx and %eax.  'arg' is not referenced by the expansion.
+ *	NOTE(review): the word on top of the stack at the call is the saved
+ *	%eax, so callers presumably pass the proc_ldt pointer in %eax --
+ *	confirm.
+ * CLI: calls ni_cli.
+ */
+#define LOAD_CR3(reg) \
+ movl reg,PCPU(CR3); \
+ pushl %ecx ; \
+ pushl %edx ; \
+ pushl %esi ; \
+ pushl reg ; \
+ call xen_load_cr3 ; \
+ addl $4,%esp ; \
+ popl %esi ; \
+ popl %edx ; \
+ popl %ecx ; \
+
+#define READ_CR3(reg) movl PCPU(CR3),reg;
+#define LLDT(arg) \
+ pushl %edx ; \
+ pushl %eax ; \
+ xorl %eax,%eax ; \
+ movl %eax,%gs ; \
+ call i386_reset_ldt ; \
+ popl %eax ; \
+ popl %edx
+#define CLI call ni_cli
+#else
+/* Native build: the macros expand to the bare instructions. */
+#define LOAD_CR3(reg) movl reg,%cr3;
+#define READ_CR3(reg) movl %cr3,reg;
+#define LLDT(arg) lldt arg;
+#define CLI cli
+#endif /* !XEN */
+
+
#endif /* LOCORE */
+/*
+ * ELFNOTE(name, type, desctype, descdata): assembler text that emits one
+ * note record into section .note.<name>.  The record consists of the name
+ * size and descriptor size (both computed from the local labels 1-4), the
+ * type word, the NUL-terminated name, and the descriptor emitted as
+ * "desctype descdata"; each part is 4-byte aligned.
+ *
+ * The __STDC__ variant stringifies name with # and takes descdata as a
+ * variadic tail; the -traditional variant relies on substitution of name
+ * inside the quoted string.
+ */
+#ifdef __STDC__
+#define ELFNOTE(name, type, desctype, descdata...) \
+.pushsection .note.name ; \
+ .align 4 ; \
+ .long 2f - 1f /* namesz */ ; \
+ .long 4f - 3f /* descsz */ ; \
+ .long type ; \
+1:.asciz #name ; \
+2:.align 4 ; \
+3:desctype descdata ; \
+4:.align 4 ; \
+.popsection
+#else /* !__STDC__, i.e. -traditional */
+#define ELFNOTE(name, type, desctype, descdata) \
+.pushsection .note.name ; \
+ .align 4 ; \
+ .long 2f - 1f /* namesz */ ; \
+ .long 4f - 3f /* descsz */ ; \
+ .long type ; \
+1:.asciz "name" ; \
+2:.align 4 ; \
+3:desctype descdata ; \
+4:.align 4 ; \
+.popsection
+#endif /* __STDC__ */
+
#endif /* !_MACHINE_ASMACROS_H_ */
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index fc61eab9b5f5..948bde1c3079 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -42,6 +42,16 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
+#ifdef XEN
+extern void xen_cli(void);
+extern void xen_sti(void);
+extern void xen_load_cr3(u_int data);
+extern void xen_tlb_flush(void);
+extern void xen_invlpg(u_int addr);
+extern void xen_save_and_cli(u_int *eflags);
+extern void xen_restore_flags(u_int eflags);
+#endif
+
struct region_descriptor;
#define readb(va) (*(volatile u_int8_t *) (va))
@@ -81,7 +91,11 @@ bsrl(u_int mask)
static __inline void
disable_intr(void)
{
- __asm __volatile("cli" : : : "memory");
+#ifdef XEN
+ xen_cli();
+#else
+ __asm __volatile("cli" : : : "memory");
+#endif
}
static __inline void
@@ -103,7 +117,11 @@ cpuid_count(u_int ax, u_int cx, u_int *p)
static __inline void
enable_intr(void)
{
+#ifdef XEN
+ xen_sti();
+#else
__asm __volatile("sti");
+#endif
}
static inline void
@@ -412,8 +430,11 @@ rcr2(void)
static __inline void
load_cr3(u_int data)
{
-
+#ifdef XEN
+ xen_load_cr3(data);
+#else
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
+#endif
}
static __inline u_int
@@ -446,8 +467,11 @@ rcr4(void)
static __inline void
invltlb(void)
{
-
+#ifdef XEN
+ xen_tlb_flush();
+#else
load_cr3(rcr3());
+#endif
}
/*
@@ -458,7 +482,11 @@ static __inline void
invlpg(u_int addr)
{
+#ifdef XEN
+ xen_invlpg(addr);
+#else
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
+#endif
}
static __inline u_int
@@ -662,17 +690,25 @@ load_dr7(u_int dr7)
static __inline register_t
intr_disable(void)
{
- register_t eflags;
+ register_t eflags = 0;
+#ifdef XEN
+ xen_save_and_cli(&eflags);
+#else
eflags = read_eflags();
disable_intr();
+#endif
return (eflags);
}
static __inline void
intr_restore(register_t eflags)
{
+#ifdef XEN
+ xen_restore_flags(eflags);
+#else
write_eflags(eflags);
+#endif
}
#else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */
diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h
index c28ae64ca8ca..ea8ff4667758 100644
--- a/sys/i386/include/pcpu.h
+++ b/sys/i386/include/pcpu.h
@@ -45,6 +45,29 @@
* to each CPU's data can be set up for things like "check curproc on all
* other processors"
*/
+
+#ifdef XEN
+/*
+ * XEN layout of the machine-dependent per-CPU fields.  pc_cr3 is the field
+ * exported to assembly as PC_CR3 by genassym.c.
+ */
+#define PCPU_MD_FIELDS \
+ char pc_monitorbuf[128] __aligned(128); /* cache line */ \
+ struct pcpu *pc_prvspace; /* Self-reference */ \
+ struct pmap *pc_curpmap; \
+ struct i386tss pc_common_tss; \
+ struct segment_descriptor pc_common_tssd; \
+ struct segment_descriptor *pc_tss_gdt; \
+ struct segment_descriptor *pc_fsgs_gdt; \
+ vm_paddr_t *pc_pdir_shadow; \
+ int pc_currentldt; \
+ u_int pc_acpi_id; /* ACPI CPU id */ \
+ u_int pc_apic_id; \
+ int pc_private_tss; /* Flag indicating private tss*/\
+ u_int pc_cr3; /* track cr3 for R1/R3*/ \
+ u_int pc_pdir; \
+ u_int pc_lazypmap; \
+ u_int pc_rendezvous; \
+ u_int pc_cpuast
+
+
+#else
#define PCPU_MD_FIELDS \
char pc_monitorbuf[128] __aligned(128); /* cache line */ \
struct pcpu *pc_prvspace; /* Self-reference */ \
@@ -58,6 +81,7 @@
u_int pc_apic_id; \
int pc_private_tss /* Flag indicating private tss */
+#endif
#ifdef lint
extern struct pcpu *pcpup;
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index 1cd2bded596d..66e17a96424b 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -198,6 +198,72 @@ extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
#define vtopte(va) (PTmap + i386_btop(va))
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
+#ifdef XEN
+#include <sys/param.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenpmap.h>
+
+extern pt_entry_t pg_nx;
+
+#define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M)
+
+#define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma)))
+#define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m)))
+
+/*
+ * Look up kernel virtual address 'va' in the page tables and return the
+ * address found there, without passing it through xpmap_mtop().  A
+ * directory entry with PG_PS set is resolved at NBPDR granularity;
+ * otherwise the page-table entry from vtopte() is used.
+ */
+static __inline vm_paddr_t
+pmap_kextract_ma(vm_offset_t va)
+{
+	vm_paddr_t pde = PTD[va >> PDRSHIFT];
+
+	if (pde & PG_PS)
+		return ((pde & ~(NBPDR - 1)) | (va & (NBPDR - 1)));
+	return ((*vtopte(va) & PG_FRAME) | (va & PAGE_MASK));
+}
+
+/*
+ * XEN pmap_kextract(): the result of pmap_kextract_ma() for 'va', passed
+ * through xpmap_mtop().
+ */
+static __inline vm_paddr_t
+pmap_kextract(vm_offset_t va)
+{
+	vm_paddr_t ma = pmap_kextract_ma(va);
+
+	return (xpmap_mtop(ma));
+}
+#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va)))
+
+vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va);
+
+void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa);
+void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len);
+void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len);
+
+/*
+ * Replace the entry at 'ptep' with 'v' after passing 'v' through
+ * xpmap_ptom(), using PT_SET_VA(..., TRUE).  Returns the entry's previous
+ * contents.
+ * NOTE(review): confirm PT_SET_VA does not itself apply xpmap_ptom().
+ */
+static __inline pt_entry_t
+pte_load_store(pt_entry_t *ptep, pt_entry_t v)
+{
+	pt_entry_t old = *ptep;
+
+	v = xpmap_ptom(v);
+	PT_SET_VA(ptep, v, TRUE);
+	return (old);
+}
+
+/*
+ * Replace the entry at 'ptep' with 'v' as given, using
+ * PT_SET_VA_MA(..., TRUE).  Returns the entry's previous contents.
+ */
+static __inline pt_entry_t
+pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v)
+{
+	pt_entry_t old = *ptep;
+
+	PT_SET_VA_MA(ptep, v, TRUE);
+	return (old);
+}
+
+#define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL)
+
+#define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte)
+#define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte)
+#define pde_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte)
+
+#elif !defined(XEN)
/*
* Routine: pmap_kextract
* Function:
@@ -217,8 +283,9 @@ pmap_kextract(vm_offset_t va)
}
return pa;
}
+#endif
-#ifdef PAE
+#if defined(PAE) && !defined(XEN)
#define pde_cmpset(pdep, old, new) \
atomic_cmpset_64((pdep), (old), (new))
@@ -277,7 +344,7 @@ atomic_cmpset_64(volatile uint64_t *dst, uint64_t exp, uint64_t src)
extern pt_entry_t pg_nx;
-#else /* PAE */
+#elif !defined(PAE) && !defined (XEN)
#define pde_cmpset(pdep, old, new) \
atomic_cmpset_int((pdep), (old), (new))
diff --git a/sys/i386/include/segments.h b/sys/i386/include/segments.h
index 351ff5dff8fe..2edcc59073b4 100644
--- a/sys/i386/include/segments.h
+++ b/sys/i386/include/segments.h
@@ -47,7 +47,11 @@
*/
#define ISPL(s) ((s)&3) /* what is the priority level of a selector */
+#ifdef XEN
+#define SEL_KPL 1 /* kernel priority level */
+#else
#define SEL_KPL 0 /* kernel priority level */
+#endif
#define SEL_UPL 3 /* user priority level */
#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */
#define SEL_LDT 4 /* local descriptor table */
@@ -222,7 +226,11 @@ struct region_descriptor {
#define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */
#define GNDIS_SEL 18 /* For the NDIS layer */
+#ifdef XEN
+#define NGDT 9
+#else
#define NGDT 19
+#endif
/*
* Entries in the Local Descriptor Table (LDT)
@@ -240,10 +248,15 @@ struct region_descriptor {
#ifdef _KERNEL
extern int _default_ldt;
+#ifdef XEN
+extern union descriptor *gdt;
+extern union descriptor *ldt;
+#else
extern union descriptor gdt[];
+extern union descriptor ldt[NLDT];
+#endif
extern struct soft_segment_descriptor gdt_segs[];
extern struct gate_descriptor *idt;
-extern union descriptor ldt[NLDT];
extern struct region_descriptor r_gdt, r_idt;
void lgdt(struct region_descriptor *rdp);
diff --git a/sys/i386/include/trap.h b/sys/i386/include/trap.h
index f0176b2c97e7..82208d2a39ff 100644
--- a/sys/i386/include/trap.h
+++ b/sys/i386/include/trap.h
@@ -49,6 +49,9 @@
#define T_PAGEFLT 12 /* page fault */
#define T_ALIGNFLT 14 /* alignment fault */
+#ifdef XEN
+#define T_HYPCALLBACK 17 /* hypervisor upcall */
+#endif
#define T_DIVIDE 18 /* integer divide fault */
#define T_NMI 19 /* non-maskable trap */
#define T_OFLOW 20 /* overflow trap */
diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h
index 3d34419b9844..f114232e90bb 100644
--- a/sys/i386/include/vmparam.h
+++ b/sys/i386/include/vmparam.h
@@ -145,8 +145,12 @@
* Kernel physical load address.
*/
#ifndef KERNLOAD
+#if defined(XEN) && !defined(XEN_PRIVILEGED_GUEST)
+#define KERNLOAD 0
+#else
#define KERNLOAD (1 << PDRSHIFT)
#endif
+#endif /* !defined(KERNLOAD) */
/*
* Virtual addresses of things. Derived from the page directory and
@@ -155,7 +159,12 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
+#ifdef XEN
+#define VM_MAX_KERNEL_ADDRESS HYPERVISOR_VIRT_START
+#else
#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1)
+#endif
+
#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define KERNBASE VADDR(KPTDI, 0)
diff --git a/sys/i386/pci/pci_cfgreg.c b/sys/i386/pci/pci_cfgreg.c
index e8be30547ec1..6c78fd5488de 100644
--- a/sys/i386/pci/pci_cfgreg.c
+++ b/sys/i386/pci/pci_cfgreg.c
@@ -82,9 +82,11 @@ static struct mtx pcicfg_mtx;
static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
+#ifndef XEN
static int pcireg_cfgopen(void);
static int pciereg_cfgopen(void);
+#endif
static int pciereg_cfgread(int bus, int slot, int func, int reg,
int bytes);
static void pciereg_cfgwrite(int bus, int slot, int func, int reg,
@@ -105,6 +107,7 @@ pci_i386_map_intline(int line)
return (line);
}
+#ifndef XEN
static u_int16_t
pcibios_get_version(void)
{
@@ -125,6 +128,7 @@ pcibios_get_version(void)
}
return (args.ebx & 0xffff);
}
+#endif
/*
* Initialise access to PCI configuration space
@@ -132,6 +136,9 @@ pcibios_get_version(void)
int
pci_cfgregopen(void)
{
+#ifdef XEN
+ return (0);
+#else
static int opened = 0;
u_int16_t vid, did;
u_int16_t v;
@@ -175,6 +182,7 @@ pci_cfgregopen(void)
}
return(1);
+#endif
}
/*
@@ -353,6 +361,7 @@ pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes)
mtx_unlock_spin(&pcicfg_mtx);
}
+#ifndef XEN
/* check whether the configuration mechanism has been correctly identified */
static int
pci_cfgcheck(int maxdev)
@@ -530,6 +539,7 @@ pciereg_cfgopen(void)
devmax = 32;
return (1);
}
+#endif /* !XEN */
#define PCIE_PADDR(bar, reg, bus, slot, func) \
((bar) | \
diff --git a/sys/i386/pci/pci_pir.c b/sys/i386/pci/pci_pir.c
index 52fb62fbb6ec..c275de094f5c 100644
--- a/sys/i386/pci/pci_pir.c
+++ b/sys/i386/pci/pci_pir.c
@@ -138,6 +138,9 @@ pci_pir_open(void)
int i;
uint8_t ck, *cv;
+#ifdef XEN
+ return;
+#else
/* Don't try if we've already found a table. */
if (pci_route_table != NULL)
return;
@@ -148,7 +151,7 @@ pci_pir_open(void)
sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0);
if (sigaddr == 0)
return;
-
+#endif
/* If we found something, check the checksum and length. */
/* XXX - Use pmap_mapdev()? */
pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr);
@@ -479,7 +482,11 @@ pci_pir_biosroute(int bus, int device, int func, int pin, int irq)
args.eax = PCIBIOS_ROUTE_INTERRUPT;
args.ebx = (bus << 8) | (device << 3) | func;
args.ecx = (irq << 8) | (0xa + pin);
+#ifdef XEN
+ return (0);
+#else
return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL)));
+#endif
}