Diffstat (limited to 'sys')
-rw-r--r--  sys/sparc64/include/pmap.h       |  13
-rw-r--r--  sys/sparc64/include/tsb.h        |   1
-rw-r--r--  sys/sparc64/sparc64/exception.S  | 120
-rw-r--r--  sys/sparc64/sparc64/genassym.c   |   1
-rw-r--r--  sys/sparc64/sparc64/mp_machdep.c |   9
-rw-r--r--  sys/sparc64/sparc64/pmap.c       | 254
-rw-r--r--  sys/sparc64/sparc64/tsb.c        |   8
7 files changed, 294 insertions, 112 deletions
diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h
index 91e15cbbf774..a1957243d119 100644
--- a/sys/sparc64/include/pmap.h
+++ b/sys/sparc64/include/pmap.h
@@ -61,18 +61,18 @@ struct pmap {
         struct mtx pm_mtx;
         struct tte *pm_tsb;
         vm_object_t pm_tsb_obj;
-        cpumask_t pm_active;
-        uint32_t pm_gen_count;  /* generation count (pmap lock dropped) */
-        u_int pm_retries;
+        cpumask_t       pm_active;
+        uint32_t        pm_gen_count;   /* generation count (pmap lock dropped) */
+        u_int           pm_retries;
         u_int pm_context[MAXCPU];
         struct pmap_statistics pm_stats;
 };
 
 #define PMAP_LOCK(pmap)         mtx_lock(&(pmap)->pm_mtx)
-#define PMAP_LOCK_ASSERT(pmap, type) \
+#define PMAP_LOCK_ASSERT(pmap, type)                                    \
         mtx_assert(&(pmap)->pm_mtx, (type))
 #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx)
-#define PMAP_LOCK_INIT(pmap)    mtx_init(&(pmap)->pm_mtx, "pmap", \
+#define PMAP_LOCK_INIT(pmap)    mtx_init(&(pmap)->pm_mtx, "pmap",       \
         NULL, MTX_DEF | MTX_DUPOK)
 #define PMAP_LOCKED(pmap)       mtx_owned(&(pmap)->pm_mtx)
 #define PMAP_MTX(pmap)          (&(pmap)->pm_mtx)
@@ -99,6 +99,7 @@ int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2, struct tte *tp,
             vm_offset_t va);
 
 void pmap_map_tsb(void);
+void pmap_set_kctx(void);
 
 #define vtophys(va)     pmap_kextract((vm_offset_t)(va))
 
@@ -114,7 +115,7 @@ SYSCTL_DECL(_debug_pmap_stats);
 
 #define PMAP_STATS_VAR(name) \
         static long name; \
-        SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, name, CTLFLAG_RW, \
+        SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, name, CTLFLAG_RW,     \
             &name, 0, "")
 
 #define PMAP_STATS_INC(var) \
diff --git a/sys/sparc64/include/tsb.h b/sys/sparc64/include/tsb.h
index 1fd1e8f758b4..5fa1190f0193 100644
--- a/sys/sparc64/include/tsb.h
+++ b/sys/sparc64/include/tsb.h
@@ -50,6 +50,7 @@ extern struct tte *tsb_kernel;
 extern vm_size_t tsb_kernel_mask;
 extern vm_size_t tsb_kernel_size;
 extern vm_paddr_t tsb_kernel_phys;
+extern u_int tsb_kernel_ldd_phys;
 
 static __inline struct tte *
 tsb_vpntobucket(pmap_t pm, vm_offset_t vpn)
diff --git a/sys/sparc64/sparc64/exception.S b/sys/sparc64/sparc64/exception.S
index c796251db2bc..226fd7b3277e 100644
--- a/sys/sparc64/sparc64/exception.S
+++ b/sys/sparc64/sparc64/exception.S
@@ -75,8 +75,12 @@ __FBSDID("$FreeBSD$");
 
 #include "assym.s"
 
-#define TSB_KERNEL_MASK         0x0
-#define TSB_KERNEL              0x0
+#define TSB_ASI                 0x0
+#define TSB_KERNEL              0x0
+#define TSB_KERNEL_MASK         0x0
+#define TSB_KERNEL_PHYS         0x0
+#define TSB_KERNEL_PHYS_END     0x0
+#define TSB_QUAD_LDD            0x0
 
         .register %g2,#ignore
         .register %g3,#ignore
@@ -84,19 +88,19 @@ __FBSDID("$FreeBSD$");
         .register %g7,#ignore
 
 /*
- * Atomically set the reference bit in a TTE.
+ * Atomically set a bit in a TTE.
  */
-#define TTE_SET_BIT(r1, r2, r3, bit) \
+#define TTE_SET_BIT(r1, r2, r3, bit, a, asi) \
         add     r1, TTE_DATA, r1 ; \
-        ldx     [r1], r2 ; \
+        LD(x, a) [r1] asi, r2 ; \
 9:      or      r2, bit, r3 ; \
-        casxa   [r1] ASI_N, r2, r3 ; \
+        CAS(x, a) [r1] asi, r2, r3 ; \
         cmp     r2, r3 ; \
         bne,pn  %xcc, 9b ; \
         mov     r3, r2
 
-#define TTE_SET_REF(r1, r2, r3)         TTE_SET_BIT(r1, r2, r3, TD_REF)
-#define TTE_SET_W(r1, r2, r3)           TTE_SET_BIT(r1, r2, r3, TD_W)
+#define TTE_SET_REF(r1, r2, r3, a, asi) TTE_SET_BIT(r1, r2, r3, TD_REF, a, asi)
+#define TTE_SET_W(r1, r2, r3, a, asi)   TTE_SET_BIT(r1, r2, r3, TD_W, a, asi)
 
 /*
  * Macros for spilling and filling live windows.
@@ -691,7 +695,7 @@ ENTRY(tl0_immu_miss_set_ref)
         /*
          * Set the reference bit.
          */
-        TTE_SET_REF(%g4, %g2, %g3)
+        TTE_SET_REF(%g4, %g2, %g3, a, ASI_N)
 
         /*
          * May have become invalid during casxa, in which case start over.
@@ -849,7 +853,7 @@ ENTRY(tl0_dmmu_miss_set_ref)
         /*
          * Set the reference bit.
          */
-        TTE_SET_REF(%g4, %g2, %g3)
+        TTE_SET_REF(%g4, %g2, %g3, a, ASI_N)
 
         /*
          * May have become invalid during casxa, in which case start over.
@@ -997,7 +1001,7 @@ tl1_dmmu_prot_user:
         /*
          * Set the hardware write bit.
          */
-        TTE_SET_W(%g4, %g2, %g3)
+        TTE_SET_W(%g4, %g2, %g3, a, ASI_N)
 
         /*
          * Delete the old TLB entry and clear the SFSR.
@@ -1327,11 +1331,14 @@ END(tl1_sfsr_trap)
          * Compute the address of the TTE.  The TSB mask and address of the
          * TSB are patched at startup.
          */
-        .globl  tl1_immu_miss_patch_1
-tl1_immu_miss_patch_1:
+        .globl  tl1_immu_miss_patch_tsb_mask_1
+tl1_immu_miss_patch_tsb_mask_1:
         sethi   %hi(TSB_KERNEL_MASK), %g6
         or      %g6, %lo(TSB_KERNEL_MASK), %g6
+        .globl  tl1_immu_miss_patch_tsb_1
+tl1_immu_miss_patch_tsb_1:
         sethi   %hi(TSB_KERNEL), %g7
+        or      %g7, %lo(TSB_KERNEL), %g7
 
         srlx    %g5, TAR_VPN_SHIFT, %g5
         and     %g5, %g6, %g6
@@ -1341,7 +1348,9 @@ tl1_immu_miss_patch_1:
         /*
          * Load the TTE.
          */
-        ldda    [%g6] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
+        .globl  tl1_immu_miss_patch_quad_ldd_1
+tl1_immu_miss_patch_quad_ldd_1:
+        ldda    [%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
         /*
          * Check that it's valid and executable and that the virtual page
@@ -1375,11 +1384,14 @@ ENTRY(tl1_immu_miss_set_ref)
          * Recompute the TTE address, which we clobbered loading the TTE.
          * The TSB mask and address of the TSB are patched at startup.
          */
-        .globl  tl1_immu_miss_patch_2
-tl1_immu_miss_patch_2:
+        .globl  tl1_immu_miss_patch_tsb_mask_2
+tl1_immu_miss_patch_tsb_mask_2:
         sethi   %hi(TSB_KERNEL_MASK), %g6
         or      %g6, %lo(TSB_KERNEL_MASK), %g6
+        .globl  tl1_immu_miss_patch_tsb_2
+tl1_immu_miss_patch_tsb_2:
         sethi   %hi(TSB_KERNEL), %g7
+        or      %g7, %lo(TSB_KERNEL), %g7
 
         and     %g5, %g6, %g5
         sllx    %g5, TTE_SHIFT, %g5
@@ -1388,7 +1400,10 @@ tl1_immu_miss_patch_2:
         /*
          * Set the reference bit.
          */
-        TTE_SET_REF(%g5, %g6, %g7)
+        .globl  tl1_immu_miss_patch_asi_1
+tl1_immu_miss_patch_asi_1:
+        wr      %g0, TSB_ASI, %asi
+        TTE_SET_REF(%g5, %g6, %g7, a, %asi)
 
         /*
          * May have become invalid during casxa, in which case start over.
@@ -1447,11 +1462,14 @@ END(tl1_immu_miss_trap)
          * Compute the address of the TTE.  The TSB mask and address of the
          * TSB are patched at startup.
          */
-        .globl  tl1_dmmu_miss_patch_1
-tl1_dmmu_miss_patch_1:
+        .globl  tl1_dmmu_miss_patch_tsb_mask_1
+tl1_dmmu_miss_patch_tsb_mask_1:
         sethi   %hi(TSB_KERNEL_MASK), %g6
         or      %g6, %lo(TSB_KERNEL_MASK), %g6
+        .globl  tl1_dmmu_miss_patch_tsb_1
+tl1_dmmu_miss_patch_tsb_1:
         sethi   %hi(TSB_KERNEL), %g7
+        or      %g7, %lo(TSB_KERNEL), %g7
 
         srlx    %g5, TAR_VPN_SHIFT, %g5
         and     %g5, %g6, %g6
@@ -1461,7 +1479,9 @@ tl1_dmmu_miss_patch_1:
         /*
          * Load the TTE.
          */
-        ldda    [%g6] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
+        .globl  tl1_dmmu_miss_patch_quad_ldd_1
+tl1_dmmu_miss_patch_quad_ldd_1:
+        ldda    [%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
         /*
          * Check that it's valid and that the virtual page numbers match.
@@ -1492,11 +1512,14 @@ ENTRY(tl1_dmmu_miss_set_ref)
          * Recompute the TTE address, which we clobbered loading the TTE.
          * The TSB mask and address of the TSB are patched at startup.
          */
-        .globl  tl1_dmmu_miss_patch_2
-tl1_dmmu_miss_patch_2:
+        .globl  tl1_dmmu_miss_patch_tsb_mask_2
+tl1_dmmu_miss_patch_tsb_mask_2:
         sethi   %hi(TSB_KERNEL_MASK), %g6
         or      %g6, %lo(TSB_KERNEL_MASK), %g6
+        .globl  tl1_dmmu_miss_patch_tsb_2
+tl1_dmmu_miss_patch_tsb_2:
         sethi   %hi(TSB_KERNEL), %g7
+        or      %g7, %lo(TSB_KERNEL), %g7
 
         and     %g5, %g6, %g5
         sllx    %g5, TTE_SHIFT, %g5
@@ -1505,7 +1528,10 @@ tl1_dmmu_miss_patch_2:
         /*
          * Set the reference bit.
          */
-        TTE_SET_REF(%g5, %g6, %g7)
+        .globl  tl1_dmmu_miss_patch_asi_1
+tl1_dmmu_miss_patch_asi_1:
+        wr      %g0, TSB_ASI, %asi
+        TTE_SET_REF(%g5, %g6, %g7, a, %asi)
 
         /*
          * May have become invalid during casxa, in which case start over.
@@ -1545,15 +1571,36 @@ ENTRY(tl1_dmmu_miss_direct)
          * correspond to the TTE valid and page size bits are left set, so
          * they don't have to be included in the TTE bits below.  We know they
          * are set because the virtual address is in the upper va hole.
+         * NB: if we are taking advantage of the ASI_ATOMIC_QUAD_LDD_PHYS
+         * and we get a miss on the directly accessed kernel TSB we must not
+         * set TD_CV in order to access it uniformly bypassing the D$.
          */
+        setx    TLB_DIRECT_ADDRESS_MASK, %g7, %g4
+        and     %g5, %g4, %g4
         setx    TLB_DIRECT_TO_TTE_MASK, %g7, %g6
         and     %g5, %g6, %g5
-        or      %g5, TD_CP | TD_CV | TD_W, %g5
+        .globl  tl1_dmmu_miss_direct_patch_tsb_phys_1
+tl1_dmmu_miss_direct_patch_tsb_phys_1:
+        sethi   %hi(TSB_KERNEL_PHYS), %g7
+        or      %g7, %lo(TSB_KERNEL_PHYS), %g7
+        cmp     %g4, %g7
+        bl,pt   %xcc, 1f
+         or     %g5, TD_CP | TD_W, %g5
+        .globl  tl1_dmmu_miss_direct_patch_tsb_phys_end_1
+tl1_dmmu_miss_direct_patch_tsb_phys_end_1:
+        sethi   %hi(TSB_KERNEL_PHYS_END), %g7
+        or      %g7, %lo(TSB_KERNEL_PHYS_END), %g7
+        cmp     %g4, %g7
+        bg,a,pt %xcc, 1f
+         nop
+        ba,pt   %xcc, 2f
+         nop
+1:      or      %g5, TD_CV, %g5
 
         /*
          * Load the TTE data into the TLB and retry the instruction.
          */
-        stxa    %g5, [%g0] ASI_DTLB_DATA_IN_REG
+2:      stxa    %g5, [%g0] ASI_DTLB_DATA_IN_REG
         retry
 END(tl1_dmmu_miss_direct)
 
@@ -1584,11 +1631,14 @@ ENTRY(tl1_dmmu_prot_1)
          * Compute the address of the TTE.  The TSB mask and address of the
          * TSB are patched at startup.
          */
-        .globl  tl1_dmmu_prot_patch_1
-tl1_dmmu_prot_patch_1:
+        .globl  tl1_dmmu_prot_patch_tsb_mask_1
+tl1_dmmu_prot_patch_tsb_mask_1:
         sethi   %hi(TSB_KERNEL_MASK), %g6
         or      %g6, %lo(TSB_KERNEL_MASK), %g6
+        .globl  tl1_dmmu_prot_patch_tsb_1
+tl1_dmmu_prot_patch_tsb_1:
         sethi   %hi(TSB_KERNEL), %g7
+        or      %g7, %lo(TSB_KERNEL), %g7
 
         srlx    %g5, TAR_VPN_SHIFT, %g5
         and     %g5, %g6, %g6
@@ -1598,7 +1648,9 @@ tl1_dmmu_prot_patch_1:
         /*
          * Load the TTE.
          */
-        ldda    [%g6] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
+        .globl  tl1_dmmu_prot_patch_quad_ldd_1
+tl1_dmmu_prot_patch_quad_ldd_1:
+        ldda    [%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
         /*
          * Check that it's valid and writeable and that the virtual page
@@ -1625,11 +1677,14 @@ tl1_dmmu_prot_patch_1:
          * Recompute the TTE address, which we clobbered loading the TTE.
          * The TSB mask and address of the TSB are patched at startup.
          */
-        .globl  tl1_dmmu_prot_patch_2
-tl1_dmmu_prot_patch_2:
+        .globl  tl1_dmmu_prot_patch_tsb_mask_2
+tl1_dmmu_prot_patch_tsb_mask_2:
         sethi   %hi(TSB_KERNEL_MASK), %g6
         or      %g6, %lo(TSB_KERNEL_MASK), %g6
+        .globl  tl1_dmmu_prot_patch_tsb_2
+tl1_dmmu_prot_patch_tsb_2:
         sethi   %hi(TSB_KERNEL), %g7
+        or      %g7, %lo(TSB_KERNEL), %g7
 
         and     %g5, %g6, %g5
         sllx    %g5, TTE_SHIFT, %g5
@@ -1638,7 +1693,10 @@ tl1_dmmu_prot_patch_2:
         /*
          * Set the hardware write bit.
          */
-        TTE_SET_W(%g5, %g6, %g7)
+        .globl  tl1_dmmu_prot_patch_asi_1
+tl1_dmmu_prot_patch_asi_1:
+        wr      %g0, TSB_ASI, %asi
+        TTE_SET_W(%g5, %g6, %g7, a, %asi)
 
         /*
          * May have become invalid during casxa, in which case start over.
diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c
index b714805ce2e9..c0d268b05727 100644
--- a/sys/sparc64/sparc64/genassym.c
+++ b/sys/sparc64/sparc64/genassym.c
@@ -136,6 +136,7 @@ ASSYM(TS_MIN, TS_MIN);
 ASSYM(TS_MAX, TS_MAX);
 ASSYM(TLB_DAR_SLOT_SHIFT, TLB_DAR_SLOT_SHIFT);
 ASSYM(TLB_CXR_PGSZ_MASK, TLB_CXR_PGSZ_MASK);
+ASSYM(TLB_DIRECT_ADDRESS_MASK, TLB_DIRECT_ADDRESS_MASK);
 ASSYM(TLB_DIRECT_TO_TTE_MASK, TLB_DIRECT_TO_TTE_MASK);
 ASSYM(TV_SIZE_BITS, TV_SIZE_BITS);
 #endif
diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c
index 588a0ca34e2e..8700f8904860 100644
--- a/sys/sparc64/sparc64/mp_machdep.c
+++ b/sys/sparc64/sparc64/mp_machdep.c
@@ -89,6 +89,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/smp.h>
 #include <machine/tick.h>
 #include <machine/tlb.h>
+#include <machine/tsb.h>
 #include <machine/tte.h>
 #include <machine/ver.h>
 
@@ -439,8 +440,12 @@ cpu_mp_bootstrap(struct pcpu *pc)
         tick_clear(pc->pc_impl);
         tick_stop(pc->pc_impl);
 
-        /* Lock the kernel TSB in the TLB. */
-        pmap_map_tsb();
+        /* Set the kernel context. */
+        pmap_set_kctx();
+
+        /* Lock the kernel TSB in the TLB if necessary. */
+        if (tsb_kernel_ldd_phys == 0)
+                pmap_map_tsb();
 
         /*
          * Flush all non-locked TLB entries possibly left over by the
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index e56c09430eb3..135960e091f1 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -156,6 +156,8 @@ struct pmap kernel_pmap_store;
  */
 static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
 
+static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
+
 /*
  * Map the given physical page at the specified virtual address in the
  * target pmap with the protection requested.  If specified the page
@@ -166,12 +168,26 @@ static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
 static void pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, boolean_t wired);
 
-extern int tl1_immu_miss_patch_1[];
-extern int tl1_immu_miss_patch_2[];
-extern int tl1_dmmu_miss_patch_1[];
-extern int tl1_dmmu_miss_patch_2[];
-extern int tl1_dmmu_prot_patch_1[];
-extern int tl1_dmmu_prot_patch_2[];
+extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
+extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
+extern int tl1_dmmu_miss_patch_asi_1[];
+extern int tl1_dmmu_miss_patch_quad_ldd_1[];
+extern int tl1_dmmu_miss_patch_tsb_1[];
+extern int tl1_dmmu_miss_patch_tsb_2[];
+extern int tl1_dmmu_miss_patch_tsb_mask_1[];
+extern int tl1_dmmu_miss_patch_tsb_mask_2[];
+extern int tl1_dmmu_prot_patch_asi_1[];
+extern int tl1_dmmu_prot_patch_quad_ldd_1[];
+extern int tl1_dmmu_prot_patch_tsb_1[];
+extern int tl1_dmmu_prot_patch_tsb_2[];
+extern int tl1_dmmu_prot_patch_tsb_mask_1[];
+extern int tl1_dmmu_prot_patch_tsb_mask_2[];
+extern int tl1_immu_miss_patch_asi_1[];
+extern int tl1_immu_miss_patch_quad_ldd_1[];
+extern int tl1_immu_miss_patch_tsb_1[];
+extern int tl1_immu_miss_patch_tsb_2[];
+extern int tl1_immu_miss_patch_tsb_mask_1[];
+extern int tl1_immu_miss_patch_tsb_mask_2[];
 
 /*
  * If user pmap is processed with pmap_remove and the
@@ -302,13 +318,21 @@ pmap_bootstrap(u_int cpu_impl)
         vm_size_t physsz;
         vm_size_t virtsz;
         u_long data;
+        u_long vpn;
         phandle_t pmem;
         phandle_t vmem;
         u_int dtlb_slots_avail;
         int i;
         int j;
         int sz;
+        uint32_t asi;
         uint32_t colors;
+        uint32_t ldd;
+
+        /*
+         * Set the kernel context.
+         */
+        pmap_set_kctx();
 
         colors = dcache_color_ignore != 0 ?
             1 : DCACHE_COLORS;
@@ -355,40 +379,56 @@ pmap_bootstrap(u_int cpu_impl)
         /*
          * Calculate the size of kernel virtual memory, and the size and mask
          * for the kernel TSB based on the phsyical memory size but limited
-         * by the amount of dTLB slots available for locked entries (given
-         * that for spitfire-class CPUs all of the dt64 slots can hold locked
-         * entries but there is no large dTLB for unlocked ones, we don't use
-         * more than half of it for locked entries).
+         * by the amount of dTLB slots available for locked entries if we have
+         * to lock the TSB in the TLB (given that for spitfire-class CPUs all
+         * of the dt64 slots can hold locked entries but there is no large
+         * dTLB for unlocked ones, we don't use more than half of it for the
+         * TSB).
+         * Note that for reasons unknown OpenSolaris doesn't take advantage of
+         * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
+         * public documentation is available for these, the latter just might
+         * not support it, yet.
          */
-        dtlb_slots_avail = 0;
-        for (i = 0; i < dtlb_slots; i++) {
-                data = dtlb_get_data(i);
-                if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
-                        dtlb_slots_avail++;
-        }
+        virtsz = roundup(physsz, PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT));
+        if (cpu_impl >= CPU_IMPL_ULTRASPARCIIIp)
+                tsb_kernel_ldd_phys = 1;
+        else {
+                dtlb_slots_avail = 0;
+                for (i = 0; i < dtlb_slots; i++) {
+                        data = dtlb_get_data(i);
+                        if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
+                                dtlb_slots_avail++;
+                }
 #ifdef SMP
-        dtlb_slots_avail -= PCPU_PAGES;
+                dtlb_slots_avail -= PCPU_PAGES;
 #endif
-        if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
-            cpu_impl < CPU_IMPL_ULTRASPARCIII)
-                dtlb_slots_avail /= 2;
-        virtsz = roundup(physsz, PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT));
-        virtsz = MIN(virtsz,
-            (dtlb_slots_avail * PAGE_SIZE_4M) << (PAGE_SHIFT - TTE_SHIFT));
+                if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
+                    cpu_impl < CPU_IMPL_ULTRASPARCIII)
+                        dtlb_slots_avail /= 2;
+                virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
+                    (PAGE_SHIFT - TTE_SHIFT));
+        }
         vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
         tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
         tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
 
         /*
-         * Allocate the kernel TSB and lock it in the TLB.
+         * Allocate the kernel TSB and lock it in the TLB if necessary.
          */
         pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
         if (pa & PAGE_MASK_4M)
-                panic("pmap_bootstrap: tsb unaligned\n");
+                panic("pmap_bootstrap: TSB unaligned\n");
         tsb_kernel_phys = pa;
-        tsb_kernel = (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
-        pmap_map_tsb();
-        bzero(tsb_kernel, tsb_kernel_size);
+        if (tsb_kernel_ldd_phys == 0) {
+                tsb_kernel =
+                    (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
+                pmap_map_tsb();
+                bzero(tsb_kernel, tsb_kernel_size);
+        } else {
+                tsb_kernel =
+                    (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
+                aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
+        }
 
         /*
          * Allocate and map the dynamic per-CPU area for the BSP.
@@ -403,35 +443,84 @@ pmap_bootstrap(u_int cpu_impl)
         msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
 
         /*
-         * Patch the virtual address and the tsb mask into the trap table.
+         * Patch the TSB addresses and mask as well as the ASIs used to load
+         * it into the trap table.
          */
-#define SETHI(rd, imm22) \
-        (EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) | \
+#define LDDA_R_I_R(rd, imm_asi, rs1, rs2)                               \
+        (EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |     \
+            EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |   \
+            EIF_F3_RS2(rs2))
+#define OR_R_I_R(rd, imm13, rs1)                                        \
+        (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |       \
+            EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
+#define SETHI(rd, imm22)                                                \
+        (EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |   \
             EIF_IMM((imm22) >> 10, 22))
-#define OR_R_I_R(rd, imm13, rs1) \
-        (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) | \
+#define WR_R_I(rd, imm13, rs1)                                          \
+        (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |       \
             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
 
-#define PATCH(addr) do { \
-        if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) || \
-            addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0, IF_F3_RS1(addr[1])) || \
-            addr[2] != SETHI(IF_F2_RD(addr[2]), 0x0)) \
-                panic("pmap_boostrap: patched instructions have changed"); \
-        addr[0] |= EIF_IMM((tsb_kernel_mask) >> 10, 22); \
-        addr[1] |= EIF_IMM(tsb_kernel_mask, 10); \
-        addr[2] |= EIF_IMM(((vm_offset_t)tsb_kernel) >> 10, 22); \
-        flush(addr); \
-        flush(addr + 1); \
-        flush(addr + 2); \
+#define PATCH_ASI(addr, asi) do {                                       \
+        if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,                   \
+            IF_F3_RS1(addr[0])))                                        \
+                panic("%s: patched instructions have changed",          \
+                    __func__);                                          \
+        addr[0] |= EIF_IMM((asi), 13);                                  \
+        flush(addr);                                                    \
+} while (0)
+
+#define PATCH_LDD(addr, asi) do {                                       \
+        if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,               \
+            IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))                    \
+                panic("%s: patched instructions have changed",          \
+                    __func__);                                          \
+        addr[0] |= EIF_F3_IMM_ASI(asi);                                 \
+        flush(addr);                                                    \
 } while (0)
 
-        PATCH(tl1_immu_miss_patch_1);
-        PATCH(tl1_immu_miss_patch_2);
-        PATCH(tl1_dmmu_miss_patch_1);
-        PATCH(tl1_dmmu_miss_patch_2);
-        PATCH(tl1_dmmu_prot_patch_1);
-        PATCH(tl1_dmmu_prot_patch_2);
+#define PATCH_TSB(addr, val) do {                                       \
+        if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
+            addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
+            IF_F3_RS1(addr[1])))                                        \
+                panic("%s: patched instructions have changed",          \
+                    __func__);                                          \
+        addr[0] |= EIF_IMM((val) >> 10, 22);                            \
+        addr[1] |= EIF_IMM((val), 10);                                  \
+        flush(addr);                                                    \
+        flush(addr + 1);                                                \
+} while (0)
+
+        if (tsb_kernel_ldd_phys == 0) {
+                asi = ASI_N;
+                ldd = ASI_NUCLEUS_QUAD_LDD;
+                off = (vm_offset_t)tsb_kernel;
+        } else {
+                asi = ASI_PHYS_USE_EC;
+                ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
+                off = (vm_offset_t)tsb_kernel_phys;
+        }
+        PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
+        PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
+            tsb_kernel_phys + tsb_kernel_size - 1);
+        PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
+        PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
+        PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
+        PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
+        PATCH_TSB(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
+        PATCH_TSB(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
+        PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
+        PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
+        PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
+        PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
+        PATCH_TSB(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
+        PATCH_TSB(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
+        PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
+        PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
+        PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
+        PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
+        PATCH_TSB(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
+        PATCH_TSB(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
 
         /*
          * Enter fake 8k pages for the 4MB kernel pages, so that
@@ -442,9 +531,10 @@ pmap_bootstrap(u_int cpu_impl)
                 va = kernel_tlbs[i].te_va;
                 for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
                         tp = tsb_kvtotte(va + off);
-                        tp->tte_vpn = TV_VPN(va + off, TS_8K);
-                        tp->tte_data = TD_V | TD_8K | TD_PA(pa + off) |
-                            TD_REF | TD_SW | TD_CP | TD_CV | TD_P | TD_W;
+                        vpn = TV_VPN(va + off, TS_8K);
+                        data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
+                            TD_SW | TD_CP | TD_CV | TD_P | TD_W;
+                        pmap_bootstrap_set_tte(tp, vpn, data);
                 }
         }
 
@@ -485,9 +575,10 @@ pmap_bootstrap(u_int cpu_impl)
                 pa = kstack0_phys + i * PAGE_SIZE;
                 va = kstack0 + i * PAGE_SIZE;
                 tp = tsb_kvtotte(va);
-                tp->tte_vpn = TV_VPN(va, TS_8K);
-                tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW |
-                    TD_CP | TD_CV | TD_P | TD_W;
+                vpn = TV_VPN(va, TS_8K);
+                data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
+                    TD_CV | TD_P | TD_W;
+                pmap_bootstrap_set_tte(tp, vpn, data);
         }
 
         /*
@@ -527,9 +618,8 @@ pmap_bootstrap(u_int cpu_impl)
                     off += PAGE_SIZE) {
                         va = translations[i].om_start + off;
                         tp = tsb_kvtotte(va);
-                        tp->tte_vpn = TV_VPN(va, TS_8K);
-                        tp->tte_data =
-                            ((translations[i].om_tte &
+                        vpn = TV_VPN(va, TS_8K);
+                        data = ((translations[i].om_tte &
                             ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
                             (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
                             cpu_impl < CPU_IMPL_ULTRASPARCIII ?
                             (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
                             (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
                             (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) + off;
+                        pmap_bootstrap_set_tte(tp, vpn, data);
                 }
         }
 
@@ -571,20 +662,17 @@ pmap_bootstrap(u_int cpu_impl)
         tlb_flush_nonlocked();
 }
 
+/*
+ * Map the 4MB kernel TSB pages.
+ */
 void
 pmap_map_tsb(void)
 {
         vm_offset_t va;
         vm_paddr_t pa;
         u_long data;
-        register_t s;
         int i;
 
-        s = intr_disable();
-
-        /*
-         * Map the 4MB TSB pages.
-         */
         for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
                 va = (vm_offset_t)tsb_kernel + i;
                 pa = tsb_kernel_phys + i;
@@ -594,16 +682,19 @@ pmap_map_tsb(void)
                     TLB_TAR_CTX(TLB_CTX_KERNEL));
                 stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
         }
+}
+
+/*
+ * Set the secondary context to be the kernel context (needed for FP block
+ * operations in the kernel).
+ */
+void
+pmap_set_kctx(void)
+{
 
-        /*
-         * Set the secondary context to be the kernel context (needed for
-         * FP block operations in the kernel).
-         */
         stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
             TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
         flush(KERNBASE);
-
-        intr_restore(s);
 }
 
 /*
@@ -629,6 +720,27 @@ pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
 }
 
 /*
+ * Set a TTE.  This function is intended as a helper when tsb_kernel is
+ * direct-mapped but we haven't taken over the trap table, yet, as it's the
+ * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
+ * the kernel TSB.
+ */
+void
+pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
+{
+
+        if (tsb_kernel_ldd_phys == 0) {
+                tp->tte_vpn = vpn;
+                tp->tte_data = data;
+        } else {
+                stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
+                    ASI_PHYS_USE_EC, vpn);
+                stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
+                    ASI_PHYS_USE_EC, data);
+        }
+}
+
+/*
  * Initialize a vm_page's machine-dependent fields.
  */
 void
diff --git a/sys/sparc64/sparc64/tsb.c b/sys/sparc64/sparc64/tsb.c
index 72206b7afdd6..37606b0716a5 100644
--- a/sys/sparc64/sparc64/tsb.c
+++ b/sys/sparc64/sparc64/tsb.c
@@ -26,9 +26,11 @@
  * SUCH DAMAGE.
  *
  * from BSDI: pmap.c,v 1.28.2.15 2000/04/27 03:10:31 cp Exp
- *      $FreeBSD$
  */
 
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include "opt_ddb.h"
 #include "opt_pmap.h"
 
@@ -43,7 +45,7 @@
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
-#include <vm/vm.h> 
+#include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
@@ -78,6 +80,8 @@ struct tte *tsb_kernel;
 vm_size_t tsb_kernel_mask;
 vm_size_t tsb_kernel_size;
 vm_paddr_t tsb_kernel_phys;
+vm_paddr_t tsb_kernel_phys_end;
+u_int tsb_kernel_ldd_phys;
 
 struct tte *
 tsb_tte_lookup(pmap_t pm, vm_offset_t va)