aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorConrad Meyer <cem@FreeBSD.org>2019-05-04 20:34:26 +0000
committerConrad Meyer <cem@FreeBSD.org>2019-05-04 20:34:26 +0000
commit665919aaaf436bfc662701f0b0efa4488ea1b1a2 (patch)
tree2e235e29055d517e6a2f123621fe0bcb173df359 /sys
parentecaed009a9743ac085227b3740ea80f5ffc5063d (diff)
downloadsrc-665919aaaf436bfc662701f0b0efa4488ea1b1a2.tar.gz
src-665919aaaf436bfc662701f0b0efa4488ea1b1a2.zip
x86: Implement MWAIT support for stopping a CPU
IPI_STOP is used after panic or when ddb is entered manually. MONITOR/MWAIT allows CPUs that support the feature to sleep in a low power way instead of spinning. Something similar is already used at idle. It is perhaps especially useful in oversubscribed VM environments, and is safe to use even if the panic/ddb thread is not the BSP. (Except in the presence of MWAIT errata, which are detected automatically on platforms with known wakeup problems.) It can be tuned/sysctled with "machdep.stop_mwait," which defaults to 0 (off). This commit also introduces the tunable "machdep.mwait_cpustop_broken," which defaults to 0, unless the CPU has known errata, but may be set to "1" in loader.conf to signal that mwait wakeup is broken on CPUs FreeBSD does not yet know about. Unfortunately, Bhyve doesn't yet support MONITOR extensions, so this doesn't help bhyve hypervisors running FreeBSD guests. Submitted by: Anton Rang <rang AT acm.org> (earlier version) Reviewed by: kib Sponsored by: Dell EMC Isilon Differential Revision: https://reviews.freebsd.org/D20135
Notes
Notes: svn path=/head/; revision=347134
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/include/pcpu.h6
-rw-r--r--sys/i386/include/pcpu.h6
-rw-r--r--sys/kern/subr_smp.c52
-rw-r--r--sys/x86/include/x86_smp.h5
-rw-r--r--sys/x86/x86/cpu_machdep.c13
-rw-r--r--sys/x86/x86/mp_x86.c26
6 files changed, 91 insertions, 17 deletions
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index 4ca9d5588e5f..044e589b8c18 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -39,7 +39,8 @@
struct monitorbuf {
int idle_state; /* Used by cpu_idle_mwait. */
- char padding[128 - (1 * sizeof(int))];
+ int stop_state; /* Used by cpustop_handler. */
+ char padding[128 - (2 * sizeof(int))];
};
_Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
@@ -90,6 +91,9 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
#ifdef _KERNEL
+#define MONITOR_STOPSTATE_RUNNING 0
+#define MONITOR_STOPSTATE_STOPPED 1
+
#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
/*
diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h
index 3ae8e7e14422..0758e9e63b61 100644
--- a/sys/i386/include/pcpu.h
+++ b/sys/i386/include/pcpu.h
@@ -43,7 +43,8 @@
struct monitorbuf {
int idle_state; /* Used by cpu_idle_mwait. */
- char padding[128 - (1 * sizeof(int))];
+ int stop_state; /* Used by cpustop_handler. */
+ char padding[128 - (2 * sizeof(int))];
};
_Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
@@ -90,6 +91,9 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
#ifdef _KERNEL
+#define MONITOR_STOPSTATE_RUNNING 0
+#define MONITOR_STOPSTATE_STOPPED 1
+
#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
/*
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 280ea96e36ae..eceb834ce969 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -351,42 +351,68 @@ generic_restart_cpus(cpuset_t map, u_int type)
#endif
volatile cpuset_t *cpus;
- KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
#if X86
- || type == IPI_SUSPEND
-#endif
- , ("%s: invalid stop type", __func__));
+ KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
+ || type == IPI_SUSPEND, ("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
-#if X86
if (type == IPI_SUSPEND)
cpus = &resuming_cpus;
else
-#endif
cpus = &stopped_cpus;
/* signal other cpus to restart */
-#if X86
if (type == IPI_SUSPEND)
CPU_COPY_STORE_REL(&map, &toresume_cpus);
else
-#endif
CPU_COPY_STORE_REL(&map, &started_cpus);
-#if X86
+ /*
+ * Wake up any CPUs stopped with MWAIT. From MI code we can't tell if
+ * MONITOR/MWAIT is enabled, but the potentially redundant writes are
+ * relatively inexpensive.
+ */
+ if (type == IPI_STOP) {
+ struct monitorbuf *mb;
+ u_int id;
+
+ CPU_FOREACH(id) {
+ if (!CPU_ISSET(id, &map))
+ continue;
+
+ mb = &pcpu_find(id)->pc_monitorbuf;
+ atomic_store_int(&mb->stop_state,
+ MONITOR_STOPSTATE_RUNNING);
+ }
+ }
+
if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
-#endif
+ /* wait for each to clear its bit */
+ while (CPU_OVERLAP(cpus, &map))
+ cpu_spinwait();
+ }
+#else /* !X86 */
+ KASSERT(type == IPI_STOP || type == IPI_STOP_HARD,
+ ("%s: invalid stop type", __func__));
+
+ if (!smp_started)
+ return (0);
+
+ CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
+
+ cpus = &stopped_cpus;
+
+ /* signal other cpus to restart */
+ CPU_COPY_STORE_REL(&map, &started_cpus);
+
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
-#if X86
- }
#endif
-
return (1);
}
diff --git a/sys/x86/include/x86_smp.h b/sys/x86/include/x86_smp.h
index f7e28df06ccb..f598acfe216b 100644
--- a/sys/x86/include/x86_smp.h
+++ b/sys/x86/include/x86_smp.h
@@ -61,6 +61,11 @@ struct cpu_info {
};
extern struct cpu_info *cpu_info;
+/*
+ * Set if MWAIT does not reliably wake when the MONITORed address is written.
+ */
+extern bool mwait_cpustop_broken;
+
#ifdef COUNT_IPIS
extern u_long *ipi_invltlb_counts[MAXCPU];
extern u_long *ipi_invlrng_counts[MAXCPU];
diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c
index 71925c38560d..6505d40ed365 100644
--- a/sys/x86/x86/cpu_machdep.c
+++ b/sys/x86/x86/cpu_machdep.c
@@ -110,6 +110,13 @@ static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
+/*
+ * Automatically initialized per CPU errata in cpu_idle_tun below.
+ */
+bool mwait_cpustop_broken = false;
+SYSCTL_BOOL(_machdep, OID_AUTO, mwait_cpustop_broken, CTLFLAG_RDTUN,
+ &mwait_cpustop_broken, 0,
+ "Can not reliably wake MONITOR/MWAIT cpus without interrupts");
/*
* Machine dependent boot() routine
@@ -358,6 +365,7 @@ void
cpu_reset(void)
{
#ifdef SMP
+ struct monitorbuf *mb;
cpuset_t map;
u_int cnt;
@@ -378,6 +386,9 @@ cpu_reset(void)
/* Restart CPU #0. */
CPU_SETOF(0, &started_cpus);
+ mb = &pcpu_find(0)->pc_monitorbuf;
+ atomic_store_int(&mb->stop_state,
+ MONITOR_STOPSTATE_RUNNING);
wmb();
cnt = 0;
@@ -716,6 +727,7 @@ cpu_idle_tun(void *unused __unused)
/* Ryzen erratas 1057, 1109. */
cpu_idle_selector("hlt");
idle_mwait = 0;
+ mwait_cpustop_broken = true;
}
if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_id == 0x506c9) {
@@ -727,6 +739,7 @@ cpu_idle_tun(void *unused __unused)
* sleep states.
*/
cpu_idle_apl31_workaround = 1;
+ mwait_cpustop_broken = true;
}
TUNABLE_INT_FETCH("machdep.idle_apl31", &cpu_idle_apl31_workaround);
}
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index 49c45e382b83..43e66d81dbaa 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -161,6 +161,10 @@ struct cache_info {
unsigned int boot_address;
+static bool stop_mwait = false;
+SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0,
+ "Use MONITOR/MWAIT when stopping CPU, if available");
+
#define MiB(v) (v ## ULL << 20)
void
@@ -1390,23 +1394,41 @@ nmi_call_kdb_smp(u_int type, struct trapframe *frame)
}
/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
+ * Handle an IPI_STOP by saving our current context and spinning (or mwaiting,
+ * if available) until we are resumed.
*/
void
cpustop_handler(void)
{
+ struct monitorbuf *mb;
u_int cpu;
+ bool use_mwait;
cpu = PCPU_GET(cpuid);
savectx(&stoppcbs[cpu]);
+ use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) != 0 &&
+ !mwait_cpustop_broken);
+ if (use_mwait) {
+ mb = PCPU_PTR(monitorbuf);
+ atomic_store_int(&mb->stop_state,
+ MONITOR_STOPSTATE_STOPPED);
+ }
+
/* Indicate that we are stopped */
CPU_SET_ATOMIC(cpu, &stopped_cpus);
/* Wait for restart */
while (!CPU_ISSET(cpu, &started_cpus)) {
+ if (use_mwait) {
+ cpu_monitor(mb, 0, 0);
+ if (atomic_load_int(&mb->stop_state) ==
+ MONITOR_STOPSTATE_STOPPED)
+ cpu_mwait(0, MWAIT_C1);
+ continue;
+ }
+
ia32_pause();
/*