aboutsummaryrefslogtreecommitdiff
path: root/sys/kern/subr_smp.c
diff options
context:
space:
mode:
authorKonstantin Belousov <kib@FreeBSD.org>2016-10-24 16:40:27 +0000
committerKonstantin Belousov <kib@FreeBSD.org>2016-10-24 16:40:27 +0000
commit835c2787bed752ffeeaeceae9f01839db0f18dff (patch)
treee7ee1386557b6833d1b766a4f747c0fb232ba568 /sys/kern/subr_smp.c
parent1d1140af580219d33a04ec84536caa6ef3e3d264 (diff)
downloadsrc-835c2787bed752ffeeaeceae9f01839db0f18dff.tar.gz
src-835c2787bed752ffeeaeceae9f01839db0f18dff.zip
Handle broadcast NMIs.
On several Intel chipsets, diagnostic NMIs sent from BMC or NMIs reporting hardware errors are broadcast to all CPUs. When the kernel is configured to enter kdb on NMI, the outcome is problematic, because each CPU tries to enter kdb. All CPUs are executing NMI handlers, which set the latches disabling the nested NMI delivery; this means that stop_cpus_hard(), used by kdb_enter() to stop other cpus by broadcasting IPI_STOP_HARD NMI, cannot work. One indication of this is the harmless but annoying diagnostic "timeout stopping cpus". A much more harmful behaviour is that because all CPUs try to enter kdb, and if ddb is used as the debugger, all CPUs issue a prompt on console and race for the input, not to mention the simultaneous use of the ddb shared state. Try to fix this by introducing a pseudo-lock for simultaneous attempts to handle NMIs. If one core happens to enter the NMI trap handler, other cores see it and simulate reception of the IPI_STOP_HARD. Moreover, generic_stop_cpus() avoids sending IPI_STOP_HARD and avoids waiting for the acknowledgement, relying on the NMI handler on other cores suspending and then restarting the CPU. Since it is impossible to detect at runtime whether some stray NMI is broadcast or unicast, add a knob for the administrator (really developer) to configure the debugging NMI handling mode. The updated patch was debugged with the help from Andrey Gapon (avg) and discussed with him. Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D8249
Notes
Notes: svn path=/head/; revision=307866
Diffstat (limited to 'sys/kern/subr_smp.c')
-rw-r--r--sys/kern/subr_smp.c53
1 files changed, 34 insertions, 19 deletions
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index d98b1b7212ea..0ab5a9afc743 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -209,6 +209,11 @@ forward_signal(struct thread *td)
* 1: ok
*
*/
+#if defined(__amd64__) || defined(__i386__)
+#define X86 1
+#else
+#define X86 0
+#endif
static int
generic_stop_cpus(cpuset_t map, u_int type)
{
@@ -220,12 +225,11 @@ generic_stop_cpus(cpuset_t map, u_int type)
volatile cpuset_t *cpus;
KASSERT(
-#if defined(__amd64__) || defined(__i386__)
- type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
-#else
- type == IPI_STOP || type == IPI_STOP_HARD,
+ type == IPI_STOP || type == IPI_STOP_HARD
+#if X86
+ || type == IPI_SUSPEND
#endif
- ("%s: invalid stop type", __func__));
+ , ("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
@@ -233,7 +237,7 @@ generic_stop_cpus(cpuset_t map, u_int type)
CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
cpusetobj_strprint(cpusetbuf, &map), type);
-#if defined(__amd64__) || defined(__i386__)
+#if X86
/*
* When suspending, ensure there are no IPIs in progress.
* IPIs that have been issued, but not yet delivered (e.g.
@@ -245,6 +249,9 @@ generic_stop_cpus(cpuset_t map, u_int type)
mtx_lock_spin(&smp_ipi_mtx);
#endif
+#if X86
+ if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
+#endif
if (stopping_cpu != PCPU_GET(cpuid))
while (atomic_cmpset_int(&stopping_cpu, NOCPU,
PCPU_GET(cpuid)) == 0)
@@ -253,8 +260,11 @@ generic_stop_cpus(cpuset_t map, u_int type)
/* send the stop IPI to all CPUs in map */
ipi_selected(map, type);
+#if X86
+ }
+#endif
-#if defined(__amd64__) || defined(__i386__)
+#if X86
if (type == IPI_SUSPEND)
cpus = &suspended_cpus;
else
@@ -272,7 +282,7 @@ generic_stop_cpus(cpuset_t map, u_int type)
}
}
-#if defined(__amd64__) || defined(__i386__)
+#if X86
if (type == IPI_SUSPEND)
mtx_unlock_spin(&smp_ipi_mtx);
#endif
@@ -295,7 +305,7 @@ stop_cpus_hard(cpuset_t map)
return (generic_stop_cpus(map, IPI_STOP_HARD));
}
-#if defined(__amd64__) || defined(__i386__)
+#if X86
int
suspend_cpus(cpuset_t map)
{
@@ -325,20 +335,18 @@ generic_restart_cpus(cpuset_t map, u_int type)
#endif
volatile cpuset_t *cpus;
- KASSERT(
-#if defined(__amd64__) || defined(__i386__)
- type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
-#else
- type == IPI_STOP || type == IPI_STOP_HARD,
+ KASSERT(type == IPI_STOP || type == IPI_STOP_HARD
+#if X86
+ || type == IPI_SUSPEND
#endif
- ("%s: invalid stop type", __func__));
+ , ("%s: invalid stop type", __func__));
if (!smp_started)
- return 0;
+ return (0);
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
-#if defined(__amd64__) || defined(__i386__)
+#if X86
if (type == IPI_SUSPEND)
cpus = &suspended_cpus;
else
@@ -348,11 +356,17 @@ generic_restart_cpus(cpuset_t map, u_int type)
/* signal other cpus to restart */
CPU_COPY_STORE_REL(&map, &started_cpus);
+#if X86
+ if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
+#endif
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
+#if X86
+ }
+#endif
- return 1;
+ return (1);
}
int
@@ -362,7 +376,7 @@ restart_cpus(cpuset_t map)
return (generic_restart_cpus(map, IPI_STOP));
}
-#if defined(__amd64__) || defined(__i386__)
+#if X86
int
resume_cpus(cpuset_t map)
{
@@ -370,6 +384,7 @@ resume_cpus(cpuset_t map)
return (generic_restart_cpus(map, IPI_SUSPEND));
}
#endif
+#undef X86
/*
* All-CPU rendezvous. CPUs are signalled, all execute the setup function