about summary refs log tree commit diff
diff options: context / space / mode
authorMark Johnston <markj@FreeBSD.org>2015-09-11 03:54:37 +0000
committerMark Johnston <markj@FreeBSD.org>2015-09-11 03:54:37 +0000
commit610141cebb67f24c49c698b8828067db5acc1b55 (patch)
tree28426773e296f84e10d3d95e2a1489974f368b93
parent1e954a7c639b52ed66042795c7445f16c4fc4dc9 (diff)
downloadsrc-610141cebb67f24c49c698b8828067db5acc1b55.tar.gz
src-610141cebb67f24c49c698b8828067db5acc1b55.zip
Add stack_save_td_running(), a function to trace the kernel stack of a
running thread.

It is currently implemented only on amd64 and i386; on these
architectures, it is implemented by raising an NMI on the CPU on which
the target thread is currently running. Unlike stack_save_td(), it may
fail, for example if the thread is running in user mode.

This change also modifies the kern.proc.kstack sysctl to use this
function, so that stacks of running threads are shown in the output of
"procstat -kk". This is handy for debugging threads that are stuck in a
busy loop.

Reviewed by:	bdrewery, jhb, kib
Sponsored by:	EMC / Isilon Storage Division
Differential Revision:	https://reviews.freebsd.org/D3256
Notes
Notes: svn path=/head/; revision=287645
-rw-r--r--sys/amd64/amd64/trap.c29
-rw-r--r--sys/arm/arm/stack_machdep.c7
-rw-r--r--sys/arm64/arm64/stack_machdep.c7
-rw-r--r--sys/i386/i386/trap.c31
-rw-r--r--sys/kern/kern_proc.c11
-rw-r--r--sys/mips/mips/stack_machdep.c7
-rw-r--r--sys/powerpc/powerpc/stack_machdep.c7
-rw-r--r--sys/sparc64/sparc64/stack_machdep.c7
-rw-r--r--sys/sys/stack.h3
-rw-r--r--sys/x86/include/apicvar.h4
-rw-r--r--sys/x86/include/stack.h4
-rw-r--r--sys/x86/x86/local_apic.c7
-rw-r--r--sys/x86/x86/mp_x86.c12
-rw-r--r--sys/x86/x86/stack_machdep.c68
14 files changed, 165 insertions, 39 deletions
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 3bf63c36e5a8..776f90c6fb88 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
#include "opt_kdb.h"
+#include "opt_stack.h"
#include <sys/param.h>
#include <sys/bus.h>
@@ -91,6 +92,7 @@ PMC_SOFT_DEFINE( , , page_fault, write);
#ifdef SMP
#include <machine/smp.h>
#endif
+#include <machine/stack.h>
#include <machine/tss.h>
#ifdef KDTRACE_HOOKS
@@ -202,17 +204,24 @@ trap(struct trapframe *frame)
goto out;
}
-#ifdef HWPMC_HOOKS
- /*
- * CPU PMCs interrupt using an NMI. If the PMC module is
- * active, pass the 'rip' value to the PMC module's interrupt
- * handler. A return value of '1' from the handler means that
- * the NMI was handled by it and we can return immediately.
- */
- if (type == T_NMI && pmc_intr &&
- (*pmc_intr)(PCPU_GET(cpuid), frame))
- goto out;
+ if (type == T_NMI) {
+#ifdef HWPMC_HOOKS
+ /*
+ * CPU PMCs interrupt using an NMI. If the PMC module is
+ * active, pass the 'rip' value to the PMC module's interrupt
+ * handler. A non-zero return value from the handler means that
+ * the NMI was consumed by it and we can return immediately.
+ */
+ if (pmc_intr != NULL &&
+ (*pmc_intr)(PCPU_GET(cpuid), frame) != 0)
+ goto out;
+#endif
+
+#ifdef STACK
+ if (stack_nmi_handler(frame) != 0)
+ goto out;
#endif
+ }
if (type == T_MCHK) {
mca_intr();
diff --git a/sys/arm/arm/stack_machdep.c b/sys/arm/arm/stack_machdep.c
index 9e68023e8d92..6d23be6edc7e 100644
--- a/sys/arm/arm/stack_machdep.c
+++ b/sys/arm/arm/stack_machdep.c
@@ -71,6 +71,13 @@ stack_save_td(struct stack *st, struct thread *td)
stack_capture(st, frame);
}
+int
+stack_save_td_running(struct stack *st, struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
void
stack_save(struct stack *st)
{
diff --git a/sys/arm64/arm64/stack_machdep.c b/sys/arm64/arm64/stack_machdep.c
index 72a9ab9e92b6..0212c6335a05 100644
--- a/sys/arm64/arm64/stack_machdep.c
+++ b/sys/arm64/arm64/stack_machdep.c
@@ -72,6 +72,13 @@ stack_save_td(struct stack *st, struct thread *td)
stack_capture(st, &frame);
}
+int
+stack_save_td_running(struct stack *st, struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
void
stack_save(struct stack *st)
{
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index a3b1b0d2e9ec..40f72042d27f 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include "opt_isa.h"
#include "opt_kdb.h"
#include "opt_npx.h"
+#include "opt_stack.h"
#include "opt_trap.h"
#include <sys/param.h>
@@ -94,6 +95,7 @@ PMC_SOFT_DEFINE( , , page_fault, write);
#ifdef SMP
#include <machine/smp.h>
#endif
+#include <machine/stack.h>
#include <machine/tss.h>
#include <machine/vm86.h>
@@ -219,19 +221,26 @@ trap(struct trapframe *frame)
goto out;
}
-#ifdef HWPMC_HOOKS
- /*
- * CPU PMCs interrupt using an NMI so we check for that first.
- * If the HWPMC module is active, 'pmc_hook' will point to
- * the function to be called. A return value of '1' from the
- * hook means that the NMI was handled by it and that we can
- * return immediately.
- */
- if (type == T_NMI && pmc_intr &&
- (*pmc_intr)(PCPU_GET(cpuid), frame))
- goto out;
+ if (type == T_NMI) {
+#ifdef HWPMC_HOOKS
+ /*
+ * CPU PMCs interrupt using an NMI so we check for that first.
+ * If the HWPMC module is active, 'pmc_hook' will point to
+ * the function to be called. A non-zero return value from the
+ * hook means that the NMI was consumed by it and that we can
+ * return immediately.
+ */
+ if (pmc_intr != NULL &&
+ (*pmc_intr)(PCPU_GET(cpuid), frame) != 0)
+ goto out;
#endif
+#ifdef STACK
+ if (stack_nmi_handler(frame) != 0)
+ goto out;
+#endif
+ }
+
if (type == T_MCHK) {
mca_intr();
goto out;
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 3c88a59ca7a5..aa4c9044c8d1 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -2517,11 +2517,14 @@ repeat:
sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
thread_lock(td);
kkstp->kkst_tid = td->td_tid;
- if (TD_IS_SWAPPED(td))
+ if (TD_IS_SWAPPED(td)) {
kkstp->kkst_state = KKST_STATE_SWAPPED;
- else if (TD_IS_RUNNING(td))
- kkstp->kkst_state = KKST_STATE_RUNNING;
- else {
+ } else if (TD_IS_RUNNING(td)) {
+ if (stack_save_td_running(st, td) == 0)
+ kkstp->kkst_state = KKST_STATE_STACKOK;
+ else
+ kkstp->kkst_state = KKST_STATE_RUNNING;
+ } else {
kkstp->kkst_state = KKST_STATE_STACKOK;
stack_save_td(st, td);
}
diff --git a/sys/mips/mips/stack_machdep.c b/sys/mips/mips/stack_machdep.c
index e7971a2a9ccf..9b724cb3fc90 100644
--- a/sys/mips/mips/stack_machdep.c
+++ b/sys/mips/mips/stack_machdep.c
@@ -142,6 +142,13 @@ stack_save_td(struct stack *st, struct thread *td)
stack_capture(st, pc, sp);
}
+int
+stack_save_td_running(struct stack *st, struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
void
stack_save(struct stack *st)
{
diff --git a/sys/powerpc/powerpc/stack_machdep.c b/sys/powerpc/powerpc/stack_machdep.c
index a6e036444523..451e7bef71b8 100644
--- a/sys/powerpc/powerpc/stack_machdep.c
+++ b/sys/powerpc/powerpc/stack_machdep.c
@@ -98,6 +98,13 @@ stack_save_td(struct stack *st, struct thread *td)
stack_capture(st, frame);
}
+int
+stack_save_td_running(struct stack *st, struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
void
stack_save(struct stack *st)
{
diff --git a/sys/sparc64/sparc64/stack_machdep.c b/sys/sparc64/sparc64/stack_machdep.c
index 923d72cc8d2d..329368d78051 100644
--- a/sys/sparc64/sparc64/stack_machdep.c
+++ b/sys/sparc64/sparc64/stack_machdep.c
@@ -82,6 +82,13 @@ stack_save_td(struct stack *st, struct thread *td)
stack_capture(st, (struct frame *)(td->td_pcb->pcb_sp + SPOFF));
}
+int
+stack_save_td_running(struct stack *st, struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
void
stack_save(struct stack *st)
{
diff --git a/sys/sys/stack.h b/sys/sys/stack.h
index 5531467e22ff..e26b535e12ab 100644
--- a/sys/sys/stack.h
+++ b/sys/sys/stack.h
@@ -56,9 +56,10 @@ void stack_ktr(u_int, const char *, int, const struct stack *,
#define CTRSTACK(m, st, depth, cheap)
#endif
-/* MD Routine. */
+/* MD Routines. */
struct thread;
void stack_save(struct stack *);
void stack_save_td(struct stack *, struct thread *);
+int stack_save_td_running(struct stack *, struct thread *);
#endif
diff --git a/sys/x86/include/apicvar.h b/sys/x86/include/apicvar.h
index 0bd9fe5ece62..58fccede6cd0 100644
--- a/sys/x86/include/apicvar.h
+++ b/sys/x86/include/apicvar.h
@@ -129,12 +129,14 @@
#else
#define IPI_DYN_FIRST (APIC_IPI_INTS + 8)
#endif
-#define IPI_DYN_LAST (254) /* IPIs allocated at runtime */
+#define IPI_DYN_LAST (253) /* IPIs allocated at runtime */
/*
* IPI_STOP_HARD does not need to occupy a slot in the IPI vector space since
* it is delivered using an NMI anyways.
*/
+#define IPI_NMI_FIRST 254
+#define IPI_TRACE 254 /* Interrupt for tracing. */
#define IPI_STOP_HARD 255 /* Stop CPU with a NMI. */
/*
diff --git a/sys/x86/include/stack.h b/sys/x86/include/stack.h
index 3489e42c5802..7f6930a58b37 100644
--- a/sys/x86/include/stack.h
+++ b/sys/x86/include/stack.h
@@ -54,4 +54,8 @@ struct i386_frame {
};
#endif /* __amd64__ */
+#ifdef _KERNEL
+int stack_nmi_handler(struct trapframe *);
+#endif
+
#endif /* !_X86_STACK_H */
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index 81989718e611..106a842f5d63 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -1703,11 +1703,10 @@ native_lapic_ipi_vectored(u_int vector, int dest)
icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;
/*
- * IPI_STOP_HARD is just a "fake" vector used to send a NMI.
- * Use special rules regard NMI if passed, otherwise specify
- * the vector.
+ * NMI IPIs are just fake vectors used to send a NMI. Use special rules
+ * regarding NMIs if passed, otherwise specify the vector.
*/
- if (vector == IPI_STOP_HARD)
+ if (vector >= IPI_NMI_FIRST)
icrlo |= APIC_DELMODE_NMI;
else
icrlo |= vector | APIC_DELMODE_FIXED;
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index c23108cc7ad6..9e1cec241d51 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -120,7 +120,7 @@ struct cpu_ops cpu_ops;
* Local data and functions.
*/
-static volatile cpuset_t ipi_nmi_pending;
+static volatile cpuset_t ipi_stop_nmi_pending;
/* used to hold the AP's until we are ready to release them */
struct mtx ap_boot_mtx;
@@ -894,7 +894,7 @@ ipi_selected(cpuset_t cpus, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
+ CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &cpus);
while ((cpu = CPU_FFS(&cpus)) != 0) {
cpu--;
@@ -917,7 +917,7 @@ ipi_cpu(int cpu, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
+ CPU_SET_ATOMIC(cpu, &ipi_stop_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
@@ -944,7 +944,7 @@ ipi_all_but_self(u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
+ CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &other_cpus);
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
@@ -962,10 +962,10 @@ ipi_nmi_handler()
* and should be handled.
*/
cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
+ if (!CPU_ISSET(cpuid, &ipi_stop_nmi_pending))
return (1);
- CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
+ CPU_CLR_ATOMIC(cpuid, &ipi_stop_nmi_pending);
cpustop_handler();
return (0);
}
diff --git a/sys/x86/x86/stack_machdep.c b/sys/x86/x86/stack_machdep.c
index 3ebf5a913b08..a56d423d4aa7 100644
--- a/sys/x86/x86/stack_machdep.c
+++ b/sys/x86/x86/stack_machdep.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2015 EMC Corporation
* Copyright (c) 2005 Antoine Brodin
* All rights reserved.
*
@@ -29,17 +30,21 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stack.h>
-#include <x86/stack.h>
-
#include <machine/pcb.h>
+#include <machine/smp.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
+#include <x86/stack.h>
+
#ifdef __i386__
#define PCB_FP(pcb) ((pcb)->pcb_ebp)
#define TF_FP(tf) ((tf)->tf_ebp)
@@ -54,6 +59,14 @@ typedef struct i386_frame *x86_frame_t;
typedef struct amd64_frame *x86_frame_t;
#endif
+static struct stack *nmi_stack;
+static volatile struct thread *nmi_pending;
+
+#ifdef SMP
+static struct mtx nmi_lock;
+MTX_SYSINIT(nmi_lock, &nmi_lock, "stack_nmi", MTX_SPIN);
+#endif
+
static void
stack_capture(struct thread *td, struct stack *st, register_t fp)
{
@@ -78,6 +91,24 @@ stack_capture(struct thread *td, struct stack *st, register_t fp)
}
}
+int
+stack_nmi_handler(struct trapframe *tf)
+{
+
+ /* Don't consume an NMI that wasn't meant for us. */
+ if (nmi_stack == NULL || curthread != nmi_pending)
+ return (0);
+
+ if (INKERNEL(TF_PC(tf)))
+ stack_capture(curthread, nmi_stack, TF_FP(tf));
+ else
+ /* We interrupted a thread in user mode. */
+ nmi_stack->depth = 0;
+
+ atomic_store_rel_ptr((long *)&nmi_pending, (long)NULL);
+ return (1);
+}
+
void
stack_save_td(struct stack *st, struct thread *td)
{
@@ -90,6 +121,39 @@ stack_save_td(struct stack *st, struct thread *td)
stack_capture(td, st, PCB_FP(td->td_pcb));
}
+int
+stack_save_td_running(struct stack *st, struct thread *td)
+{
+
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ MPASS(TD_IS_RUNNING(td));
+
+ if (td == curthread) {
+ stack_save(st);
+ return (0);
+ }
+
+#ifdef SMP
+ mtx_lock_spin(&nmi_lock);
+
+ nmi_stack = st;
+ nmi_pending = td;
+ ipi_cpu(td->td_oncpu, IPI_TRACE);
+ while ((void *)atomic_load_acq_ptr((long *)&nmi_pending) != NULL)
+ cpu_spinwait();
+ nmi_stack = NULL;
+
+ mtx_unlock_spin(&nmi_lock);
+
+ if (st->depth == 0)
+ /* We interrupted a thread in user mode. */
+ return (EAGAIN);
+#else
+ KASSERT(0, ("curthread isn't running"));
+#endif
+ return (0);
+}
+
void
stack_save(struct stack *st)
{