aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorKonstantin Belousov <kib@FreeBSD.org>2018-03-24 12:57:58 +0000
committerKonstantin Belousov <kib@FreeBSD.org>2018-03-24 12:57:58 +0000
commita37d4032edf1245d5a5ef8ba3ac46c8ac175b54e (patch)
treefb38d52af6ab754b6d907ab3d796e2a259780533 /sys
parente24e5683361e88f6ac6f8b10b90ce87e07035512 (diff)
downloadsrc-a37d4032edf1245d5a5ef8ba3ac46c8ac175b54e.tar.gz
src-a37d4032edf1245d5a5ef8ba3ac46c8ac175b54e.zip
Improve the lcall $7,$0 syscall emulation on amd64.
The current code, which copies the potential syscall arguments into the current frame, puts an arbitrary limit on the number of syscall arguments. Apparently, mmap(2) and lseek(2) (?) require a larger number. But there is an issue: the stack only needs to be mapped far enough to contain the number of arguments required by the syscall, so copying an arbitrarily large number of words from the stack is not completely safe. Use a different approach and convert the lcall frame into an int $0x80 frame in place, by doing the retl in the kernel. This also allows us to stop handling the vfork case specially, and to stop making assumptions about %cs at syscall time. Also, improve the comments with the formulations provided by bde. Reviewed and tested by: bde Sponsored by: The FreeBSD Foundation MFC after: 1 week
Notes
Notes: svn path=/head/; revision=331486
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/ia32/ia32_sigtramp.S43
-rw-r--r--sys/amd64/ia32/ia32_syscall.c28
2 files changed, 39 insertions, 32 deletions
diff --git a/sys/amd64/ia32/ia32_sigtramp.S b/sys/amd64/ia32/ia32_sigtramp.S
index 81d3cd9cd9bc..90df90a46e18 100644
--- a/sys/amd64/ia32/ia32_sigtramp.S
+++ b/sys/amd64/ia32/ia32_sigtramp.S
@@ -78,44 +78,23 @@ ia32_osigcode:
1:
jmp 1b
-
/*
- * The lcall $7,$0 emulator cannot use the call gate that does an
- * inter-privilege transition. The reason is that the call gate
- * does not disable interrupts, and, before the swapgs is
- * executed, we would have a window where the ring 0 code is
- * executed with the wrong gsbase.
+ * Our lcall $7,$0 handler remains in user mode (ring 3), since lcalls
+ * don't change the interrupt mask, so if this one went directly to the
+ * kernel then there would be a window with interrupts enabled in kernel
+ * mode, and all interrupt handlers would have to be almost as complicated
+ * as the NMI handler to support this.
*
- * Instead, set LDT descriptor 0 as code segment, which reflects
- * the lcall $7,$0 back to ring 3 trampoline. The trampoline sets up
- * the frame for int $0x80.
+ * Instead, convert the lcall to an int0x80 call. The kernel does most
+ * of the conversion by popping the lcall return values off the user
+ * stack and returning to them instead of to here, except when the
+ * conversion itself fails. Adjusting the stack here is impossible for
+ * vfork() and harder for other syscalls.
*/
ALIGN_TEXT
lcall_tramp:
- cmpl $SYS_vfork,%eax
- je 1f
- pushl %ebp
- movl %esp,%ebp
- pushl 0x24(%ebp) /* arg 6 */
- pushl 0x20(%ebp)
- pushl 0x1c(%ebp)
- pushl 0x18(%ebp)
- pushl 0x14(%ebp)
- pushl 0x10(%ebp) /* arg 1 */
- subl $4,%esp /* gap */
- int $0x80
- leavel
- lretl
-1:
- /*
- * vfork handling is special and relies on the libc stub saving
- * the return ip in %ecx. Also, we assume that the call was done
- * with ucode32 selector in %cs.
- */
int $0x80
- movl $0x33,4(%esp) /* GUCODE32_SEL | SEL_UPL */
- movl %ecx,(%esp)
- lretl
+1: jmp 1b
#endif
ALIGN_TEXT
diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c
index 124c878d35f5..b8275b3fe7e7 100644
--- a/sys/amd64/ia32/ia32_syscall.c
+++ b/sys/amd64/ia32/ia32_syscall.c
@@ -116,11 +116,39 @@ ia32_fetch_syscall_args(struct thread *td)
caddr_t params;
u_int32_t args[8], tmp;
int error, i;
+#ifdef COMPAT_43
+ u_int32_t eip;
+ int cs;
+#endif
p = td->td_proc;
frame = td->td_frame;
sa = &td->td_sa;
+#ifdef COMPAT_43
+ if (__predict_false(frame->tf_cs == 7 && frame->tf_rip == 2)) {
+ /*
+ * In lcall $7,$0 after int $0x80. Convert the user
+ * frame to what it would be for a direct int 0x80 instead
+ * of lcall $7,$0, by popping the lcall return address.
+ */
+ error = fueword32((void *)frame->tf_rsp, &eip);
+ if (error == -1)
+ return (EFAULT);
+ cs = fuword16((void *)(frame->tf_rsp + sizeof(u_int32_t)));
+ if (cs == -1)
+ return (EFAULT);
+
+ /*
+ * Unwind in-kernel frame after all stack frame pieces
+ * were successfully read.
+ */
+ frame->tf_rip = eip;
+ frame->tf_cs = cs;
+ frame->tf_rsp += 2 * sizeof(u_int32_t);
+ }
+#endif
+
params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t);
sa->code = frame->tf_rax;