Index: kernel/arch/amd64/include/syscall.h
===================================================================
--- kernel/arch/amd64/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ kernel/arch/amd64/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -36,6 +36,4 @@
 #define KERN_amd64_SYSCALL_H_
 
-#include <arch/types.h>
-
 extern void syscall_setup_cpu(void);
 
Index: kernel/arch/ia32/Makefile.inc
===================================================================
--- kernel/arch/ia32/Makefile.inc	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ kernel/arch/ia32/Makefile.inc	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -161,3 +161,4 @@
 	arch/$(ARCH)/src/boot/memmap.c \
 	arch/$(ARCH)/src/fpu_context.c \
-	arch/$(ARCH)/src/debugger.c
+	arch/$(ARCH)/src/debugger.c \
+	arch/$(ARCH)/src/syscall.c
Index: kernel/arch/ia32/include/asm.h
===================================================================
--- kernel/arch/ia32/include/asm.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ kernel/arch/ia32/include/asm.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -248,4 +248,20 @@
 }
 
+/** Write a 64-bit value to the model-specific register selected by msr. */
+static inline void write_msr(uint32_t msr, uint64_t value)
+{
+	uint32_t lo = (uint32_t) value, hi = (uint32_t) (value >> 32);
+	asm volatile ("wrmsr" : : "a" (lo), "d" (hi), "c" (msr));
+}
+
+/** Read MSR: return the 64-bit value of the register selected by msr. */
+static inline uint64_t read_msr(uint32_t msr)
+{
+	uint32_t lo, hi;
+	asm volatile ("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
+	return lo | (((uint64_t) hi) << 32);
+}
+
+
 /** Return base address of current stack
  *
Index: kernel/arch/ia32/include/cpu.h
===================================================================
--- kernel/arch/ia32/include/cpu.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ kernel/arch/ia32/include/cpu.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -36,9 +36,18 @@
 #define KERN_ia32_CPU_H_
 
+#define EFLAGS_IF       (1 << 9)	/* EFLAGS bit 9: interrupt enable flag */
+#define EFLAGS_RF       (1 << 16)	/* EFLAGS bit 16: resume flag */
+
+#define CR4_OSFXSR_MASK (1<<9)	/* CR4 bit 9: OS supports FXSAVE/FXRSTOR */
+
+/* MSRs used by the SYSENTER and SYSEXIT instructions */
+#define IA32_MSR_SYSENTER_CS	0x174	/* kernel code segment selector */
+#define IA32_MSR_SYSENTER_ESP	0x175	/* kernel stack pointer */
+#define IA32_MSR_SYSENTER_EIP	0x176	/* kernel entry point */
+
+#ifndef __ASM__
+
 #include <arch/pm.h>
 #include <arch/asm.h>
-
-#define EFLAGS_IF       (1 << 9)
-#define EFLAGS_RF       (1 << 16)
 
 typedef struct {
@@ -52,6 +61,5 @@
 } cpu_arch_t;
 
-
-#define CR4_OSFXSR_MASK (1<<9)
+#endif
 
 #endif
Index: kernel/arch/ia32/include/syscall.h
===================================================================
--- kernel/arch/ia32/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
+++ kernel/arch/ia32/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -0,0 +1,1 @@
+../../amd64/include/syscall.h
Index: kernel/arch/ia32/src/asm.S
===================================================================
--- kernel/arch/ia32/src/asm.S	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ kernel/arch/ia32/src/asm.S	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -147,4 +147,43 @@
 	popfl
 .endm	
+
+/*
+ * SYSENTER entry point, used for syscalls with four or fewer arguments.
+ * The arguments are passed in EDX, ECX, EBX and ESI, the syscall number
+ * in EAX. Userspace stores its return address in EDI and its stack
+ * pointer in EBP. Syscalls with five or six arguments use the INT-based
+ * mechanism, which handles six arguments plus the syscall number
+ * entirely in registers. SYSENTER clears EFLAGS.IF and SYSEXIT does not
+ * restore it, so interrupts are re-enabled here with STI.
+ */
+.global sysenter_handler
+sysenter_handler:
+	sti		# SYSENTER entered with interrupts disabled
+	pushl %ebp	# remember user stack
+	pushl %edi	# remember return user address
+	pushl %gs	# remember TLS
+
+	pushl %eax	# syscall number
+	subl $8, %esp	# unused sixth and fifth argument
+	pushl %esi	# fourth argument
+	pushl %ebx	# third argument
+	pushl %ecx	# second argument
+	pushl %edx	# first argument
+
+	movw $16, %ax	# selector 16 (presumably kernel data) for DS and ES
+	movw %ax, %ds
+	movw %ax, %es
+
+	cld
+	call syscall_handler
+	addl $28, %esp	# remove arguments from stack (7 slots of 4 bytes)
+
+	pop %gs		# restore TLS
+
+	pop %edx	# prepare return EIP for SYSEXIT
+	pop %ecx	# prepare userspace ESP for SYSEXIT
+
+	sysexit		# return to userspace
+
 
 ## Declare interrupt handlers
Index: kernel/arch/ia32/src/cpu/cpu.c
===================================================================
--- kernel/arch/ia32/src/cpu/cpu.c	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ kernel/arch/ia32/src/cpu/cpu.c	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -43,4 +43,5 @@
 
 #include <arch/smp/apic.h>
+#include <arch/syscall.h>
 
 /*
@@ -124,4 +125,7 @@
 		);
 	}
+	
+	/* Setup fast SYSENTER/SYSEXIT syscalls */
+	syscall_setup_cpu();
 }
 
Index: kernel/arch/ia32/src/proc/scheduler.c
===================================================================
--- kernel/arch/ia32/src/proc/scheduler.c	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ kernel/arch/ia32/src/proc/scheduler.c	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -59,6 +59,12 @@
 void before_thread_runs_arch(void)
 {
-	CPU->arch.tss->esp0 = (uintptr_t) &THREAD->kstack[THREAD_STACK_SIZE -
+	uintptr_t kstk = (uintptr_t) &THREAD->kstack[THREAD_STACK_SIZE -
 	    SP_DELTA];
+
+	/* Set kernel stack for CPL3 -> CPL0 switch via SYSENTER */
+	write_msr(IA32_MSR_SYSENTER_ESP, kstk);
+
+	/* Set kernel stack for CPL3 -> CPL0 switch via interrupt */
+	CPU->arch.tss->esp0 = kstk;
 	CPU->arch.tss->ss0 = selector(KDATA_DES);
 
Index: kernel/arch/ia32/src/syscall.c
===================================================================
--- kernel/arch/ia32/src/syscall.c	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
+++ kernel/arch/ia32/src/syscall.c	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2008 Jakub Jermar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * - The name of the author may not be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** @addtogroup ia32	
+ * @{
+ */
+/** @file
+ */
+
+#include <arch/syscall.h>
+#include <arch/cpu.h>
+#include <arch/asm.h>
+#include <arch/types.h>
+#include <arch/pm.h>
+
+/** Program the SYSENTER MSRs: kernel code selector and kernel entry point. */
+void syscall_setup_cpu(void)
+{
+	extern void sysenter_handler(void);
+	uint32_t entry = (uint32_t) sysenter_handler;
+
+	/* CS selector that SYSENTER loads on entry to the kernel */
+	write_msr(IA32_MSR_SYSENTER_CS, selector(KTEXT_DES));
+	write_msr(IA32_MSR_SYSENTER_EIP, entry);	/* kernel entry point */
+}
+
+/** @}
+ */
Index: uspace/app/trace/trace.c
===================================================================
--- uspace/app/trace/trace.c	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/app/trace/trace.c	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -36,5 +36,4 @@
 #include <stdlib.h>
 #include <unistd.h>
-#include <syscall.h>
 #include <ipc/ipc.h>
 #include <fibril.h>
@@ -44,4 +43,6 @@
 #include <task.h>
 #include <loader/loader.h>
+
+#include <libc.h>
 
 // Temporary: service and method names
Index: uspace/lib/libc/arch/amd64/include/syscall.h
===================================================================
--- uspace/lib/libc/arch/amd64/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/amd64/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -37,4 +37,6 @@
 #define LIBC_amd64_SYSCALL_H_
 
+#define LIBARCH_SYSCALL_GENERIC
+
 #include <syscall.h>
 
Index: uspace/lib/libc/arch/arm32/include/syscall.h
===================================================================
--- uspace/lib/libc/arch/arm32/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/arm32/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -31,9 +31,11 @@
  */
 /** @file
- *  @brief Empty.
+ *  @brief Selects the generic libc syscall wrapper for arm32.
  */
 
 #ifndef LIBC_arm32_SYSCALL_H_
 #define LIBC_arm32_SYSCALL_H_
+
+#define LIBARCH_SYSCALL_GENERIC
 
 #include <syscall.h>
Index: uspace/lib/libc/arch/ia32/include/syscall.h
===================================================================
--- uspace/lib/libc/arch/ia32/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/ia32/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -37,5 +37,22 @@
 #define LIBC_ia32_SYSCALL_H_
 
-#include <syscall.h>
+#include <sys/types.h>
+#include <kernel/syscall/syscall.h>
+
+#define __syscall0	__syscall_sysenter	/* up to four arguments: SYSENTER */
+#define __syscall1	__syscall_sysenter
+#define __syscall2	__syscall_sysenter
+#define __syscall3	__syscall_sysenter
+#define __syscall4	__syscall_sysenter
+#define __syscall5	__syscall_int	/* five or six arguments: INT-based */
+#define __syscall6	__syscall_int
+/* SYSENTER wrapper; takes six argument slots followed by the syscall number. */
+extern sysarg_t
+__syscall_sysenter(const sysarg_t, const sysarg_t, const sysarg_t, const sysarg_t,
+     const sysarg_t, const sysarg_t, const syscall_t);
+/* INT-based wrapper; same parameter layout as __syscall_sysenter. */
+extern sysarg_t
+__syscall_int(const sysarg_t, const sysarg_t, const sysarg_t, const sysarg_t,
+     const sysarg_t, const sysarg_t, const syscall_t);
 
 #endif
Index: uspace/lib/libc/arch/ia32/src/syscall.S
===================================================================
--- uspace/lib/libc/arch/ia32/src/syscall.S	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/ia32/src/syscall.S	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -29,5 +29,5 @@
 .text
 
-/** Syscall wrapper.
+/** Syscall wrapper - INT $0x30 version.
  *
  * Mind the order of arguments. First two arguments and the syscall number go to
@@ -35,6 +35,6 @@
  * could benefit from this and not save unused registers on the stack.
  */
-.global __syscall
-__syscall:
+.global __syscall_int
+__syscall_int:
 	pushl %ebx
 	pushl %esi
@@ -54,2 +54,37 @@
 	popl %ebx
 	ret
+
+
+/** Syscall wrapper - SYSENTER version.
+ *
+ * This is an optimized version of syscall for four or fewer arguments. Note
+ * that EBP and EDI are used to remember the user stack address and the
+ * return address. The kernel part doesn't save DS, ES and FS, so this wrapper
+ * reloads them after return with the selector immediately following CS (it
+ * must be the flat data segment, otherwise SYSENTER wouldn't work at all).
+ */
+.global __syscall_sysenter
+__syscall_sysenter:
+	pushl %ebx		# save the registers overwritten below
+	pushl %esi
+	pushl %edi
+	pushl %ebp
+	mov %esp, %ebp		# EBP = user stack pointer to resume on
+	lea ra, %edi		# EDI = user address to return to
+	movl 20(%esp), %edx	# First argument.
+	movl 24(%esp), %ecx	# Second argument.
+	movl 28(%esp), %ebx	# Third argument.
+	movl 32(%esp), %esi	# Fourth argument.
+	movl 44(%esp), %eax	# Syscall number.
+	sysenter
+ra:
+	movw %cs, %cx		# data selector = CS + 8
+	addw $8, %cx
+	movw %cx, %ds
+	movw %cx, %es
+	movw %cx, %fs
+	popl %ebp		# restore the saved registers
+	popl %edi
+	popl %esi
+	popl %ebx
+	ret
Index: uspace/lib/libc/arch/ia64/include/syscall.h
===================================================================
--- uspace/lib/libc/arch/ia64/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/ia64/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -37,4 +37,6 @@
 #define LIBC_ia64_SYSCALL_H_
 
+#define LIBARCH_SYSCALL_GENERIC
+
 #include <syscall.h>
 
Index: uspace/lib/libc/arch/mips32/include/syscall.h
===================================================================
--- uspace/lib/libc/arch/mips32/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/mips32/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -37,4 +37,6 @@
 #define LIBC_mips32_SYSCALL_H_
 
+#define LIBARCH_SYSCALL_GENERIC
+
 #include <syscall.h>
 
Index: uspace/lib/libc/arch/ppc32/include/syscall.h
===================================================================
--- uspace/lib/libc/arch/ppc32/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/ppc32/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -37,4 +37,6 @@
 #define LIBC_ppc32_SYSCALL_H_
 
+#define LIBARCH_SYSCALL_GENERIC
+
 #include <syscall.h>
 
Index: uspace/lib/libc/arch/ppc64/include/syscall.h
===================================================================
--- uspace/lib/libc/arch/ppc64/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/ppc64/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -37,4 +37,6 @@
 #define LIBC_ppc64_SYSCALL_H_
 
+#define LIBARCH_SYSCALL_GENERIC
+
 #include <syscall.h>
 
Index: uspace/lib/libc/arch/sparc64/include/syscall.h
===================================================================
--- uspace/lib/libc/arch/sparc64/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/arch/sparc64/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -39,4 +39,12 @@
 #include <kernel/syscall/syscall.h>
 
+#define __syscall0	__syscall
+#define __syscall1	__syscall
+#define __syscall2	__syscall
+#define __syscall3	__syscall
+#define __syscall4	__syscall
+#define __syscall5	__syscall
+#define __syscall6	__syscall
+
 static inline sysarg_t
 __syscall(const sysarg_t p1, const sysarg_t p2, const sysarg_t p3,
Index: uspace/lib/libc/generic/udebug.c
===================================================================
--- uspace/lib/libc/generic/udebug.c	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/generic/udebug.c	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -35,5 +35,4 @@
 #include <udebug.h>
 #include <sys/types.h>
-#include <syscall.h>
 #include <ipc/ipc.h>
 #include <async.h>
Index: uspace/lib/libc/include/libc.h
===================================================================
--- uspace/lib/libc/include/libc.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/include/libc.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -40,12 +40,12 @@
 #include <libarch/syscall.h>
 
-#define __SYSCALL0(id) __syscall(0, 0, 0, 0, 0, 0, id)
-#define __SYSCALL1(id, p1) __syscall(p1, 0, 0, 0, 0, 0, id)
-#define __SYSCALL2(id, p1, p2) __syscall(p1, p2, 0, 0, 0, 0, id)
-#define __SYSCALL3(id, p1, p2, p3) __syscall(p1, p2, p3, 0, 0, 0, id)
-#define __SYSCALL4(id, p1, p2, p3, p4) __syscall(p1, p2, p3, p4, 0, 0, id)
-#define __SYSCALL5(id, p1, p2, p3, p4, p5) __syscall(p1, p2, p3, p4, p5, 0, id)
+#define __SYSCALL0(id) __syscall0(0, 0, 0, 0, 0, 0, id)
+#define __SYSCALL1(id, p1) __syscall1(p1, 0, 0, 0, 0, 0, id)
+#define __SYSCALL2(id, p1, p2) __syscall2(p1, p2, 0, 0, 0, 0, id)
+#define __SYSCALL3(id, p1, p2, p3) __syscall3(p1, p2, p3, 0, 0, 0, id)
+#define __SYSCALL4(id, p1, p2, p3, p4) __syscall4(p1, p2, p3, p4, 0, 0, id)
+#define __SYSCALL5(id, p1, p2, p3, p4, p5) __syscall5(p1, p2, p3, p4, p5, 0, id)
 #define __SYSCALL6(id, p1, p2, p3, p4, p5, p6) \
-    __syscall(p1, p2, p3, p4, p5, p6,id)
+    __syscall6(p1, p2, p3, p4, p5, p6, id)
 
 extern void __main(void *pcb_ptr);
Index: uspace/lib/libc/include/syscall.h
===================================================================
--- uspace/lib/libc/include/syscall.h	(revision 1d132ae9692ac9338b444c7d793baca54ce3e632)
+++ uspace/lib/libc/include/syscall.h	(revision f2ef7fd587b7ec8f9d3293862101dfa7dbf2ecf6)
@@ -33,5 +33,6 @@
  * @file
  * @brief	Syscall function declaration for architectures that don't
- *		inline syscalls.
+ *		inline syscalls or architectures that handle syscalls
+ *		according to the number of arguments.
  */
 
@@ -39,6 +40,18 @@
 #define LIBC_SYSCALL_H_
 
+#ifndef	LIBARCH_SYSCALL_GENERIC
+#error "You can't include this file directly."
+#endif
+
 #include <sys/types.h>
 #include <kernel/syscall/syscall.h>
+
+#define __syscall0	__syscall
+#define __syscall1	__syscall
+#define __syscall2	__syscall
+#define __syscall3	__syscall
+#define __syscall4	__syscall
+#define __syscall5	__syscall
+#define __syscall6	__syscall
 
 extern sysarg_t __syscall(const sysarg_t p1, const sysarg_t p2,
