Index: kernel/arch/amd64/include/proc/thread.h
===================================================================
--- kernel/arch/amd64/include/proc/thread.h	(revision 2829b3548b37cce65df20809dc2aa97fc53ae85c)
+++ kernel/arch/amd64/include/proc/thread.h	(revision 9cc0d7c4f78aa4efea62a2d761476193f7e7717d)
@@ -36,6 +36,12 @@
 #define KERN_amd64_THREAD_H_
 
+/* CAUTION: keep these in sync with low level assembly code in syscall_entry */
+#define SYSCALL_USTACK_RSP	0
+#define SYSCALL_KSTACK_RSP	1
+
 typedef struct {
 	unative_t tls;
+	/** User and kernel RSP for syscalls. */
+	uint64_t syscall_rsp[2];	
 } thread_arch_t;
 
Index: kernel/arch/amd64/src/asm_utils.S
===================================================================
--- kernel/arch/amd64/src/asm_utils.S	(revision 2829b3548b37cce65df20809dc2aa97fc53ae85c)
+++ kernel/arch/amd64/src/asm_utils.S	(revision 9cc0d7c4f78aa4efea62a2d761476193f7e7717d)
@@ -221,8 +221,9 @@
 
 	/*
-	 * Choose between version with error code and version without error code.
-	 * Both versions have to be of the same size. amd64 assembly is, however,
-	 * a little bit tricky. For instance, subq $0x80, %rsp and subq $0x78, %rsp
-	 * can result in two instructions with different op-code lengths.
+	 * Choose between version with error code and version without error
+	 * code. Both versions have to be of the same size. amd64 assembly is,
+	 * however, a little bit tricky. For instance, subq $0x80, %rsp and
+	 * subq $0x78, %rsp can result in two instructions with different
+	 * op-code lengths.
 	 * Therefore we align the interrupt handlers.
 	 */
@@ -270,32 +271,51 @@
 h_end:
 
-	
+## Low-level syscall handler
+# 
+# Registers on entry:
+#
+# @param rcx		Userspace return address.
+# @param r11		Userspace RFLAGS.
+#
+# @param rax		Syscall number.
+# @param rdi		1st syscall argument.
+# @param rsi		2nd syscall argument.
+# @param rdx		3rd syscall argument.
+# @param r10		4th syscall argument. Used instead of RCX because the
+#			SYSCALL instruction clobbers it.
+# @param r8		5th syscall argument.
+# @param r9		6th syscall argument.
+#
+# @return		Return value is in rax.
+#
 syscall_entry:
-	# Switch to hidden gs	
-	swapgs
-	# %gs:0 now points to pointer to stack page
-	mov %gs:0, %r10     # We have a ptr to stack page in r10
-	addq $PAGE_SIZE-16, %r10 # We need some space to store old %sp
-	
-	movq %rsp, 0(%r10)  # Save old stack pointer to stack
-	movq %r10, %rsp     # Change to new stack
-	pushq %rcx          # Return address
-	pushq %r11          # Save flags
-
-	# Switch back to remain consistent
-	swapgs 
-
+	swapgs			# Switch to hidden gs	
+	# 
+	# %gs:0			Scratch space for this thread's user RSP
+	# %gs:8			Address to be used as this thread's kernel RSP
+	#
+	movq %rsp, %gs:0	# Save this thread's user RSP
+	movq %gs:8, %rsp	# Set this thread's kernel RSP
+	swapgs			# Switch back to remain consistent
 	sti
-	movq %r9, %rcx      # Exchange last parameter as a third
-	
+	
+	pushq %rcx
+	pushq %r11
+
+	movq %r10, %rcx		# Copy the 4th argument where it is expected 
+	pushq %rax
 	call syscall_handler
-	cli                 # We will be touching stack pointer
+	addq $8, %rsp
 		
 	popq %r11
 	popq %rcx
-	movq 0(%rsp), %rsp
+
+	cli
+	swapgs
+	movq %gs:0, %rsp	# Restore the user RSP
+	swapgs
+
 	sysretq
-		
-		
+
 .data
 .global interrupt_handler_size
Index: kernel/arch/amd64/src/proc/scheduler.c
===================================================================
--- kernel/arch/amd64/src/proc/scheduler.c	(revision 2829b3548b37cce65df20809dc2aa97fc53ae85c)
+++ kernel/arch/amd64/src/proc/scheduler.c	(revision 9cc0d7c4f78aa4efea62a2d761476193f7e7717d)
@@ -57,10 +57,12 @@
 void before_thread_runs_arch(void)
 {
-	CPU->arch.tss->rsp0 = (uintptr_t) &THREAD->kstack[THREAD_STACK_SIZE-SP_DELTA];
+	CPU->arch.tss->rsp0 =
+	    (uintptr_t) &THREAD->kstack[THREAD_STACK_SIZE - SP_DELTA];
 
-	/* Syscall support - write address of thread stack pointer to 
-	 * hidden part of gs */
+	/*
+	 * Syscall support: point the hidden gs base at syscall_rsp[].
+	 */
 	swapgs();
-	write_msr(AMD_MSR_GS, (uint64_t)&THREAD->kstack);
+	write_msr(AMD_MSR_GS, (uintptr_t)THREAD->arch.syscall_rsp);
 	swapgs();
 
@@ -72,6 +74,5 @@
 	if (CPU->id < BKPOINTS_MAX)
 		breakpoint_add(&((the_t *) THREAD->kstack)->as, 
-			       BKPOINT_WRITE | BKPOINT_CHECK_ZERO,
-			       CPU->id);
+		    BKPOINT_WRITE | BKPOINT_CHECK_ZERO, CPU->id);
 #endif
 }
Index: kernel/arch/amd64/src/proc/thread.c
===================================================================
--- kernel/arch/amd64/src/proc/thread.c	(revision 2829b3548b37cce65df20809dc2aa97fc53ae85c)
+++ kernel/arch/amd64/src/proc/thread.c	(revision 9cc0d7c4f78aa4efea62a2d761476193f7e7717d)
@@ -42,4 +42,10 @@
 {
 	t->arch.tls = 0;
+	t->arch.syscall_rsp[SYSCALL_USTACK_RSP] = 0;
+	/*
+	 * Kernel RSP can be precalculated at thread creation time.
+	 */
+	t->arch.syscall_rsp[SYSCALL_KSTACK_RSP] =
+	    (uintptr_t)&t->kstack[PAGE_SIZE - sizeof(uint64_t)];
 }
 
Index: kernel/arch/amd64/src/syscall.c
===================================================================
--- kernel/arch/amd64/src/syscall.c	(revision 2829b3548b37cce65df20809dc2aa97fc53ae85c)
+++ kernel/arch/amd64/src/syscall.c	(revision 9cc0d7c4f78aa4efea62a2d761476193f7e7717d)
@@ -53,11 +53,11 @@
 	/* Setup syscall entry address */
 	
-	/* This is _mess_ - the 64-bit CS is argument+16,
-	 * the SS is argument+8. The order is:
+	/* This is a _mess_ - the 64-bit CS is argument + 16,
+	 * the SS is argument + 8. The order is:
 	 * +0(KDATA_DES), +8(UDATA_DES), +16(UTEXT_DES)
 	 */
 	write_msr(AMD_MSR_STAR,
-		  ((uint64_t)(gdtselector(KDATA_DES) | PL_USER)<<48) \
-		  | ((uint64_t)(gdtselector(KTEXT_DES) | PL_KERNEL)<<32));
+	    ((uint64_t)(gdtselector(KDATA_DES) | PL_USER) << 48) |
+	    ((uint64_t)(gdtselector(KTEXT_DES) | PL_KERNEL) << 32));
 	write_msr(AMD_MSR_LSTAR, (uint64_t)syscall_entry);
 	/* Mask RFLAGS on syscall 
