Index: kernel/arch/sparc64/src/context.S
===================================================================
--- kernel/arch/sparc64/src/context.S	(revision 376646780eb95ff0b1d6e16fe65bc114963669cb)
+++ kernel/arch/sparc64/src/context.S	(revision 245e8399f3d58ad45fc1730f6b948b33ae3ab8df)
@@ -28,14 +28,6 @@
 
 #include <arch/context_offset.h>
-
-/**
- * Both context_save_arch() and context_restore_arch() are
- * leaf-optimized procedures. This kind of optimization
- * is very important and prevents any implicit window
- * spill/fill/clean traps in these very core kernel
- * functions.
- */
-	
-#include <arch/context_offset.h>
+#include <arch/arch.h>
+#include <arch/regdef.h>
 
 .text   
@@ -44,5 +36,15 @@
 .global context_restore_arch
 
+/*
+ * context_save_arch() is required not to create its own stack frame. See the
+ * generic context.h for an explanation.
+ */
 context_save_arch:
+	#
+	# Force all our active register windows to memory so that we can find
+	# them there even if e.g. the thread is migrated to another processor.
+	#
+	flushw
+
 	CONTEXT_SAVE_ARCH_CORE %o0
 	retl
@@ -51,12 +53,20 @@
 context_restore_arch:
 	#
-	# Flush all active windows.
-	# This is essential, because CONTEXT_LOAD overwrites
-	# %sp of CWP - 1 with the value written to %fp of CWP.
-	# Flushing all active windows mitigates this problem
-	# as CWP - 1 becomes the overlap window.
+	# Forget all previous windows, they are not going to be needed again.
+	# Enforce a window fill on the next RESTORE instruction by setting
+	# CANRESTORE to zero and other window configuration registers
+	# accordingly. Note that the same can be achieved by executing the
+	# FLUSHW instruction, but since we don't need to remember the previous
+	# windows, we do the former and thus avoid some unnecessary window
+	# spills.
 	#
-	flushw
-	
+	rdpr %pstate, %l0
+	andn %l0, PSTATE_IE_BIT, %l1
+	wrpr %l1, %pstate
+	wrpr %g0, 0, %canrestore
+	wrpr %g0, 0, %otherwin
+	wrpr %g0, NWINDOWS - 2, %cansave
+	wrpr %l0, %pstate
+
 	CONTEXT_RESTORE_ARCH_CORE %o0
 	retl
Index: kernel/arch/sparc64/src/trap/trap_table.S
===================================================================
--- kernel/arch/sparc64/src/trap/trap_table.S	(revision 376646780eb95ff0b1d6e16fe65bc114963669cb)
+++ kernel/arch/sparc64/src/trap/trap_table.S	(revision 245e8399f3d58ad45fc1730f6b948b33ae3ab8df)
@@ -652,5 +652,5 @@
 	 * spilled to kernel memory (i.e. register window buffer). Moreover,
 	 * if the scheduler was called in the meantime, all valid windows
-	 * belonging to other threads were spilled by context_restore().
+	 * belonging to other threads were spilled by context_save().
 	 * If OTHERWIN is non-zero, then some userspace windows are still
 	 * valid. Others might have been spilled. However, the CWP pointer
Index: uspace/lib/libc/arch/sparc64/src/fibril.S
===================================================================
--- uspace/lib/libc/arch/sparc64/src/fibril.S	(revision 376646780eb95ff0b1d6e16fe65bc114963669cb)
+++ uspace/lib/libc/arch/sparc64/src/fibril.S	(revision 245e8399f3d58ad45fc1730f6b948b33ae3ab8df)
@@ -35,4 +35,8 @@
 
 context_save:
+	#
+	# We rely on the kernel to flush our active register windows to memory
+	# should a thread switch occur.
+	#
 	CONTEXT_SAVE_ARCH_CORE %o0
 	retl
@@ -42,9 +46,8 @@
 	#
 	# Flush all active windows.
-	# This is essential, because CONTEXT_LOAD overwrites
-	# %sp of CWP - 1 with the value written to %fp of CWP.
-	# Flushing all active windows mitigates this problem
-	# as CWP - 1 becomes the overlap window.
-	#		
+	# This is essential, because CONTEXT_RESTORE_ARCH_CORE overwrites %sp of
+	# CWP - 1 with the value written to %fp of CWP.  Flushing all active
+	# windows mitigates this problem as CWP - 1 becomes the overlap window.
+	#
 	flushw
 	
