Index: libc/arch/ia64/include/psthread.h
===================================================================
--- libc/arch/ia64/include/psthread.h	(revision 07d78705131fd1e3b5ec9c46c4d0949018348976)
+++ libc/arch/ia64/include/psthread.h	(revision b1facba77ebeff75ea5f58364467ffb0e9bf1aa8)
@@ -39,4 +39,5 @@
 #include <align.h>
 #include <libarch/stack.h>
+#include <arch/types.h>
 
 /*
@@ -108,4 +109,27 @@
 	 */
 	uint64_t pr;
+
+	__r128 f2 __attribute__ ((aligned(16)));	/* 16-byte aligned for stf.spill/ldf.fill; NOTE(review): psthread.S reaches f2 at &pr + 16, which assumes pr is itself 16-byte aligned - confirm */
+	__r128 f3;
+	__r128 f4;
+	__r128 f5;
+
+	__r128 f16;	/* f16-f31 follow f5 directly, in the same order psthread.S spills and fills them */
+	__r128 f17;
+	__r128 f18;
+	__r128 f19;
+	__r128 f20;
+	__r128 f21;
+	__r128 f22;
+	__r128 f23;
+	__r128 f24;
+	__r128 f25;
+	__r128 f26;
+	__r128 f27;
+	__r128 f28;
+	__r128 f29;
+	__r128 f30;
+	__r128 f31;
+
 } context_t;
 
Index: libc/arch/ia64/src/psthread.S
===================================================================
--- libc/arch/ia64/src/psthread.S	(revision 07d78705131fd1e3b5ec9c46c4d0949018348976)
+++ libc/arch/ia64/src/psthread.S	(revision b1facba77ebeff75ea5f58364467ffb0e9bf1aa8)
@@ -105,6 +105,31 @@
 	 */
 	mov loc2 = pr		;;
-	st8 [in0] = loc2, 8	;;
-	
+	st8 [in0] = loc2, 16;; 		/* Advance by 16 rather than 8: the FP registers that follow must be spilled to a 16-byte aligned address */
+
+	/*
+	 * Save floating-point registers f2-f5 and f16-f31, 16 bytes each, post-incrementing in0.
+	 */
+	stf.spill [in0] = f2, 16 ;;
+	stf.spill [in0] = f3, 16 ;;
+	stf.spill [in0] = f4, 16 ;;
+	stf.spill [in0] = f5, 16 ;;
+
+	stf.spill [in0] = f16, 16 ;;
+	stf.spill [in0] = f17, 16 ;;
+	stf.spill [in0] = f18, 16 ;;
+	stf.spill [in0] = f19, 16 ;;
+	stf.spill [in0] = f20, 16 ;;
+	stf.spill [in0] = f21, 16 ;;
+	stf.spill [in0] = f22, 16 ;;
+	stf.spill [in0] = f23, 16 ;;
+	stf.spill [in0] = f24, 16 ;;
+	stf.spill [in0] = f25, 16 ;;
+	stf.spill [in0] = f26, 16 ;;
+	stf.spill [in0] = f27, 16 ;;
+	stf.spill [in0] = f28, 16 ;;
+	stf.spill [in0] = f29, 16 ;;
+	stf.spill [in0] = f30, 16 ;;
+	stf.spill [in0] = f31, 16 ;;	
+
 	mov ar.unat = loc1
 	
@@ -188,6 +213,31 @@
 	 * Restore predicate registers
 	 */
-	ld8 loc2 = [in0], 8	;;
+	ld8 loc2 = [in0], 16	;;	/* Advance by 16 rather than 8, mirroring the step taken after pr on the save side */
 	mov pr = loc2, ~0
+
+	/*
+	 * Restore floating-point registers f2-f5 and f16-f31 in the order they were spilled.
+	 */
+	ldf.fill f2 = [in0], 16 ;;
+	ldf.fill f3 = [in0], 16 ;;
+	ldf.fill f4 = [in0], 16 ;;
+	ldf.fill f5 = [in0], 16 ;;
+
+	ldf.fill f16 = [in0], 16 ;;
+	ldf.fill f17 = [in0], 16 ;;
+	ldf.fill f18 = [in0], 16 ;;
+	ldf.fill f19 = [in0], 16 ;;
+	ldf.fill f20 = [in0], 16 ;;
+	ldf.fill f21 = [in0], 16 ;;
+	ldf.fill f22 = [in0], 16 ;;
+	ldf.fill f23 = [in0], 16 ;;
+	ldf.fill f24 = [in0], 16 ;;
+	ldf.fill f25 = [in0], 16 ;;
+	ldf.fill f26 = [in0], 16 ;;
+	ldf.fill f27 = [in0], 16 ;;
+	ldf.fill f28 = [in0], 16 ;;
+	ldf.fill f29 = [in0], 16 ;;
+	ldf.fill f30 = [in0], 16 ;;
+	ldf.fill f31 = [in0], 16 ;;
 	
 	mov ar.unat = loc1
