Index: arch/amd64/_link.ld
===================================================================
--- arch/amd64/_link.ld	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/_link.ld	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -34,5 +34,5 @@
 		*(.rodata*);		/* string literals */
 		hardcoded_load_address = .;
-		QUAD(0xffffffff80008000);
+		QUAD(ktext_start - SIZEOF(.unmapped));
 		hardcoded_ktext_size = .;
 		QUAD(ktext_end - ktext_start + (unmapped_ktext_end - unmapped_ktext_start));
@@ -56,12 +56,13 @@
 		kdata_end = .;
 	}
+
+	_hardcoded_kernel_size = (ktext_end - ktext_start) + (unmapped_ktext_end - unmapped_ktext_start) + (kdata_end - kdata_start) + (unmapped_kdata_end - unmapped_kdata_start);
 	_boot_offset = 0x100000;
 	_ka2pa_offset = 0xffffffff80000000;
 	_map_address = _ka2pa_offset + _boot_offset;
 
-	_hardcoded_kernel_size = (ktext_end - ktext_start) + (unmapped_ktext_end - unmapped_ktext_start) + (kdata_end - kdata_start) + (unmapped_kdata_end - unmapped_kdata_start);
 
 	e820table_boot = e820table - _map_address;
 	e820counter_boot = e820counter - _map_address;
-	ap_bootstrap_gdtr = ap_bootstrap_gdtr_boot + _ka2pa_offset;
+	real_bootstrap_gdtr = real_bootstrap_gdtr_boot + _ka2pa_offset;
 }
Index: arch/amd64/include/asm.h
===================================================================
--- arch/amd64/include/asm.h	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/include/asm.h	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -141,4 +141,17 @@
 }
 
+/** Read CR0
+ *
+ * Return value in CR0
+ *
+ * @return Value read.
+ */
+static inline __u64 read_cr0(void) 
+{ 
+	__u64 v; 
+	__asm__ volatile ("movq %%cr0,%0" : "=r" (v)); 
+	return v; 
+}
+
 /** Read CR2
  *
@@ -147,5 +160,10 @@
  * @return Value read.
  */
-static inline __u64 read_cr2(void) { __u64 v; __asm__ volatile ("movq %%cr2,%0" : "=r" (v)); return v; }
+static inline __u64 read_cr2(void) 
+{ 
+	__u64 v; 
+	__asm__ volatile ("movq %%cr2,%0" : "=r" (v)); 
+	return v; 
+}
 
 /** Write CR3
@@ -155,5 +173,8 @@
  * @param v Value to be written.
  */
-static inline void write_cr3(__u64 v) { __asm__ volatile ("movq %0,%%cr3\n" : : "r" (v)); }
+static inline void write_cr3(__u64 v) 
+{ 
+	__asm__ volatile ("movq %0,%%cr3\n" : : "r" (v)); 
+}
 
 /** Read CR3
@@ -163,5 +184,10 @@
  * @return Value read.
  */
-static inline __u64 read_cr3(void) { __u64 v; __asm__ volatile ("movq %%cr3,%0" : "=r" (v)); return v; }
+static inline __u64 read_cr3(void) 
+{ 
+	__u64 v;
+	__asm__ volatile ("movq %%cr3,%0" : "=r" (v)); 
+	return v; 
+}
 
 
Index: arch/amd64/include/context.h
===================================================================
--- arch/amd64/include/context.h	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/include/context.h	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -34,5 +34,10 @@
 #endif
 
-#define SP_DELTA     8
+
+/* According to the System V AMD64 ABI, the stack MUST be
+ * aligned on a 16-byte boundary. If it is not, va_arg
+ * handling will panic sooner or later.
+ */
+#define SP_DELTA     16
 
 struct context {
Index: arch/amd64/include/cpu.h
===================================================================
--- arch/amd64/include/cpu.h	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/include/cpu.h	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -56,4 +56,5 @@
 extern void set_efer_flag(int flag);
 extern __u64 read_efer_flag(void);
+void cpu_setup_fpu(void);
 
 #endif /* __ASM__ */
Index: arch/amd64/include/cpuid.h
===================================================================
--- arch/amd64/include/cpuid.h	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/include/cpuid.h	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -30,6 +30,10 @@
 #define __CPUID_H__
 
-#define AMD_CPUID_EXTENDED 0x80000001
+#define AMD_CPUID_EXTENDED   0x80000001
 #define AMD_EXT_NOEXECUTE    20
+
+#define INTEL_CPUID_STANDARD 0x1
+#define INTEL_SSE2           26
+#define INTEL_FXSAVE         24
 
 #ifndef __ASM__
Index: arch/amd64/include/fpu_context.h
===================================================================
--- arch/amd64/include/fpu_context.h	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ 	(revision )
@@ -1,37 +1,0 @@
-/*
- * Copyright (C) 2005 Martin Decky
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in the
- *   documentation and/or other materials provided with the distribution.
- * - The name of the author may not be used to endorse or promote products
- *   derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __amd64_FPU_CONTEXT_H__
-#define __amd64_FPU_CONTEXT_H__
-
-#include <arch/types.h>
-
-struct fpu_context {
-};
-
-#endif
Index: arch/amd64/include/pm.h
===================================================================
--- arch/amd64/include/pm.h	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/include/pm.h	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -146,6 +146,5 @@
 
 extern struct ptr_16_64 gdtr;
-extern struct ptr_16_32 bsp_bootstrap_gdtr;
-extern struct ptr_16_32 ap_bootstrap_gdtr;
+extern struct ptr_16_32 real_bootstrap_gdtr;
 
 extern void pm_init(void);
Index: arch/amd64/src/amd64.c
===================================================================
--- arch/amd64/src/amd64.c	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/src/amd64.c	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -44,4 +44,5 @@
 #include <arch/cpuid.h>
 #include <arch/acpi/acpi.h>
+#include <panic.h>
 
 void arch_pre_mm_init(void)
@@ -50,10 +51,18 @@
 
 	cpuid(AMD_CPUID_EXTENDED,&cpuid_s);
-	if (! (cpuid_s.cpuid_edx & (1<<AMD_EXT_NOEXECUTE))) {
-		printf("We do not support NX!!-----------\n");
-		printf("%X------\n",cpuid_s.cpuid_edx);
-		cpu_halt();
-	}
+	if (! (cpuid_s.cpuid_edx & (1<<AMD_EXT_NOEXECUTE)))
+		panic("Processor does not support No-execute pages.\n");
+
+	cpuid(INTEL_CPUID_STANDARD,&cpuid_s);
+	if (! (cpuid_s.cpuid_edx & (1<<INTEL_FXSAVE)))
+		panic("Processor does not support FXSAVE/FXRESTORE.\n");
+	
+	if (! (cpuid_s.cpuid_edx & (1<<INTEL_SSE2)))
+		panic("Processor does not support SSE2 instructions.\n");
+
+	/* Enable No-execute pages */
 	set_efer_flag(AMD_NXE_FLAG);
+	/* Enable FPU */
+	cpu_setup_fpu();
 
 	pm_init();
Index: arch/amd64/src/boot/boot.S
===================================================================
--- arch/amd64/src/boot/boot.S	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/src/boot/boot.S	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -71,5 +71,5 @@
 
 	# Load gdtr, idtr
-	lgdt bsp_bootstrap_gdtr
+	lgdt real_bootstrap_gdtr_boot
 	
 	movl %cr0,%eax
@@ -167,6 +167,6 @@
 	.quad ptl_1 + (PTL_WRITABLE | PTL_PRESENT)
 
-.global gdtr_inst				
-bsp_bootstrap_gdtr:
+.global real_bootstrap_gdtr_boot
+real_bootstrap_gdtr_boot:
 	.word gdtselector(GDT_ITEMS)
 	.long KA2PA(gdt)-BOOT_OFFSET
Index: arch/amd64/src/cpu/cpu.c
===================================================================
--- arch/amd64/src/cpu/cpu.c	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/src/cpu/cpu.c	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -61,9 +61,39 @@
 };
 
+
+/** Setup flags on processor so that we can use the FPU
+ *
+ * cr4.osfxsr = 1 -> we do support fxsave/fxrstor
+ * cr0.em = 0 -> we do not emulate coprocessor
+ * cr0.mp = 1 -> we do want lazy context switch
+ */
+void cpu_setup_fpu(void)
+{
+	__asm__ volatile (
+		"movq %%cr0, %%rax;"
+		"btsq $1, %%rax;" /* cr0.mp */
+		"btrq $2, %%rax;"  /* cr0.em */
+		"movq %%rax, %%cr0;"
+
+		"movq %%cr4, %%rax;"
+		"bts $9, %%rax;" /* cr4.osfxsr */
+		"movq %%rax, %%cr4;"
+		:
+		:
+		:"%rax"
+		);
+}
+
+/** Set the TS flag to 1.
+ *
+ * If a thread accesses the coprocessor, a device-not-available
+ * exception (#NM) is raised, triggering a lazy FPU context switch.
+ *
+ */
 void set_TS_flag(void)
 {
 	__asm__	volatile (
 		"mov %%cr0,%%rax;"
-		"or $8,%%rax;"
+		"bts $3,%%rax;"
 		"mov %%rax,%%cr0;"
 		:
@@ -77,5 +107,5 @@
 	__asm__	volatile (
 		"mov %%cr0,%%rax;"
-		"btc $4,%%rax;"
+		"btr $3,%%rax;"
 		"mov %%rax,%%cr0;"
 		:
Index: arch/amd64/src/fpu_context.c
===================================================================
--- arch/amd64/src/fpu_context.c	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
+++ arch/amd64/src/fpu_context.c	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2005 Jakub Vana
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * - The name of the author may not be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <fpu_context.h>
+#include <arch.h>
+#include <cpu.h>
+
+void fpu_context_save(fpu_context_t *fctx)
+{
+}
+
+void fpu_context_restore(fpu_context_t *fctx)
+{
+	if(THREAD==CPU->fpu_owner) 
+		reset_TS_flag();
+	else {
+		set_TS_flag(); 
+		if (CPU->fpu_owner != NULL)
+			CPU->fpu_owner->fpu_context_engaged=1;
+	}
+}
+
+
+void fpu_lazy_context_save(fpu_context_t *fctx)
+{
+	/* TODO: We need malloc that allocates on 16-byte boundary !! */
+	if (((__u64)fctx) & 0xf)
+		fctx = (fpu_context_t *)((((__u64)fctx) | 0xf) + 1);
+
+	__asm__ volatile (
+		"fxsave %0"
+		: "=m"(*fctx)
+		);
+}
+
+void fpu_lazy_context_restore(fpu_context_t *fctx)
+{
+	/* TODO: We need malloc that allocates on 16-byte boundary !! */
+	if (((__u64)fctx) & 0xf)
+		fctx = (fpu_context_t *)((((__u64)fctx) | 0xf) + 1);
+	__asm__ volatile (
+		"fxrstor %0"
+		: : "m"(*fctx)
+		);
+}
+
+void fpu_init(void)
+{
+	__asm__ volatile (
+		"fninit;"
+	);
+}
Index: arch/amd64/src/interrupt.c
===================================================================
--- arch/amd64/src/interrupt.c	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/src/interrupt.c	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -38,20 +38,27 @@
 #include <arch.h>
 #include <symtab.h>
+#include <arch/asm.h>
 
-#define PRINT_INFO_ERRCODE(x) { \
+#define PRINT_INFO_ERRCODE(n,x) { \
 	char *symbol = get_symtab_entry(stack[1]); \
 	if (!symbol) \
 		symbol = ""; \
-	printf("----------------EXCEPTION OCCURED----------------\n"); \
+	printf("-----EXCEPTION(%d) OCCURRED----- ( %s )\n",n,__FUNCTION__); \
 	printf("%%rip: %Q (%s)\n",x[1],symbol); \
 	printf("ERROR_WORD=%Q\n", x[0]); \
-	printf("%%rcs=%Q,flags=%Q\n", x[2], x[3]); \
-	printf("%%rax=%Q, %%rbx=%Q, %%rcx=%Q\n",x[-1],x[-2],x[-3]); \
-	printf("%%rdx=%Q, %%rsi=%Q, %%rdi=%Q\n",x[-4],x[-5],x[-6]); \
-	printf("%%r8 =%Q, %%r9 =%Q, %%r10=%Q\n",x[-7],x[-8],x[-9]); \
-	printf("%%r11=%Q, %%r12=%Q, %%r13=%Q\n",x[-10],x[-11],x[-12]); \
-	printf("%%r14=%Q, %%r15=%Q, %%rsp=%Q\n",x[-13],x[-14],x); \
+	printf("%%rcs=%Q,flags=%Q, %%cr0=%Q\n", x[2], x[3],read_cr0()); \
+	printf("%%rax=%Q, %%rbx=%Q, %%rcx=%Q\n",x[-2],x[-3],x[-4]); \
+	printf("%%rdx=%Q, %%rsi=%Q, %%rdi=%Q\n",x[-5],x[-6],x[-7]); \
+	printf("%%r8 =%Q, %%r9 =%Q, %%r10=%Q\n",x[-8],x[-9],x[-10]); \
+	printf("%%r11=%Q, %%r12=%Q, %%r13=%Q\n",x[-11],x[-12],x[-13]); \
+	printf("%%r14=%Q, %%r15=%Q, %%rsp=%Q\n",x[-14],x[-15],x); \
+	printf("%%rbp=%Q\n",x[-1]); \
 	printf("stack: %Q, %Q, %Q\n", x[5], x[6], x[7]); \
 	printf("       %Q, %Q, %Q\n", x[8], x[9], x[10]); \
+	printf("       %Q, %Q, %Q\n", x[11], x[12], x[13]); \
+	printf("       %Q, %Q, %Q\n", x[14], x[15], x[16]); \
+	printf("       %Q, %Q, %Q\n", x[17], x[18], x[19]); \
+	printf("       %Q, %Q, %Q\n", x[20], x[21], x[22]); \
+	printf("       %Q, %Q, %Q\n", x[23], x[24], x[25]); \
         }
 
@@ -91,6 +98,5 @@
 void null_interrupt(__u8 n, __native stack[])
 {
-	printf("----------------EXCEPTION OCCURED----------------\n");
-	printf("int %d: null_interrupt\n", n);
+	printf("-----EXCEPTION(%d) OCCURRED----- ( %s )\n",n,__FUNCTION__);
 	printf("stack: %L, %L, %L, %L\n", stack[0], stack[1], stack[2], stack[3]);
 	panic("unserviced interrupt\n");
@@ -99,5 +105,5 @@
 void gp_fault(__u8 n, __native stack[])
 {
-	PRINT_INFO_ERRCODE(stack);
+	PRINT_INFO_ERRCODE(n,stack);
 	panic("general protection fault\n");
 }
@@ -105,5 +111,5 @@
 void ss_fault(__u8 n, __native stack[])
 {
-	PRINT_INFO_ERRCODE(stack);
+	PRINT_INFO_ERRCODE(n,stack);
 	panic("stack fault\n");
 }
@@ -113,10 +119,15 @@
 {
 	reset_TS_flag();
-	if ((CPU->fpu_owner)!=NULL) {  
-		fpu_lazy_context_save(&((CPU->fpu_owner)->saved_fpu_context));
-		(CPU->fpu_owner)->fpu_context_engaged=0; /* don't prevent migration */
+	if (CPU->fpu_owner != NULL) {  
+		fpu_lazy_context_save(&CPU->fpu_owner->saved_fpu_context);
+		/* don't prevent migration */
+		CPU->fpu_owner->fpu_context_engaged=0; 
 	}
-	if(THREAD->fpu_context_exists) fpu_lazy_context_restore(&(THREAD->saved_fpu_context));
-	else {fpu_init();THREAD->fpu_context_exists=1;}
+	if (THREAD->fpu_context_exists)
+		fpu_lazy_context_restore(&THREAD->saved_fpu_context);
+	else {
+		fpu_init();
+		THREAD->fpu_context_exists=1;
+	}
 	CPU->fpu_owner=THREAD;
 }
@@ -126,5 +137,5 @@
 void page_fault(__u8 n, __native stack[])
 {
-	PRINT_INFO_ERRCODE(stack);
+	PRINT_INFO_ERRCODE(n,stack);
 	printf("Page fault address: %Q\n", read_cr2());
 	panic("page fault\n");
Index: arch/amd64/src/smp/ap.S
===================================================================
--- arch/amd64/src/smp/ap.S	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/amd64/src/smp/ap.S	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -55,5 +55,5 @@
 	movw %ax, %ds
 
-	lgdt ap_bootstrap_gdtr_boot	# initialize Global Descriptor Table register
+	lgdt real_bootstrap_gdtr_boot	# initialize Global Descriptor Table register
 	
 	movl %cr0, %eax
@@ -97,13 +97,7 @@
 .code64
 start64:
-	movq $ctx, %rax
-	movq 0(%rax), %rsp
+	movq (ctx), %rsp
 	call main_ap   # never returns
 	
-.global ap_bootstrap_gdtr_boot
-ap_bootstrap_gdtr_boot:
-	.word gdtselector(GDT_ITEMS)
-	.long KA2PA(gdt)
-
 	
 #endif /* __SMP__ */
Index: arch/ia32/include/fpu_context.h
===================================================================
--- arch/ia32/include/fpu_context.h	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/ia32/include/fpu_context.h	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -33,5 +33,6 @@
 
 struct fpu_context {
-	__u8 fpu[512]; 		/* FXSAVE & FXRSTOR storage area */
+	/* TODO: We need malloc that aligns structures on 16-byte boundary */
+	__u8 fpu[512+16]; 		/* FXSAVE & FXRSTOR storage area */
 };
 
Index: arch/ia32/src/fpu_context.c
===================================================================
--- arch/ia32/src/fpu_context.c	(revision 005384ad95744d08b0ce1d6d971b7af3df1e9611)
+++ arch/ia32/src/fpu_context.c	(revision 3396f59d66f8fec98e7d11c13ae9cf5cd21b38ef)
@@ -55,5 +55,4 @@
 		: "=m"(*fctx)
 		);
-	return;
 }
 
@@ -64,5 +63,4 @@
 		: "=m"(*fctx)
 		);
-	return;
 }
 
