Index: arch/ia32/include/asm.h
===================================================================
--- arch/ia32/include/asm.h	(revision 650d976475a957b2f94d1e918d703d66fa25b4a3)
+++ arch/ia32/include/asm.h	(revision 104dc0b57379eb9a5c6034b0a00f473408a9c5cd)
@@ -139,6 +139,6 @@
 	pri_t v;
 	__asm__ volatile (
-		"pushf\n"
-		"popl %0\n"
+		"pushf\n\t"
+		"popl %0\n\t"
 		"sti\n"
 		: "=r" (v)
@@ -155,6 +155,6 @@
 	pri_t v;
 	__asm__ volatile (
-		"pushf\n"
-		"popl %0\n"
+		"pushf\n\t"
+		"popl %0\n\t"
 		"cli\n"
 		: "=r" (v)
@@ -169,5 +169,5 @@
 static inline void cpu_priority_restore(pri_t pri) {
 	__asm__ volatile (
-		"pushl %0\n"
+		"pushl %0\n\t"
 		"popf\n"
 		: : "r" (pri)
@@ -182,5 +182,5 @@
 	pri_t v;
 	__asm__ volatile (
-		"pushf\n"
+		"pushf\n\t"
 		"popl %0\n"
 		: "=r" (v)
@@ -213,4 +213,34 @@
 }
 
+/** Copy memory
+ *
+ * Copy cnt bytes from the memory location given by src
+ * to the memory location given by dst: first cnt / 4 32-bit words
+ * (rep movsl), then the remaining cnt % 4 bytes (rep movsb).
+ * The memory areas must not overlap.
+ *
+ * @param dst Destination address.
+ * @param src Source address.
+ * @param cnt Number of bytes to copy.
+ * @return dst, unchanged.
+ */
+static inline void * memcpy(void * dst, const void * src, size_t cnt)
+{
+	__u32 d0, d1, d2;	/* dummy outputs for ecx, edi, esi; their values are never read */
+	
+	__asm__ __volatile__(	/* NOTE(review): no cld is issued here; presumably DF is clear on entry - confirm */
+		"rep movsl\n\t"		/* copy cnt / 4 dwords; ecx is preloaded through the "0" input */
+		"movl %4, %%ecx\n\t"	/* reload the full byte count (operand 4 is cnt) */
+		"andl $3, %%ecx\n\t"	/* keep only the cnt % 4 tail bytes */
+		"jz 1f\n\t"		/* no tail bytes left: skip the byte copy */
+		"rep movsb\n\t"		/* copy the remaining 1 to 3 bytes */
+		"1:\n"
+		: "=&c" (d0), "=&D" (d1), "=&S" (d2)	/* early-clobber outputs: ecx, edi, esi */
+		: "0" (cnt / 4), "g" (cnt), "1" ((__u32) dst), "2" ((__u32) src)	/* "0"/"1"/"2" tie inputs to ecx/edi/esi */
+		: "memory");	/* the asm reads and writes memory the compiler cannot see */
+		
+	return dst;
+}
+
 
 #endif
Index: arch/ia32/src/asm.S
===================================================================
--- arch/ia32/src/asm.S	(revision 650d976475a957b2f94d1e918d703d66fa25b4a3)
+++ arch/ia32/src/asm.S	(revision 104dc0b57379eb9a5c6034b0a00f473408a9c5cd)
@@ -38,5 +38,4 @@
 .global enable_l_apic_in_msr
 .global interrupt_handlers
-.global memcpy
 .global memsetb
 .global memsetw
@@ -153,29 +152,4 @@
 #	handler 192 256
 h_end:
-
-
-## Copy memory
-#
-# Copy a given number of bytes (3rd argument)
-# from the memory location defined by 2nd argument
-# to the memory location defined by 1st argument.
-# The memory areas cannot overlap.
-#
-SRC=16
-DST=12
-CNT=20
-memcpy:
-	push %esi
-	push %edi
-
-	movl CNT(%esp),%ecx
-	movl DST(%esp),%edi
-	movl SRC(%esp),%esi
-
-	rep movsb %ds:(%esi),%es:(%edi)
-
-	pop %edi
-	pop %esi
-	ret
 
 
Index: include/memstr.h
===================================================================
--- include/memstr.h	(revision 650d976475a957b2f94d1e918d703d66fa25b4a3)
+++ include/memstr.h	(revision 104dc0b57379eb9a5c6034b0a00f473408a9c5cd)
@@ -32,6 +32,5 @@
 #include <typedefs.h>
 #include <arch/types.h>
-
-#define memcpy(dst, src, cnt)	__builtin_memcpy((dst), (src), (cnt)); 
+#include <arch/asm.h>
 
 extern void memsetw(__address dst, size_t cnt, __u16 x);
