Index: arch/amd64/include/memstr.h
===================================================================
--- arch/amd64/include/memstr.h	(revision 4b2c872d993209cc50bfe660125b5f9537183b14)
+++ arch/amd64/include/memstr.h	(revision 56d40fe76c4baab19f1462d818d58ef9abbf363d)
@@ -30,9 +30,107 @@
 #define __amd64_MEMSTR_H__
 
-#define memcpy(dst, src, cnt)  __builtin_memcpy((dst), (src), (cnt)); 
+/** Copy memory
+ *
+ * Copy cnt bytes from src to dst. The first cnt / 8 quadwords
+ * are moved with "rep movsq"; the remaining cnt % 8 bytes, if
+ * any, are moved with "rep movsb".
+ * The memory areas cannot overlap.
+ *
+ * @param dst Destination
+ * @param src Source
+ * @param cnt Number of bytes
+ * @return Destination
+ */
+static inline void * memcpy(void * dst, const void * src, size_t cnt)
+{
+        __native d0, d1, d2;	/* dummy outputs: the asm modifies rcx, rdi and rsi */
 
-extern void memsetw(__address dst, size_t cnt, __u16 x);
-extern void memsetb(__address dst, size_t cnt, __u8 x);
-extern int memcmp(__address src, __address dst, int cnt);
+        __asm__ __volatile__(
+                "rep movsq\n\t"	/* rcx holds cnt / 8 on entry: copy whole quadwords */
+                "movq %4, %%rcx\n\t"	/* reload the full byte count (operand 4) */
+                "andq $7, %%rcx\n\t"	/* keep only the tail length, cnt % 8 */
+                "jz 1f\n\t"	/* nothing left to copy */
+                "rep movsb\n\t"	/* copy the tail one byte at a time */
+                "1:\n"
+                : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+                : "0" ((__native)(cnt / 8)), "g" ((__native)cnt), "1" ((__native) dst), "2" ((__native) src)
+                : "memory");	/* memory is accessed through pointers the compiler cannot track */
+
+        return dst;
+}
+
+
+/** Compare memory regions for equality
+ *
+ * Compare cnt bytes at src and dst for equality. A zero-length
+ * comparison (cnt == 0) always reports the regions as equal.
+ * The result only tells whether the regions differ; it does not
+ * order them.
+ *
+ * @param src Region 1
+ * @param dst Region 2
+ * @param cnt Number of bytes
+ * @return Zero if bytes are equal, non-zero otherwise
+ */
+static inline int memcmp(const void * src, const void * dst, size_t cnt)
+{
+	__native d0, d1, d2;
+	__native ret;
+	
+	__asm__ (
+		"xorq %0, %0\n\t"	/* ret = 0 and ZF = 1, so cnt == 0 yields "equal" */
+		"repe cmpsb\n\t"	/* stop at the first differing byte or when rcx hits 0 */
+		"setne %b0\n\t"		/* ret = 1 if the last compared bytes differed */
+		: "=&a" (ret), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+		: "1" ((__native) src), "2" ((__native) dst), "3" ((__native) cnt)
+		: "memory"		/* both regions are read through rsi/rdi */
+	);
+	
+	return ret;
+}
+
+/** Fill memory with words
+ *
+ * Store the 16-bit word x into cnt consecutive words starting
+ * at address dst, using "rep stosw".
+ *
+ * @param dst Destination
+ * @param cnt Number of words (not bytes)
+ * @param x Value to fill
+ */
+static inline void memsetw(__address dst, size_t cnt, __u16 x)
+{
+	__native d0, d1;	/* dummy outputs: the asm modifies rdi and rcx */
+	
+	__asm__ __volatile__ (
+		"rep stosw\n\t"	/* store ax at [rdi], repeated rcx times */
+		: "=&D" (d0), "=&c" (d1), "=a" (x)
+		: "0" (dst), "1" ((__native)cnt), "2" (x)
+		: "memory"	/* the stores are not visible to the compiler otherwise */
+	);
+
+}
+
+/** Fill memory with bytes
+ *
+ * Store the byte x into cnt consecutive bytes starting at
+ * address dst, using "rep stosb".
+ *
+ * @param dst Destination
+ * @param cnt Number of bytes
+ * @param x Value to fill
+ */
+static inline void memsetb(__address dst, size_t cnt, __u8 x)
+{
+	__native d0, d1;	/* dummy outputs: the asm modifies rdi and rcx */
+	
+	__asm__ __volatile__ (
+		"rep stosb\n\t"	/* store al at [rdi], repeated rcx times */
+		: "=&D" (d0), "=&c" (d1), "=a" (x)
+		: "0" (dst), "1" ((__native)cnt), "2" (x)
+		: "memory"	/* the stores are not visible to the compiler otherwise */
+	);
+
+}
 
 #endif
