Index: arch/ppc32/include/atomic.h
===================================================================
--- arch/ppc32/include/atomic.h	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/include/atomic.h	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -34,15 +34,30 @@
 typedef struct { volatile __u32 count; } atomic_t;
 
-/*
- * TODO: these are just placeholders for real implementations of atomic_inc and atomic_dec.
- * WARNING: the following functions cause the code to be preemption-unsafe !!!
- */
+static inline void atomic_inc(atomic_t *val) {	/* atomically add 1 to val->count */
+	__u32 tmp;	/* scratch register for the load-reserve/store-conditional loop */
 
-static inline void atomic_inc(atomic_t *val) {
-	val->count++;
+	asm __volatile__ (
+		"1:\n"
+		"lwarx %0, 0, %2\n"	/* load count and take a reservation on it */
+		"addic %0, %0, 1\n"	/* increment; addic also writes XER[CA] */
+		"stwcx. %0, 0, %2\n"	/* store only if the reservation is still held */
+		"bne- 1b"		/* store failed: retry from the load */
+		: "=&r" (tmp), "=m" (val->count)
+		: "r" (&val->count), "m" (val->count)
+		: "cc", "xer");	/* stwcx. sets cr0, addic sets the carry bit in XER */
 }
 
 static inline void atomic_dec(atomic_t *val) {
-	val->count--;
+	__u32 tmp;	/* scratch register for the load-reserve/store-conditional loop */
+
+	asm __volatile__(	/* atomically subtract 1 from val->count */
+		"1:\n"
+		"lwarx %0, 0, %2\n"	/* load count and take a reservation on it */
+		"addic %0, %0, -1\n"	/* decrement; addic also writes XER[CA] */
+		"stwcx. %0, 0, %2\n"	/* store only if the reservation is still held */
+		"bne- 1b"		/* store failed: retry from the load */
+		: "=&r" (tmp), "=m" (val->count)
+		: "r" (&val->count), "m" (val->count)
+		: "cc", "xer");	/* stwcx. sets cr0, addic sets the carry bit in XER */
 }
 
Index: arch/ppc32/include/barrier.h
===================================================================
--- arch/ppc32/include/barrier.h	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/include/barrier.h	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -33,7 +33,7 @@
 #define CS_LEAVE_BARRIER()	__asm__ volatile ("" ::: "memory")
 
-#define memory_barrier()
-#define read_barrier()
-#define write_barrier()
+#define memory_barrier() __asm__ volatile ("sync" ::: "memory") /* sync instruction; the memory clobber also stops compiler reordering */
+#define read_barrier() __asm__ volatile ("sync" ::: "memory") /* same sync sequence as memory_barrier() */
+#define write_barrier() __asm__ volatile ("eieio" ::: "memory") /* eieio instruction plus the same compiler barrier */
 
 #endif
Index: arch/ppc32/include/boot/boot.h
===================================================================
--- arch/ppc32/include/boot/boot.h	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/include/boot/boot.h	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -32,3 +32,6 @@
 #define BOOT_OFFSET		0x2000
 
+/* Size in bytes of the temporary stack reserved for the boot process */
+#define TEMP_STACK_SIZE 0x100
+
 #endif
Index: arch/ppc32/include/context.h
===================================================================
--- arch/ppc32/include/context.h	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/include/context.h	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -34,5 +34,5 @@
 #endif
 
-#define SP_DELTA	4
+#define SP_DELTA	8
 
 struct context {
Index: arch/ppc32/include/drivers/cuda.h
===================================================================
--- arch/ppc32/include/drivers/cuda.h	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/include/drivers/cuda.h	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -31,3 +31,6 @@
 
 
+void cuda_init(void);
+
+
 #endif
Index: arch/ppc32/src/boot/boot.S
===================================================================
--- arch/ppc32/src/boot/boot.S	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/src/boot/boot.S	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -36,4 +36,9 @@
 .global kernel_image_start
 kernel_image_start:
+
+	# load temporary stack
+	
+	lis sp, end_stack@ha
+	addi sp, sp, end_stack@l
 	
 	# r10 contains physical address to memmap_t
@@ -70,2 +75,7 @@
 	
 	b main_bsp
+
+.section K_DATA_START, "aw", @progbits	# allocatable, writable data section
+	
+	.space TEMP_STACK_SIZE	# reserve TEMP_STACK_SIZE bytes for the temporary stack
+end_stack:	# address just past the reserved space; kernel_image_start loads it into sp
Index: arch/ppc32/src/drivers/cuda.c
===================================================================
--- arch/ppc32/src/drivers/cuda.c	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/src/drivers/cuda.c	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -49,4 +49,9 @@
 
 
+void cuda_init(void)
+{	/* empty body: no initialization is performed here */
+}
+
+
 static void cuda_packet(const __u8 data)
 {
Index: arch/ppc32/src/exception.S
===================================================================
--- arch/ppc32/src/exception.S	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/src/exception.S	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -85,4 +85,5 @@
 .global exc_decrementer
 exc_decrementer:
+	rfi	# return from the interrupt at once; the branch below is no longer reached
 	b exc_decrementer
 
Index: arch/ppc32/src/ppc32.c
===================================================================
--- arch/ppc32/src/ppc32.c	(revision edc89bd06235d952cbfa29940f60b97cb3bf9c02)
+++ arch/ppc32/src/ppc32.c	(revision 5f62ef99a9c5aa3020510303c82ab8fcbfa0b83b)
@@ -29,5 +29,5 @@
 #include <arch.h>
 #include <arch/console.h>
-
+#include <arch/drivers/cuda.h>
 #include <arch/mm/memory_init.h>
 
@@ -35,4 +35,5 @@
 {
 	ppc32_console_init();
+	cuda_init();
 }
 
