Index: kernel/generic/include/atomic.h
===================================================================
--- kernel/generic/include/atomic.h	(revision 286da52c77dc3bd30ef7b617138baded44d5dbb5)
+++ kernel/generic/include/atomic.h	(revision efed95a34b7293205857407388da4ff8ff9a92f1)
@@ -39,4 +39,14 @@
 #include <typedefs.h>
 #include <stdatomic.h>
+
+/*
+ * Shorthand for relaxed atomic reads/writes, which are needed to formally
+ * avoid undefined behavior (a data race) when a variable is accessed from
+ * different threads and we don't particularly care about ordering
+ * (e.g. statistics printouts). These most likely translate into the same
+ * assembly instructions as regular reads/writes.
+ */
+#define atomic_set_unordered(var, val) atomic_store_explicit((var), (val), memory_order_relaxed)
+#define atomic_get_unordered(var) atomic_load_explicit((var), memory_order_relaxed)
 
 #define atomic_predec(val) \
Index: kernel/generic/include/proc/thread.h
===================================================================
--- kernel/generic/include/proc/thread.h	(revision 286da52c77dc3bd30ef7b617138baded44d5dbb5)
+++ kernel/generic/include/proc/thread.h	(revision efed95a34b7293205857407388da4ff8ff9a92f1)
@@ -138,5 +138,5 @@
 
 	/** Thread CPU. */
-	cpu_t *cpu;
+	_Atomic(cpu_t *) cpu;
 	/** Containing task. */
 	task_t *task;
Index: kernel/generic/src/proc/scheduler.c
===================================================================
--- kernel/generic/src/proc/scheduler.c	(revision 286da52c77dc3bd30ef7b617138baded44d5dbb5)
+++ kernel/generic/src/proc/scheduler.c	(revision efed95a34b7293205857407388da4ff8ff9a92f1)
@@ -311,5 +311,5 @@
 
 	irq_spinlock_lock(&THREAD->lock, false);
-	assert(THREAD->cpu == CPU);
+	assert(atomic_get_unordered(&THREAD->cpu) == CPU);
 
 	THREAD->state = Running;
@@ -387,5 +387,5 @@
 
 	assert(thread->state == Running);
-	assert(thread->cpu == CPU);
+	assert(atomic_get_unordered(&thread->cpu) == CPU);
 
 	int i = (thread->priority < RQ_COUNT - 1) ?
@@ -411,8 +411,10 @@
 
 	/* Prefer the CPU on which the thread ran last */
-	if (!thread->cpu)
-		thread->cpu = CPU;
-
-	cpu_t *cpu = thread->cpu;
+	cpu_t *cpu = atomic_get_unordered(&thread->cpu);
+
+	if (!cpu) {
+		cpu = CPU;
+		atomic_set_unordered(&thread->cpu, CPU);
+	}
 
 	irq_spinlock_unlock(&thread->lock, false);
@@ -656,5 +658,5 @@
 
 		thread->stolen = true;
-		thread->cpu = CPU;
+		atomic_set_unordered(&thread->cpu, CPU);
 
 		irq_spinlock_unlock(&thread->lock, false);
Index: kernel/generic/src/proc/thread.c
===================================================================
--- kernel/generic/src/proc/thread.c	(revision 286da52c77dc3bd30ef7b617138baded44d5dbb5)
+++ kernel/generic/src/proc/thread.c	(revision efed95a34b7293205857407388da4ff8ff9a92f1)
@@ -198,5 +198,5 @@
 {
 	irq_spinlock_lock(&thread->lock, true);
-	thread->cpu = cpu;
+	atomic_set_unordered(&thread->cpu, cpu);
 	thread->nomigrate++;
 	irq_spinlock_unlock(&thread->lock, true);
@@ -263,5 +263,5 @@
 	    ((flags & THREAD_FLAG_UNCOUNTED) == THREAD_FLAG_UNCOUNTED);
 	thread->priority = -1;          /* Start in rq[0] */
-	thread->cpu = NULL;
+	atomic_init(&thread->cpu, NULL);
 	thread->stolen = false;
 	thread->uspace =
@@ -343,5 +343,6 @@
 	/* Clear cpu->fpu_owner if set to this thread. */
 #ifdef CONFIG_FPU_LAZY
-	if (thread->cpu) {
+	cpu_t *cpu = atomic_get_unordered(&thread->cpu);
+	if (cpu) {
 		/*
 		 * We need to lock for this because the old CPU can concurrently try
@@ -349,15 +350,10 @@
 		 * it to finish. An atomic compare-and-swap wouldn't be enough.
 		 */
-		irq_spinlock_lock(&thread->cpu->fpu_lock, false);
-
-		thread_t *owner = atomic_load_explicit(&thread->cpu->fpu_owner,
-		    memory_order_relaxed);
-
-		if (owner == thread) {
-			atomic_store_explicit(&thread->cpu->fpu_owner, NULL,
-			    memory_order_relaxed);
-		}
-
-		irq_spinlock_unlock(&thread->cpu->fpu_lock, false);
+		irq_spinlock_lock(&cpu->fpu_lock, false);
+
+		if (atomic_get_unordered(&cpu->fpu_owner) == thread)
+			atomic_set_unordered(&cpu->fpu_owner, NULL);
+
+		irq_spinlock_unlock(&cpu->fpu_lock, false);
 	}
 #endif
@@ -707,6 +703,7 @@
 
 	if (additional) {
-		if (thread->cpu)
-			printf("%-5u", thread->cpu->id);
+		cpu_t *cpu = atomic_get_unordered(&thread->cpu);
+		if (cpu)
+			printf("%-5u", cpu->id);
 		else
 			printf("none ");
Index: kernel/generic/src/sysinfo/stats.c
===================================================================
--- kernel/generic/src/sysinfo/stats.c	(revision 286da52c77dc3bd30ef7b617138baded44d5dbb5)
+++ kernel/generic/src/sysinfo/stats.c	(revision efed95a34b7293205857407388da4ff8ff9a92f1)
@@ -308,7 +308,9 @@
 	stats_thread->kcycles = thread->kcycles;
 
-	if (thread->cpu != NULL) {
+	cpu_t *cpu = atomic_get_unordered(&thread->cpu);
+
+	if (cpu != NULL) {
 		stats_thread->on_cpu = true;
-		stats_thread->cpu = thread->cpu->id;
+		stats_thread->cpu = cpu->id;
 	} else
 		stats_thread->on_cpu = false;
