Index: kernel/generic/include/cpu.h
===================================================================
--- kernel/generic/include/cpu.h	(revision 1c1767f9c4047ab3772e598d8e02b7ab54cd0c5e)
+++ kernel/generic/include/cpu.h	(revision 2593999761151633e3726d421b2f67d1f2bcbbe0)
@@ -74,4 +74,6 @@
 	bool idle;
 	uint64_t last_cycle;
+
+	context_t scheduler_context;
 } cpu_local_t;
 
Index: kernel/generic/src/main/main.c
===================================================================
--- kernel/generic/src/main/main.c	(revision 1c1767f9c4047ab3772e598d8e02b7ab54cd0c5e)
+++ kernel/generic/src/main/main.c	(revision 2593999761151633e3726d421b2f67d1f2bcbbe0)
@@ -287,5 +287,6 @@
 	 * starting the thread of kernel threads.
 	 */
-	scheduler_run();
+	current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);
+	context_replace(scheduler_run, CPU_LOCAL->stack, STACK_SIZE);
 	/* not reached */
 }
@@ -327,6 +328,4 @@
 	ARCH_OP(post_cpu_init);
 
-	current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);
-
 	/*
 	 * If we woke kmp up before we left the kernel stack, we could
@@ -334,4 +333,5 @@
 	 * switch to this cpu's private stack prior to waking kmp up.
 	 */
+	current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);
 	context_replace(main_ap_separated_stack, CPU_LOCAL->stack, STACK_SIZE);
 	/* not reached */
Index: kernel/generic/src/proc/scheduler.c
===================================================================
--- kernel/generic/src/proc/scheduler.c	(revision 1c1767f9c4047ab3772e598d8e02b7ab54cd0c5e)
+++ kernel/generic/src/proc/scheduler.c	(revision 2593999761151633e3726d421b2f67d1f2bcbbe0)
@@ -1,4 +1,5 @@
 /*
  * Copyright (c) 2010 Jakub Jermar
+ * Copyright (c) 2023 Jiří Zárevúcky
  * All rights reserved.
  *
@@ -50,5 +51,4 @@
 #include <time/delay.h>
 #include <arch/asm.h>
-#include <arch/faddr.h>
 #include <arch/cycle.h>
 #include <atomic.h>
@@ -66,6 +66,4 @@
 #include <stacktrace.h>
 
-static void scheduler_separated_stack(void);
-
 atomic_size_t nrdy;  /**< Number of ready threads in the system. */
 
@@ -227,4 +225,6 @@
 static void relink_rq(int start)
 {
+	assert(interrupts_disabled());
+
 	if (CPU_LOCAL->current_clock_tick < CPU_LOCAL->relink_deadline)
 		return;
@@ -302,15 +302,4 @@
 }
 
-void scheduler_run(void)
-{
-	assert(interrupts_disabled());
-	assert(THREAD == NULL);
-	assert(CPU != NULL);
-
-	current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);
-	context_replace(scheduler_separated_stack, CPU_LOCAL->stack, STACK_SIZE);
-	unreachable();
-}
-
 /** Things to do before we switch to THREAD context.
  */
@@ -421,11 +410,5 @@
 }
 
-/** The scheduler
- *
- * The thread scheduling procedure.
- * Passes control directly to
- * scheduler_separated_stack().
- *
- */
+/** Switch to scheduler context to let other threads run. */
 void scheduler_enter(state_t new_state)
 {
@@ -436,4 +419,7 @@
 
 	fpu_cleanup();
+
+	if (atomic_load(&haltstate))
+		halt();
 
 	irq_spinlock_lock(&THREAD->lock, false);
@@ -460,27 +446,7 @@
 	 *
 	 */
+
 	current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);
-
-	/*
-	 * We may not keep the old stack.
-	 * Reason: If we kept the old stack and got blocked, for instance, in
-	 * find_best_thread(), the old thread could get rescheduled by another
-	 * CPU and overwrite the part of its own stack that was also used by
-	 * the scheduler on this CPU.
-	 *
-	 * Moreover, we have to bypass the compiler-generated POP sequence
-	 * which is fooled by SP being set to the very top of the stack.
-	 * Therefore the scheduler() function continues in
-	 * scheduler_separated_stack().
-	 *
-	 */
-	context_t ctx;
-	context_create(&ctx, scheduler_separated_stack,
-	    CPU_LOCAL->stack, STACK_SIZE);
-
-	/* Switch to scheduler context and store current thread's context. */
-	context_swap(&THREAD->saved_context, &ctx);
-
-	/* Returned from scheduler. */
+	context_swap(&THREAD->saved_context, &CPU_LOCAL->scheduler_context);
 
 	irq_spinlock_unlock(&THREAD->lock, false);
@@ -488,21 +454,45 @@
 }
 
-/** Scheduler stack switch wrapper
- *
- * Second part of the scheduler() function
- * using new stack. Handling the actual context
- * switch to a new thread.
- *
- */
-void scheduler_separated_stack(void)
-{
-	assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
+/** Enter main scheduler loop. Never returns.
+ *
+ * This function switches to a runnable thread as soon as one is available.
+ * Control returns to this context only if a thread is stopping and there is
+ * no other thread to run in its place. A separate context is needed for that
+ * case because we are about to block the CPU, which means another context is
+ * required to clean up after the previous thread.
+ */
+void scheduler_run(void)
+{
+	assert(interrupts_disabled());
+
 	assert(CPU != NULL);
+	assert(TASK == NULL);
+	assert(THREAD == NULL);
 	assert(interrupts_disabled());
 
-	if (atomic_load(&haltstate))
-		halt();
-
-	if (THREAD) {
+	while (!atomic_load(&haltstate)) {
+		assert(CURRENT->mutex_locks == 0);
+
+		int rq_index;
+		THREAD = find_best_thread(&rq_index);
+
+		prepare_to_run_thread(rq_index);
+
+		/*
+		 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
+		 * thread's stack.
+		 */
+		current_copy(CURRENT, (current_t *) THREAD->kstack);
+
+		/* Switch to thread context. */
+		context_swap(&CPU_LOCAL->scheduler_context, &THREAD->saved_context);
+
+		/* Back from the thread. */
+
+		assert(CPU != NULL);
+		assert(THREAD != NULL);
+		assert(irq_spinlock_locked(&THREAD->lock));
+		assert(interrupts_disabled());
+
 		state_t state = THREAD->state;
 		irq_spinlock_unlock(&THREAD->lock, false);
@@ -510,21 +500,12 @@
 		cleanup_after_thread(THREAD, state);
 
+		/*
+		 * Necessary because we're allowing interrupts in find_best_thread(),
+		 * so we need to avoid other code referencing the thread we left.
+		 */
 		THREAD = NULL;
 	}
 
-	int rq_index;
-	THREAD = find_best_thread(&rq_index);
-
-	prepare_to_run_thread(rq_index);
-
-	/*
-	 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
-	 * thread's stack.
-	 */
-	current_copy(CURRENT, (current_t *) THREAD->kstack);
-
-	context_restore(&THREAD->saved_context);
-
-	/* Not reached */
+	halt();
 }
 
Index: kernel/generic/src/proc/thread.c
===================================================================
--- kernel/generic/src/proc/thread.c	(revision 1c1767f9c4047ab3772e598d8e02b7ab54cd0c5e)
+++ kernel/generic/src/proc/thread.c	(revision 2593999761151633e3726d421b2f67d1f2bcbbe0)
@@ -246,4 +246,5 @@
 void thread_ready(thread_t *thread)
 {
+	// TODO: move this to scheduler.c
 	irq_spinlock_lock(&thread->lock, true);
 
Index: kernel/generic/src/time/clock.c
===================================================================
--- kernel/generic/src/time/clock.c	(revision 1c1767f9c4047ab3772e598d8e02b7ab54cd0c5e)
+++ kernel/generic/src/time/clock.c	(revision 2593999761151633e3726d421b2f67d1f2bcbbe0)
@@ -123,4 +123,5 @@
 static void cpu_update_accounting(void)
 {
+	// FIXME: get_cycle() is unimplemented on several platforms
 	uint64_t now = get_cycle();
 	atomic_time_increment(&CPU->busy_cycles, now - CPU_LOCAL->last_cycle);
