source: mainline/kernel/generic/src/proc/scheduler.c@ f3dbe27

Last change on this file since f3dbe27 was f3dbe27, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 years ago

Reduce locking further with lazy FPU

It turns out we only need a lock to synchronize between the trap
handler and thread destructor. The atomic operations introduced
are just plain reads and writes, written in an ugly fashion to
appease C11 undefined behavior gods.

In principle we could get rid of that if we made cpu_t::fpu_owner
a strong reference, but that would mean a thread structure could
be held in limbo indefinitely if a new thread is not being
scheduled or doesn't use FPU.

/*
 * Copyright (c) 2010 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup kernel_generic_proc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and kcpulb kernel thread which
 * performs load-balancing of per-CPU run queues.
 */

#include <assert.h>
#include <atomic.h>
#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <halt.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <stdio.h>
#include <log.h>
#include <stacktrace.h>

static void scheduler_separated_stack(void);

atomic_size_t nrdy; /**< Number of ready threads in the system. */

/** Take actions before new thread runs.
 *
 * Perform actions that need to be
 * taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();

#ifdef CONFIG_FPU_LAZY
	/*
	 * The only concurrent modification possible for fpu_owner here is
	 * another thread changing it from itself to NULL in its destructor.
	 */
	thread_t *owner = atomic_load_explicit(&CPU->fpu_owner,
	    memory_order_relaxed);

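	/*
	 * With lazy FPU switching, keep the unit enabled only for the owner
	 * of the lazily saved context. For any other thread it stays
	 * disabled, so its first FPU instruction traps and
	 * scheduler_fpu_lazy_request() can hand the unit over.
	 */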
	if (THREAD == owner)
		fpu_enable();
	else
		fpu_disable();
#elif defined CONFIG_FPU
	fpu_enable();
	if (THREAD->fpu_context_exists)
		fpu_context_restore(&THREAD->fpu_context);
	else {
		fpu_init();
		THREAD->fpu_context_exists = true;
	}
#endif

#ifdef CONFIG_UDEBUG
	if (THREAD->btrace) {
		istate_t *istate = THREAD->udebug.uspace_state;
		if (istate != NULL) {
			printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
			stack_trace_istate(istate);
		}

		THREAD->btrace = false;
	}
#endif
}

/** Take actions after THREAD has run.
 *
 * Perform actions that need to be
 * taken after the running thread
 * has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry
 *
 */
static void after_thread_ran(void)
{
	after_thread_ran_arch();
}

#ifdef CONFIG_FPU_LAZY
void scheduler_fpu_lazy_request(void)
{
	fpu_enable();

	/* We need this lock to ensure synchronization with thread destructor. */
	irq_spinlock_lock(&CPU->fpu_lock, false);

	/* Save old context */
	thread_t *owner = atomic_load_explicit(&CPU->fpu_owner, memory_order_relaxed);
	if (owner != NULL) {
		fpu_context_save(&owner->fpu_context);
		atomic_store_explicit(&CPU->fpu_owner, NULL, memory_order_relaxed);
	}

	irq_spinlock_unlock(&CPU->fpu_lock, false);

	if (THREAD->fpu_context_exists) {
		fpu_context_restore(&THREAD->fpu_context);
	} else {
		fpu_init();
		THREAD->fpu_context_exists = true;
	}

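	/*
	 * Publish THREAD as the new owner of this CPU's FPU state.
	 * before_thread_runs() keeps the unit enabled only for the owner,
	 * so any other thread ends up back in this function.
	 */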
	atomic_store_explicit(&CPU->fpu_owner, THREAD, memory_order_relaxed);
}
#endif /* CONFIG_FPU_LAZY */

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
	assert(CPU != NULL);

loop:
	if (atomic_load(&CPU->nrdy) == 0) {
		/*
		 * Since there is nothing to run, the CPU goes to sleep
		 * until a hardware interrupt or an IPI arrives.
		 * This saves energy and helps hyperthreading.
		 */
		CPU->idle = true;

		/*
		 * Go to sleep with interrupts enabled.
		 * Ideally, this should be atomic, but that is not guaranteed on
		 * all platforms yet, so it is possible that we go to sleep just
		 * as a thread becomes available.
		 */
		cpu_interruptible_sleep();

		/* Interrupts are disabled again. */
		goto loop;
	}

	assert(!CPU->idle);

	unsigned int i;
	for (i = 0; i < RQ_COUNT; i++) {
		irq_spinlock_lock(&(CPU->rq[i].lock), false);
		if (CPU->rq[i].n == 0) {
			/*
			 * If this queue is empty, try a lower-priority queue.
			 */
			irq_spinlock_unlock(&(CPU->rq[i].lock), false);
			continue;
		}

		atomic_dec(&CPU->nrdy);
		atomic_dec(&nrdy);
		CPU->rq[i].n--;

		/*
		 * Take the first thread from the queue.
		 */
		thread_t *thread = list_get_instance(
		    list_first(&CPU->rq[i].rq), thread_t, rq_link);
		list_remove(&thread->rq_link);

		irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);

		thread->cpu = CPU;
		thread->priority = i; /* Correct rq index */

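		/*
		 * The time slice scales with the queue index: the
		 * highest-priority queue (i == 0) gets 10 ms, the next one
		 * 20 ms, and so on.
		 */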
		/* Time allocation in microseconds. */
		uint64_t time_to_run = (i + 1) * 10000;

		/* This is safe because interrupts are disabled. */
		CPU->preempt_deadline = CPU->current_clock_tick + us2ticks(time_to_run);

		/*
		 * Clear the stolen flag so that the thread can be migrated
		 * again when the need for load balancing arises.
		 */
		thread->stolen = false;
		irq_spinlock_unlock(&thread->lock, false);

		return thread;
	}

	goto loop;
}

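/** Switch to the context of a different task.
 *
 * If the new task shares the address space with the current one,
 * the address space switch is skipped. The CPU's reference to the
 * old TASK is released and a reference to @a task is taken instead.
 *
 * @param task Task whose thread is about to run.
 *
 */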
static void switch_task(task_t *task)
{
	/* If the task stays the same, a lot of work is avoided. */
	if (TASK == task)
		return;

	as_t *old_as = AS;
	as_t *new_as = task->as;

	/* It is possible for two tasks to share one address space. */
	if (old_as != new_as)
		as_switch(old_as, new_as);

	if (TASK)
		task_release(TASK);

	TASK = task;

	task_hold(TASK);

	before_task_runs_arch();
}

/** Prevent rq starvation
 *
 * Prevent low priority threads from starving in rq's.
 *
 * When the function decides to relink rq's, it reconnects the
 * respective lists so that threads whose priority is greater than
 * @a start are each moved one level up, into a higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
static void relink_rq(int start)
{
	if (CPU->current_clock_tick < CPU->relink_deadline)
		return;

	CPU->relink_deadline = CPU->current_clock_tick + NEEDS_RELINK_MAX;

	/* Temporary cache for lists we are moving. */
	list_t list;
	list_initialize(&list);

	size_t n = 0;

	/* Move every list (except the one with highest priority) one level up. */
	for (int i = RQ_COUNT - 1; i > start; i--) {
		irq_spinlock_lock(&CPU->rq[i].lock, false);

		/* Swap lists. */
		list_swap(&CPU->rq[i].rq, &list);

		/* Swap number of items. */
		size_t tmpn = CPU->rq[i].n;
		CPU->rq[i].n = n;
		n = tmpn;

		irq_spinlock_unlock(&CPU->rq[i].lock, false);
	}

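	/*
	 * Each queue with index greater than start now holds the former
	 * contents of the queue one index higher (the last queue ends up
	 * empty), and the local list carries what used to be rq[start + 1].
	 */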
	/* Append the contents of rq[start + 1] to rq[start]. */
	if (n != 0) {
		irq_spinlock_lock(&CPU->rq[start].lock, false);
		list_concat(&CPU->rq[start].rq, &list);
		CPU->rq[start].n += n;
		irq_spinlock_unlock(&CPU->rq[start].lock, false);
	}
}

void scheduler(void)
{
	ipl_t ipl = interrupts_disable();

	if (atomic_load(&haltstate))
		halt();

	if (THREAD) {
		irq_spinlock_lock(&THREAD->lock, false);
	}

	scheduler_locked(ipl);
}

/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler_locked(ipl_t ipl)
{
	assert(CPU != NULL);

	if (THREAD) {
		/* Update thread kernel accounting */
		THREAD->kcycles += get_cycle() - THREAD->last_cycle;

#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
		fpu_context_save(&THREAD->fpu_context);
#endif
		if (!context_save(&THREAD->saved_context)) {
			/*
			 * This is the place where threads leave scheduler();
			 */

			/* Save current CPU cycle */
			THREAD->last_cycle = get_cycle();

			irq_spinlock_unlock(&THREAD->lock, false);
			interrupts_restore(THREAD->saved_ipl);

			return;
		}

		/*
		 * Interrupt priority level of preempted thread is recorded
		 * here to facilitate scheduler() invocations from
		 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
		 *
		 */
		THREAD->saved_ipl = ipl;
	}

	/*
	 * Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS
	 * and preemption counter. At this point CURRENT could be coming either
	 * from THREAD's or CPU's stack.
	 *
	 */
	current_copy(CURRENT, (current_t *) CPU->stack);

	/*
	 * We may not keep the old stack.
	 * Reason: If we kept the old stack and got blocked, for instance, in
	 * find_best_thread(), the old thread could get rescheduled by another
	 * CPU and overwrite the part of its own stack that was also used by
	 * the scheduler on this CPU.
	 *
	 * Moreover, we have to bypass the compiler-generated POP sequence
	 * which is fooled by SP being set to the very top of the stack.
	 * Therefore the scheduler() function continues in
	 * scheduler_separated_stack().
	 *
	 */
	context_t ctx;
	context_save(&ctx);
	context_set(&ctx, FADDR(scheduler_separated_stack),
	    (uintptr_t) CPU->stack, STACK_SIZE);
	context_restore(&ctx);

	/* Not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function,
 * using the new stack. Handles the actual
 * context switch to a new thread.
 *
 */
void scheduler_separated_stack(void)
{
	assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
	assert(CPU != NULL);
	assert(interrupts_disabled());

	if (THREAD) {
		/* Must be run after the switch to scheduler stack */
		after_thread_ran();

		switch (THREAD->state) {
		case Running:
			irq_spinlock_unlock(&THREAD->lock, false);
			thread_ready(THREAD);
			break;

		case Exiting:
			irq_spinlock_unlock(&THREAD->lock, false);
			waitq_close(&THREAD->join_wq);

			/*
			 * Release the reference CPU has for the thread.
			 * If there are no other references (e.g. threads calling join),
			 * the thread structure is deallocated.
			 */
			thread_put(THREAD);
			break;

		case Sleeping:
			/*
			 * Prefer the thread after it's woken up.
			 */
			THREAD->priority = -1;
			irq_spinlock_unlock(&THREAD->lock, false);
			break;

		default:
			/*
			 * Entering state is unexpected.
			 */
			panic("tid%" PRIu64 ": unexpected state %s.",
			    THREAD->tid, thread_states[THREAD->state]);
			break;
		}

		THREAD = NULL;
	}

	THREAD = find_best_thread();

	irq_spinlock_lock(&THREAD->lock, false);
	int priority = THREAD->priority;
	irq_spinlock_unlock(&THREAD->lock, false);

	relink_rq(priority);

	switch_task(THREAD->task);

	irq_spinlock_lock(&THREAD->lock, false);
	THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
	log(LF_OTHER, LVL_DEBUG,
	    "cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
	    ", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,
	    THREAD->ticks, atomic_load(&CPU->nrdy));
#endif

	/*
	 * Some architectures provide late kernel PA2KA(identity)
	 * mapping in a page fault handler. However, the page fault
	 * handler uses the kernel stack of the running thread and
	 * therefore cannot be used to map it. The kernel stack, if
	 * necessary, is to be mapped in before_thread_runs(). This
	 * function must be executed before the switch to the new stack.
	 */
	before_thread_runs();

	/*
	 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
	 * thread's stack.
	 */
	current_copy(CURRENT, (current_t *) THREAD->kstack);

	context_restore(&THREAD->saved_context);

	/* Not reached */
}

#ifdef CONFIG_SMP

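/** Try to steal a thread from another CPU's run queue.
 *
 * Searches run queue @a i of @a old_cpu from the back and moves the
 * first suitable thread to the corresponding run queue of the local CPU.
 *
 * @param old_cpu CPU whose run queue is to be searched.
 * @param i       Run queue index (priority).
 *
 * @return Stolen thread, or NULL if no suitable thread was found.
 *
 */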
static thread_t *steal_thread_from(cpu_t *old_cpu, int i)
{
	runq_t *old_rq = &old_cpu->rq[i];
	runq_t *new_rq = &CPU->rq[i];

	ipl_t ipl = interrupts_disable();

	irq_spinlock_lock(&old_rq->lock, false);

	/*
	 * If fpu_owner is any thread in the list, its store is seen here thanks to
	 * the runqueue lock.
	 */
	thread_t *fpu_owner = atomic_load_explicit(&old_cpu->fpu_owner,
	    memory_order_relaxed);

	/* Search rq from the back */
	list_foreach_rev(old_rq->rq, rq_link, thread_t, thread) {

		irq_spinlock_lock(&thread->lock, false);

		/*
		 * Do not steal CPU-wired threads, threads
		 * already stolen, threads for which migration
		 * was temporarily disabled or threads whose
		 * FPU context is still in the CPU.
		 */
		if (thread->stolen || thread->nomigrate ||
		    thread == fpu_owner) {
			irq_spinlock_unlock(&thread->lock, false);
			continue;
		}

		thread->stolen = true;
		thread->cpu = CPU;

		irq_spinlock_unlock(&thread->lock, false);

		/*
		 * Ready thread on local CPU
		 */

#ifdef KCPULB_VERBOSE
		log(LF_OTHER, LVL_DEBUG,
		    "kcpulb%u: TID %" PRIu64 " -> cpu%u, "
		    "nrdy=%ld, avg=%ld", CPU->id, thread->tid,
		    CPU->id, atomic_load(&CPU->nrdy),
		    atomic_load(&nrdy) / config.cpu_active);
#endif

		/* Remove thread from ready queue. */
		old_rq->n--;
		list_remove(&thread->rq_link);
		irq_spinlock_unlock(&old_rq->lock, false);

		/* Append thread to local queue. */
		irq_spinlock_lock(&new_rq->lock, false);
		list_append(&thread->rq_link, &new_rq->rq);
		new_rq->n++;
		irq_spinlock_unlock(&new_rq->lock, false);

		atomic_dec(&old_cpu->nrdy);
		atomic_inc(&CPU->nrdy);
		interrupts_restore(ipl);
		return thread;
	}

	irq_spinlock_unlock(&old_rq->lock, false);
	interrupts_restore(ipl);
	return NULL;
}

/** Load balancing thread
 *
 * SMP load balancing thread that keeps the CPU it is
 * wired to supplied with threads.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
	size_t average;
	size_t rdy;

loop:
	/*
	 * Work in 1s intervals.
	 */
	thread_sleep(1);

not_satisfied:
	/*
	 * Calculate the number of threads that will be migrated/stolen from
	 * other CPUs. Note that the situation may have changed between two
	 * passes; get the most up-to-date counts each time.
	 *
	 */
	average = atomic_load(&nrdy) / config.cpu_active + 1;
	rdy = atomic_load(&CPU->nrdy);

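	/*
	 * The computed average is roughly this CPU's fair share of all
	 * ready threads; threads are stolen only while this CPU holds
	 * fewer than that.
	 */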
	if (average <= rdy)
		goto satisfied;

	size_t count = average - rdy;

	/*
	 * Search the lowest-priority queues on all CPUs first and the
	 * highest-priority queues last.
	 */
	size_t acpu;
	int rq;

	for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
		for (acpu = 0; acpu < config.cpu_active; acpu++) {
			cpu_t *cpu = &cpus[acpu];

			/*
			 * Not interested in ourselves.
			 * No interrupt disabling is needed because kcpulb
			 * has THREAD_FLAG_WIRED.
			 *
			 */
			if (CPU == cpu)
				continue;

			if (atomic_load(&cpu->nrdy) <= average)
				continue;

			if (steal_thread_from(cpu, rq) && --count == 0)
				goto satisfied;
		}
	}

	if (atomic_load(&CPU->nrdy)) {
		/*
		 * Be a little bit light-weight and let migrated threads run.
		 *
		 */
		scheduler();
	} else {
		/*
		 * We failed to migrate a single thread.
		 * Give up this turn.
		 *
		 */
		goto loop;
	}

	goto not_satisfied;

satisfied:
	goto loop;
}
#endif /* CONFIG_SMP */

/** Print information about threads & scheduler queues
 *
 */
void sched_print_list(void)
{
	size_t cpu;
	for (cpu = 0; cpu < config.cpu_count; cpu++) {
		if (!cpus[cpu].active)
			continue;

		/* Technically a data race, but we don't really care in this case. */
		int needs_relink = cpus[cpu].relink_deadline - cpus[cpu].current_clock_tick;

		printf("cpu%u: address=%p, nrdy=%zu, needs_relink=%d\n",
		    cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy),
		    needs_relink);

		unsigned int i;
		for (i = 0; i < RQ_COUNT; i++) {
			irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
			if (cpus[cpu].rq[i].n == 0) {
				irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
				continue;
			}

			printf("\trq[%u]: ", i);
			list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
			    thread) {
				printf("%" PRIu64 "(%s) ", thread->tid,
				    thread_states[thread->state]);
			}
			printf("\n");

			irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
		}
	}
}

/** @}
 */