source: mainline/kernel/generic/src/proc/scheduler.c@ 314f4b59

Last change on this file since 314f4b59 was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write a script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting the remote repository. IMO, it makes more sense to adopt
the simpler rule.

/*
 * Copyright (c) 2010 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup genericproc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and kcpulb kernel thread which
 * performs load-balancing of per-CPU run queues.
 */

#include <assert.h>
#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <synch/workqueue.h>
#include <synch/rcu.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <halt.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <print.h>
#include <log.h>
#include <stacktrace.h>

static void scheduler_separated_stack(void);

atomic_t nrdy;  /**< Number of ready threads in the system. */

/** Carry out actions before new task runs. */
static void before_task_runs(void)
{
	before_task_runs_arch();
}

/** Take actions before new thread runs.
 *
 * Perform actions that need to be
 * taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();
	rcu_before_thread_runs();

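	/*
	 * Prepare the FPU for the incoming thread. With lazy switching the
	 * unit stays enabled only if this thread's context is already loaded
	 * on this CPU; otherwise it is disabled so that the first FPU
	 * instruction traps and the context is loaded on demand (see
	 * scheduler_fpu_lazy_request()). With eager switching the saved
	 * context is restored, or a fresh one initialized, right away.
	 */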
#ifdef CONFIG_FPU_LAZY
	if (THREAD == CPU->fpu_owner)
		fpu_enable();
	else
		fpu_disable();
#elif defined CONFIG_FPU
	fpu_enable();
	if (THREAD->fpu_context_exists)
		fpu_context_restore(THREAD->saved_fpu_context);
	else {
		fpu_init();
		THREAD->fpu_context_exists = true;
	}
#endif

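	/*
	 * If a userspace stack trace was requested for this thread
	 * (THREAD->btrace), print it now, just before the thread is given
	 * control again.
	 */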
#ifdef CONFIG_UDEBUG
	if (THREAD->btrace) {
		istate_t *istate = THREAD->udebug.uspace_state;
		if (istate != NULL) {
			printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
			stack_trace_istate(istate);
		}

		THREAD->btrace = false;
	}
#endif
}

/** Take actions after THREAD has run.
 *
 * Perform actions that need to be
 * taken after the running thread
 * has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry
 *
 */
static void after_thread_ran(void)
{
	workq_after_thread_ran();
	rcu_after_thread_ran();
	after_thread_ran_arch();
}

#ifdef CONFIG_FPU_LAZY
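/** Take ownership of the FPU for the current thread.
 *
 * Saves the context of the previous FPU owner (if any) and restores or
 * initializes the context of THREAD. This is the on-demand part of lazy
 * FPU switching: the context is only loaded once the thread actually
 * uses the FPU.
 */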
void scheduler_fpu_lazy_request(void)
{
restart:
	fpu_enable();
	irq_spinlock_lock(&CPU->lock, false);

	/* Save old context */
	if (CPU->fpu_owner != NULL) {
		irq_spinlock_lock(&CPU->fpu_owner->lock, false);
		fpu_context_save(CPU->fpu_owner->saved_fpu_context);

		/* Don't prevent migration */
		CPU->fpu_owner->fpu_context_engaged = false;
		irq_spinlock_unlock(&CPU->fpu_owner->lock, false);
		CPU->fpu_owner = NULL;
	}

	irq_spinlock_lock(&THREAD->lock, false);
	if (THREAD->fpu_context_exists) {
		fpu_context_restore(THREAD->saved_fpu_context);
	} else {
		/* Allocate FPU context */
		if (!THREAD->saved_fpu_context) {
			/* Might sleep */
			irq_spinlock_unlock(&THREAD->lock, false);
			irq_spinlock_unlock(&CPU->lock, false);
			THREAD->saved_fpu_context =
			    (fpu_context_t *) slab_alloc(fpu_context_cache, 0);

			/* We may have switched CPUs during slab_alloc */
			goto restart;
		}
		fpu_init();
		THREAD->fpu_context_exists = true;
	}

	CPU->fpu_owner = THREAD;
	THREAD->fpu_context_engaged = true;
	irq_spinlock_unlock(&THREAD->lock, false);

	irq_spinlock_unlock(&CPU->lock, false);
}
#endif /* CONFIG_FPU_LAZY */

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
	assert(CPU != NULL);

loop:

	if (atomic_get(&CPU->nrdy) == 0) {
		/*
		 * Since there is nothing to run, the CPU goes to sleep
		 * until a hardware interrupt or an IPI arrives.
		 * This improves energy saving and hyperthreading.
		 */
		irq_spinlock_lock(&CPU->lock, false);
		CPU->idle = true;
		irq_spinlock_unlock(&CPU->lock, false);
		interrupts_enable();

		/*
		 * An interrupt might occur right now and wake up a thread.
		 * In such a case, the CPU will continue to go to sleep
		 * even though there is a runnable thread.
		 */
		cpu_sleep();
		interrupts_disable();
		goto loop;
	}

	assert(!CPU->idle);

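	/*
	 * Scan the run queues from the highest priority (rq[0]) downward
	 * and take the first thread of the first non-empty queue.
	 */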
	unsigned int i;
	for (i = 0; i < RQ_COUNT; i++) {
		irq_spinlock_lock(&(CPU->rq[i].lock), false);
		if (CPU->rq[i].n == 0) {
			/*
			 * If this queue is empty, try a lower-priority queue.
			 */
			irq_spinlock_unlock(&(CPU->rq[i].lock), false);
			continue;
		}

		atomic_dec(&CPU->nrdy);
		atomic_dec(&nrdy);
		CPU->rq[i].n--;

		/*
		 * Take the first thread from the queue.
		 */
		thread_t *thread = list_get_instance(
		    list_first(&CPU->rq[i].rq), thread_t, rq_link);
		list_remove(&thread->rq_link);

		irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);

		thread->cpu = CPU;
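		/*
		 * The time quantum grows with the queue index: a thread taken
		 * from rq[i] may run for (i + 1) * 10 ms before preemption,
		 * so lower-priority threads are scheduled less often but for
		 * longer.
		 */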
		thread->ticks = us2ticks((i + 1) * 10000);
		thread->priority = i;  /* Correct rq index */

		/*
		 * Clear the stolen flag so that the thread can be migrated
		 * again when the need for load balancing arises.
		 */
		thread->stolen = false;
		irq_spinlock_unlock(&thread->lock, false);

		return thread;
	}

	goto loop;
}

/** Prevent rq starvation
 *
 * Prevent low priority threads from starving in rq's.
 *
 * When the function decides to relink rq's, it reconnects
 * the respective pointers so that, as a result, threads with
 * priority greater than or equal to start are moved to a
 * higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
static void relink_rq(int start)
{
	list_t list;

	list_initialize(&list);
	irq_spinlock_lock(&CPU->lock, false);

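	/*
	 * Relinking is rate-limited: the queues are merged upward only once
	 * the needs_relink counter (advanced outside this function) exceeds
	 * NEEDS_RELINK_MAX. Each pass moves the contents of rq[i + 1] into
	 * rq[i] for every i >= start.
	 */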
	if (CPU->needs_relink > NEEDS_RELINK_MAX) {
		int i;
		for (i = start; i < RQ_COUNT - 1; i++) {
			/* Remember and empty rq[i + 1] */

			irq_spinlock_lock(&CPU->rq[i + 1].lock, false);
			list_concat(&list, &CPU->rq[i + 1].rq);
			size_t n = CPU->rq[i + 1].n;
			CPU->rq[i + 1].n = 0;
			irq_spinlock_unlock(&CPU->rq[i + 1].lock, false);

			/* Append rq[i + 1] to rq[i] */

			irq_spinlock_lock(&CPU->rq[i].lock, false);
			list_concat(&CPU->rq[i].rq, &list);
			CPU->rq[i].n += n;
			irq_spinlock_unlock(&CPU->rq[i].lock, false);
		}

		CPU->needs_relink = 0;
	}

	irq_spinlock_unlock(&CPU->lock, false);
}

/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler(void)
{
	volatile ipl_t ipl;

	assert(CPU != NULL);

	ipl = interrupts_disable();

	if (atomic_get(&haltstate))
		halt();

	if (THREAD) {
		irq_spinlock_lock(&THREAD->lock, false);

		/* Update thread kernel accounting */
		THREAD->kcycles += get_cycle() - THREAD->last_cycle;

#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
		fpu_context_save(THREAD->saved_fpu_context);
#endif
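		/*
		 * context_save() works like setjmp(): it returns a non-zero
		 * value on the direct call that stores the context, and zero
		 * once control comes back here via context_restore() when
		 * this thread is scheduled again.
		 */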
		if (!context_save(&THREAD->saved_context)) {
			/*
			 * This is the place where threads leave scheduler();
			 */

			/* Save current CPU cycle */
			THREAD->last_cycle = get_cycle();

			irq_spinlock_unlock(&THREAD->lock, false);
			interrupts_restore(THREAD->saved_context.ipl);

			return;
		}

		/*
		 * Interrupt priority level of preempted thread is recorded
		 * here to facilitate scheduler() invocations from
		 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
		 *
		 */
		THREAD->saved_context.ipl = ipl;
	}

	/*
	 * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, AS
	 * and preemption counter. At this point THE could be coming either
	 * from THREAD's or CPU's stack.
	 *
	 */
	the_copy(THE, (the_t *) CPU->stack);

	/*
	 * We may not keep the old stack.
	 * Reason: If we kept the old stack and got blocked, for instance, in
	 * find_best_thread(), the old thread could get rescheduled by another
	 * CPU and overwrite the part of its own stack that was also used by
	 * the scheduler on this CPU.
	 *
	 * Moreover, we have to bypass the compiler-generated POP sequence
	 * which is fooled by SP being set to the very top of the stack.
	 * Therefore the scheduler() function continues in
	 * scheduler_separated_stack().
	 *
	 */
	context_save(&CPU->saved_context);
	context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
	    (uintptr_t) CPU->stack, STACK_SIZE);
	context_restore(&CPU->saved_context);

	/* Not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function
 * using the new stack. Handles the actual context
 * switch to a new thread.
 *
 */
void scheduler_separated_stack(void)
{
	DEADLOCK_PROBE_INIT(p_joinwq);
	task_t *old_task = TASK;
	as_t *old_as = AS;

	assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
	assert(CPU != NULL);
	assert(interrupts_disabled());

	/*
	 * Hold the current task and the address space to prevent their
	 * possible destruction should thread_destroy() be called on this or any
	 * other processor while the scheduler is still using them.
	 */
	if (old_task)
		task_hold(old_task);

	if (old_as)
		as_hold(old_as);

	if (THREAD) {
		/* Must be run after the switch to scheduler stack */
		after_thread_ran();

		switch (THREAD->state) {
		case Running:
			irq_spinlock_unlock(&THREAD->lock, false);
			thread_ready(THREAD);
			break;

		case Exiting:
			rcu_thread_exiting();
repeat:
			if (THREAD->detached) {
				thread_destroy(THREAD, false);
			} else {
				/*
				 * The thread structure is kept allocated until
				 * somebody calls thread_detach() on it.
				 */
				if (!irq_spinlock_trylock(&THREAD->join_wq.lock)) {
					/*
					 * Avoid deadlock.
					 */
					irq_spinlock_unlock(&THREAD->lock, false);
					delay(HZ);
					irq_spinlock_lock(&THREAD->lock, false);
					DEADLOCK_PROBE(p_joinwq,
					    DEADLOCK_THRESHOLD);
					goto repeat;
				}
				_waitq_wakeup_unsafe(&THREAD->join_wq,
				    WAKEUP_FIRST);
				irq_spinlock_unlock(&THREAD->join_wq.lock, false);

				THREAD->state = Lingering;
				irq_spinlock_unlock(&THREAD->lock, false);
			}
			break;

		case Sleeping:
			/*
			 * Prefer the thread after it's woken up.
			 */
			THREAD->priority = -1;

			/*
			 * We need to release wq->lock which we locked in
			 * waitq_sleep(). Address of wq->lock is kept in
			 * THREAD->sleep_queue.
			 */
			irq_spinlock_unlock(&THREAD->sleep_queue->lock, false);

			irq_spinlock_unlock(&THREAD->lock, false);
			break;

		default:
			/*
			 * Entering state is unexpected.
			 */
			panic("tid%" PRIu64 ": unexpected state %s.",
			    THREAD->tid, thread_states[THREAD->state]);
			break;
		}

		THREAD = NULL;
	}

	THREAD = find_best_thread();

	irq_spinlock_lock(&THREAD->lock, false);
	int priority = THREAD->priority;
	irq_spinlock_unlock(&THREAD->lock, false);

	relink_rq(priority);

	/*
	 * If both the old and the new task are the same,
	 * lots of work is avoided.
	 */
	if (TASK != THREAD->task) {
		as_t *new_as = THREAD->task->as;

		/*
		 * Note that it is possible for two tasks
		 * to share one address space.
		 */
		if (old_as != new_as) {
			/*
			 * Both tasks and address spaces are different.
			 * Replace the old one with the new one.
			 */
			as_switch(old_as, new_as);
		}

		TASK = THREAD->task;
		before_task_runs();
	}

	if (old_task)
		task_release(old_task);

	if (old_as)
		as_release(old_as);

	irq_spinlock_lock(&THREAD->lock, false);
	THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
	log(LF_OTHER, LVL_DEBUG,
	    "cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
	    ", nrdy=%" PRIua ")", CPU->id, THREAD->tid, THREAD->priority,
	    THREAD->ticks, atomic_get(&CPU->nrdy));
#endif

	/*
	 * Some architectures provide late kernel PA2KA(identity)
	 * mapping in a page fault handler. However, the page fault
	 * handler uses the kernel stack of the running thread and
	 * therefore cannot be used to map it. The kernel stack, if
	 * necessary, is to be mapped in before_thread_runs(). This
	 * function must be executed before the switch to the new stack.
	 */
	before_thread_runs();

	/*
	 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
	 * thread's stack.
	 */
	the_copy(THE, (the_t *) THREAD->kstack);

	context_restore(&THREAD->saved_context);

	/* Not reached */
}

#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * SMP load-balancing thread that supervises the supply of
 * ready threads for the CPU it is wired to.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
	atomic_count_t average;
	atomic_count_t rdy;

	/*
	 * Detach kcpulb as nobody will call thread_join_timeout() on it.
	 */
	thread_detach(THREAD);

loop:
	/*
	 * Work in 1s intervals.
	 */
	thread_sleep(1);

not_satisfied:
	/*
	 * Calculate the number of threads that will be migrated/stolen from
	 * other CPU's. Note that the situation may have changed between two
	 * passes. Each time, get the most up-to-date counts.
	 *
	 */
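	/*
	 * 'average' approximates this CPU's fair share of the ready threads
	 * (integer division plus one). If we already hold at least that many,
	 * there is nothing to steal; otherwise 'count' is how many threads we
	 * try to take from busier CPUs.
	 */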
	average = atomic_get(&nrdy) / config.cpu_active + 1;
	rdy = atomic_get(&CPU->nrdy);

	if (average <= rdy)
		goto satisfied;

	atomic_count_t count = average - rdy;

	/*
	 * Search the lowest-priority queues on all CPUs first and the
	 * highest-priority queues on all CPUs last.
	 */
	size_t acpu;
	size_t acpu_bias = 0;
	int rq;

	for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
		for (acpu = 0; acpu < config.cpu_active; acpu++) {
			cpu_t *cpu = &cpus[(acpu + acpu_bias) % config.cpu_active];

			/*
			 * Not interested in ourselves.
			 * Doesn't require interrupt disabling because kcpulb
			 * has THREAD_FLAG_WIRED.
			 *
			 */
			if (CPU == cpu)
				continue;

			if (atomic_get(&cpu->nrdy) <= average)
				continue;

			irq_spinlock_lock(&(cpu->rq[rq].lock), true);
			if (cpu->rq[rq].n == 0) {
				irq_spinlock_unlock(&(cpu->rq[rq].lock), true);
				continue;
			}

			thread_t *thread = NULL;

			/* Search rq from the back */
			link_t *link = cpu->rq[rq].rq.head.prev;

			while (link != &(cpu->rq[rq].rq.head)) {
				thread = (thread_t *) list_get_instance(link,
				    thread_t, rq_link);

				/*
				 * Do not steal CPU-wired threads, threads
				 * already stolen, threads for which migration
				 * was temporarily disabled or threads whose
				 * FPU context is still in the CPU.
				 */
				irq_spinlock_lock(&thread->lock, false);

				if ((!thread->wired) && (!thread->stolen) &&
				    (!thread->nomigrate) &&
				    (!thread->fpu_context_engaged)) {
					/*
					 * Remove thread from ready queue.
					 */
					irq_spinlock_unlock(&thread->lock,
					    false);

					atomic_dec(&cpu->nrdy);
					atomic_dec(&nrdy);

					cpu->rq[rq].n--;
					list_remove(&thread->rq_link);

					break;
				}

				irq_spinlock_unlock(&thread->lock, false);

				link = link->prev;
				thread = NULL;
			}

			if (thread) {
				/*
				 * Ready thread on local CPU
				 */

				irq_spinlock_pass(&(cpu->rq[rq].lock),
				    &thread->lock);

#ifdef KCPULB_VERBOSE
				log(LF_OTHER, LVL_DEBUG,
				    "kcpulb%u: TID %" PRIu64 " -> cpu%u, "
				    "nrdy=%ld, avg=%ld", CPU->id, thread->tid,
				    CPU->id, atomic_get(&CPU->nrdy),
				    atomic_get(&nrdy) / config.cpu_active);
#endif

				thread->stolen = true;
				thread->state = Entering;

				irq_spinlock_unlock(&thread->lock, true);
				thread_ready(thread);

				if (--count == 0)
					goto satisfied;

				/*
				 * We are not satisfied yet, focus on another
				 * CPU next time.
				 *
				 */
				acpu_bias++;

				continue;
			} else
				irq_spinlock_unlock(&(cpu->rq[rq].lock), true);

		}
	}

	if (atomic_get(&CPU->nrdy)) {
		/*
		 * Be a little bit light-weight and let migrated threads run.
		 *
		 */
		scheduler();
	} else {
		/*
		 * We failed to migrate a single thread.
		 * Give up this turn.
		 *
		 */
		goto loop;
	}

	goto not_satisfied;

satisfied:
	goto loop;
}
#endif /* CONFIG_SMP */

/** Print information about threads & scheduler queues
 *
 */
void sched_print_list(void)
{
	size_t cpu;
	for (cpu = 0; cpu < config.cpu_count; cpu++) {
		if (!cpus[cpu].active)
			continue;

		irq_spinlock_lock(&cpus[cpu].lock, true);

		printf("cpu%u: address=%p, nrdy=%" PRIua ", needs_relink=%zu\n",
		    cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
		    cpus[cpu].needs_relink);

		unsigned int i;
		for (i = 0; i < RQ_COUNT; i++) {
			irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
			if (cpus[cpu].rq[i].n == 0) {
				irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
				continue;
			}

			printf("\trq[%u]: ", i);
			list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
			    thread) {
				printf("%" PRIu64 "(%s) ", thread->tid,
				    thread_states[thread->state]);
			}
			printf("\n");

			irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
		}

		irq_spinlock_unlock(&cpus[cpu].lock, true);
	}
}

/** @}
 */