source: mainline/kernel/generic/src/proc/scheduler.c@3efc35a

Last change on this file since 3efc35a was ef9a2a8, checked in by Jakub Klama <jakub.klama@…>, 12 years ago

Introduce early MMU support in kernel. At current state, it
is possible to create initial kernel address space, map kernel
identity into it and take over MMU control. ASID FIFO support
should also work.

/*
 * Copyright (c) 2010 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup genericproc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and kcpulb kernel thread which
 * performs load-balancing of per-CPU run queues.
 */

#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <func.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <print.h>
#include <debug.h>
#include <stacktrace.h>

//#define SCHEDULER_VERBOSE 1

static void scheduler_separated_stack(void);

atomic_t nrdy; /**< Number of ready threads in the system. */

/** Carry out actions before new task runs. */
static void before_task_runs(void)
{
	before_task_runs_arch();
}

/** Take actions before new thread runs.
 *
 * Perform actions that need to be
 * taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();

#ifdef CONFIG_FPU_LAZY
	if (THREAD == CPU->fpu_owner)
		fpu_enable();
	else
		fpu_disable();
#elif defined CONFIG_FPU
	fpu_enable();
	if (THREAD->fpu_context_exists)
		fpu_context_restore(THREAD->saved_fpu_context);
	else {
		fpu_init();
		THREAD->fpu_context_exists = true;
	}
#endif

#ifdef CONFIG_UDEBUG
	if (THREAD->btrace) {
		istate_t *istate = THREAD->udebug.uspace_state;
		if (istate != NULL) {
			printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
			stack_trace_istate(istate);
		}

		THREAD->btrace = false;
	}
#endif
}

/** Take actions after THREAD has run.
 *
 * Perform actions that need to be
 * taken after the running thread
 * has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry
 *
 */
static void after_thread_ran(void)
{
	after_thread_ran_arch();
}

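/*
 * With CONFIG_FPU_LAZY, before_thread_runs() leaves the FPU disabled for
 * every thread except CPU->fpu_owner. The first FPU instruction executed by
 * any other thread is therefore expected to raise an "FPU unavailable" fault
 * whose architecture-specific handler calls scheduler_fpu_lazy_request() to
 * save the owner's context and restore (or lazily allocate and initialize)
 * the context of the current thread.
 */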
#ifdef CONFIG_FPU_LAZY
void scheduler_fpu_lazy_request(void)
{
restart:
	fpu_enable();
	irq_spinlock_lock(&CPU->lock, false);

	/* Save old context */
	if (CPU->fpu_owner != NULL) {
		irq_spinlock_lock(&CPU->fpu_owner->lock, false);
		fpu_context_save(CPU->fpu_owner->saved_fpu_context);

		/* Don't prevent migration */
		CPU->fpu_owner->fpu_context_engaged = false;
		irq_spinlock_unlock(&CPU->fpu_owner->lock, false);
		CPU->fpu_owner = NULL;
	}

	irq_spinlock_lock(&THREAD->lock, false);
	if (THREAD->fpu_context_exists) {
		fpu_context_restore(THREAD->saved_fpu_context);
	} else {
		/* Allocate FPU context */
		if (!THREAD->saved_fpu_context) {
			/* Might sleep */
			irq_spinlock_unlock(&THREAD->lock, false);
			irq_spinlock_unlock(&CPU->lock, false);
			THREAD->saved_fpu_context =
			    (fpu_context_t *) slab_alloc(fpu_context_slab, 0);

			/* We may have switched CPUs during slab_alloc */
			goto restart;
		}
		fpu_init();
		THREAD->fpu_context_exists = true;
	}

	CPU->fpu_owner = THREAD;
	THREAD->fpu_context_engaged = true;
	irq_spinlock_unlock(&THREAD->lock, false);

	irq_spinlock_unlock(&CPU->lock, false);
}
#endif /* CONFIG_FPU_LAZY */

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
	ASSERT(CPU != NULL);

loop:

	if (atomic_get(&CPU->nrdy) == 0) {
		/*
		 * Since there is nothing to run, the CPU goes to sleep
		 * until a hardware interrupt or an IPI arrives.
		 * This improves energy saving and hyperthreading.
		 */
		irq_spinlock_lock(&CPU->lock, false);
		CPU->idle = true;
		irq_spinlock_unlock(&CPU->lock, false);
		interrupts_enable();

		/*
		 * An interrupt might occur right now and wake up a thread.
		 * In such a case, the CPU will still go to sleep
		 * even though there is a runnable thread.
		 */
		cpu_sleep();
		interrupts_disable();
		goto loop;
	}

	unsigned int i;
	for (i = 0; i < RQ_COUNT; i++) {
		irq_spinlock_lock(&(CPU->rq[i].lock), false);
		if (CPU->rq[i].n == 0) {
			/*
			 * If this queue is empty, try a lower-priority queue.
			 */
			irq_spinlock_unlock(&(CPU->rq[i].lock), false);
			continue;
		}

		atomic_dec(&CPU->nrdy);
		atomic_dec(&nrdy);
		CPU->rq[i].n--;

		/*
		 * Take the first thread from the queue.
		 */
		thread_t *thread = list_get_instance(
		    list_first(&CPU->rq[i].rq), thread_t, rq_link);
		list_remove(&thread->rq_link);

		irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);

		thread->cpu = CPU;
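		/*
		 * The time quantum scales with the queue index: a thread taken
		 * from rq[i] receives (i + 1) * 10 ms worth of ticks, so lower
		 * priority threads are picked less often but run for longer
		 * stretches once selected.
		 */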
		thread->ticks = us2ticks((i + 1) * 10000);
		thread->priority = i; /* Correct rq index */

		/*
		 * Clear the stolen flag so that the thread can be migrated
		 * again when the need for load balancing arises.
		 */
		thread->stolen = false;
		irq_spinlock_unlock(&thread->lock, false);

		return thread;
	}

	goto loop;
}

/** Prevent rq starvation
 *
 * Prevent low priority threads from starving in rq's.
 *
 * When the function decides to relink rq's, it reconnects
 * the respective pointers so that, as a result, threads with
 * priority greater than or equal to 'start' are moved to a
 * higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
static void relink_rq(int start)
{
	list_t list;

	list_initialize(&list);
	irq_spinlock_lock(&CPU->lock, false);

	if (CPU->needs_relink > NEEDS_RELINK_MAX) {
		int i;
		for (i = start; i < RQ_COUNT - 1; i++) {
			/* Remember and empty rq[i + 1] */

			irq_spinlock_lock(&CPU->rq[i + 1].lock, false);
			list_concat(&list, &CPU->rq[i + 1].rq);
			size_t n = CPU->rq[i + 1].n;
			CPU->rq[i + 1].n = 0;
			irq_spinlock_unlock(&CPU->rq[i + 1].lock, false);

			/* Append rq[i + 1] to rq[i] */

			irq_spinlock_lock(&CPU->rq[i].lock, false);
			list_concat(&CPU->rq[i].rq, &list);
			CPU->rq[i].n += n;
			irq_spinlock_unlock(&CPU->rq[i].lock, false);
		}

		CPU->needs_relink = 0;
	}

	irq_spinlock_unlock(&CPU->lock, false);
}

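/*
 * Example: once CPU->needs_relink exceeds NEEDS_RELINK_MAX and relink_rq() is
 * called with start == 2, the contents of rq[3] are appended to rq[2], rq[4]
 * to rq[3], and so on up to the last queue, so every waiting thread below the
 * threshold is promoted by one priority level.
 */
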
/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler(void)
{
	volatile ipl_t ipl;

	ASSERT(CPU != NULL);

	ipl = interrupts_disable();

	if (atomic_get(&haltstate))
		halt();

	if (THREAD) {
		irq_spinlock_lock(&THREAD->lock, false);

		/* Update thread kernel accounting */
		THREAD->kcycles += get_cycle() - THREAD->last_cycle;

#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
		fpu_context_save(THREAD->saved_fpu_context);
#endif
		if (!context_save(&THREAD->saved_context)) {
			/*
			 * This is the place where threads leave scheduler();
			 */
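			/*
			 * context_save() behaves like setjmp(): it returns a
			 * non-zero value on the direct call above and zero when
			 * the saved context is later resumed via
			 * context_restore(), so this branch runs only when the
			 * thread is being scheduled to run again.
			 */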

			/* Save current CPU cycle */
			THREAD->last_cycle = get_cycle();

			irq_spinlock_unlock(&THREAD->lock, false);
			interrupts_restore(THREAD->saved_context.ipl);

			return;
		}

		/*
		 * Interrupt priority level of preempted thread is recorded
		 * here to facilitate scheduler() invocations from
		 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
		 *
		 */
		THREAD->saved_context.ipl = ipl;
	}

	/*
	 * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, AS
	 * and preemption counter. At this point THE could be coming either
	 * from THREAD's or CPU's stack.
	 *
	 */
	the_copy(THE, (the_t *) CPU->stack);

	/*
	 * We may not keep the old stack.
	 * Reason: If we kept the old stack and got blocked, for instance, in
	 * find_best_thread(), the old thread could get rescheduled by another
	 * CPU and overwrite the part of its own stack that was also used by
	 * the scheduler on this CPU.
	 *
	 * Moreover, we have to bypass the compiler-generated POP sequence
	 * which is fooled by SP being set to the very top of the stack.
	 * Therefore the scheduler() function continues in
	 * scheduler_separated_stack().
	 *
	 */
	context_save(&CPU->saved_context);
	context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
	    (uintptr_t) CPU->stack, STACK_SIZE);
	context_restore(&CPU->saved_context);

	/* Not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function,
 * running on the new stack. It handles the actual
 * context switch to a new thread.
 *
 */
void scheduler_separated_stack(void)
{
	DEADLOCK_PROBE_INIT(p_joinwq);
	task_t *old_task = TASK;
	as_t *old_as = AS;

	ASSERT((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
	ASSERT(CPU != NULL);

	/*
	 * Hold the current task and the address space to prevent their
	 * possible destruction should thread_destroy() be called on this or
	 * any other processor while the scheduler is still using them.
	 */
	if (old_task)
		task_hold(old_task);

	if (old_as)
		as_hold(old_as);

	if (THREAD) {
		/* Must be run after the switch to scheduler stack */
		after_thread_ran();

		switch (THREAD->state) {
		case Running:
			irq_spinlock_unlock(&THREAD->lock, false);
			thread_ready(THREAD);
			break;

		case Exiting:
repeat:
			if (THREAD->detached) {
				thread_destroy(THREAD, false);
			} else {
				/*
				 * The thread structure is kept allocated until
				 * somebody calls thread_detach() on it.
				 */
				if (!irq_spinlock_trylock(&THREAD->join_wq.lock)) {
					/*
					 * Avoid deadlock.
					 */
					irq_spinlock_unlock(&THREAD->lock, false);
					delay(HZ);
					irq_spinlock_lock(&THREAD->lock, false);
					DEADLOCK_PROBE(p_joinwq,
					    DEADLOCK_THRESHOLD);
					goto repeat;
				}
				_waitq_wakeup_unsafe(&THREAD->join_wq,
				    WAKEUP_FIRST);
				irq_spinlock_unlock(&THREAD->join_wq.lock, false);

				THREAD->state = Lingering;
				irq_spinlock_unlock(&THREAD->lock, false);
			}
			break;

		case Sleeping:
			/*
			 * Prefer the thread after it's woken up.
			 */
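			/*
			 * A priority of -1 marks the thread so that, when it
			 * wakes up, thread_ready() is expected to place it in
			 * the highest-priority run queue.
			 */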
			THREAD->priority = -1;

			/*
			 * We need to release wq->lock which we locked in
			 * waitq_sleep(). Address of wq->lock is kept in
			 * THREAD->sleep_queue.
			 */
			irq_spinlock_unlock(&THREAD->sleep_queue->lock, false);

			irq_spinlock_unlock(&THREAD->lock, false);
			break;

		default:
			/*
			 * Entering state is unexpected.
			 */
			panic("tid%" PRIu64 ": unexpected state %s.",
			    THREAD->tid, thread_states[THREAD->state]);
			break;
		}

		THREAD = NULL;
	}

	THREAD = find_best_thread();

	irq_spinlock_lock(&THREAD->lock, false);
	int priority = THREAD->priority;
	irq_spinlock_unlock(&THREAD->lock, false);

	relink_rq(priority);

	/*
	 * If both the old and the new task are the same,
	 * lots of work is avoided.
	 */
	if (TASK != THREAD->task) {
		as_t *new_as = THREAD->task->as;

		/*
		 * Note that it is possible for two tasks
		 * to share one address space.
		 */
		if (old_as != new_as) {
			/*
			 * Both tasks and address spaces are different.
			 * Replace the old one with the new one.
			 */
			as_switch(old_as, new_as);
		}

		TASK = THREAD->task;
		before_task_runs();
	}

	if (old_task)
		task_release(old_task);

	if (old_as)
		as_release(old_as);

	irq_spinlock_lock(&THREAD->lock, false);
	THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
	printf("cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
	    ", nrdy=%d)\n", CPU->id, THREAD->tid, THREAD->priority,
	    THREAD->ticks, atomic_get(&CPU->nrdy));
#endif

	/*
	 * Some architectures provide late kernel PA2KA(identity)
	 * mapping in a page fault handler. However, the page fault
	 * handler uses the kernel stack of the running thread and
	 * therefore cannot be used to map it. The kernel stack, if
	 * necessary, is to be mapped in before_thread_runs(). This
	 * function must be executed before the switch to the new stack.
	 */
	before_thread_runs();

	/*
	 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
	 * thread's stack.
	 */
	the_copy(THE, (the_t *) THREAD->kstack);

	context_restore(&THREAD->saved_context);

	/* Not reached */
}

#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * SMP load balancing thread, supervising the supply of
 * threads for the CPU it is wired to.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
	atomic_count_t average;
	atomic_count_t rdy;

	/*
	 * Detach kcpulb as nobody will call thread_join_timeout() on it.
	 */
	thread_detach(THREAD);

loop:
	/*
	 * Work in 1s intervals.
	 */
	thread_sleep(1);

not_satisfied:
	/*
	 * Calculate the number of threads that will be migrated/stolen from
	 * other CPUs. Note that the situation can change between two
	 * passes. Get the most up-to-date counts each time.
	 *
	 */
	average = atomic_get(&nrdy) / config.cpu_active + 1;
	rdy = atomic_get(&CPU->nrdy);

	if (average <= rdy)
		goto satisfied;

	atomic_count_t count = average - rdy;
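	/*
	 * Worked example: with 4 active CPUs and 13 ready threads system-wide,
	 * average = 13 / 4 + 1 = 4; a CPU that currently has only 2 ready
	 * threads will then try to steal count = 4 - 2 = 2 threads from its
	 * busier peers.
	 */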

	/*
	 * Search the lowest-priority queues on all CPUs first and the
	 * highest-priority queues on all CPUs last.
	 */
	size_t acpu;
	size_t acpu_bias = 0;
	int rq;

	for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
		for (acpu = 0; acpu < config.cpu_active; acpu++) {
			cpu_t *cpu = &cpus[(acpu + acpu_bias) % config.cpu_active];

			/*
			 * Not interested in ourselves.
			 * Interrupt disabling is not required because kcpulb
			 * has THREAD_FLAG_WIRED.
			 *
			 */
			if (CPU == cpu)
				continue;

			if (atomic_get(&cpu->nrdy) <= average)
				continue;

			irq_spinlock_lock(&(cpu->rq[rq].lock), true);
			if (cpu->rq[rq].n == 0) {
				irq_spinlock_unlock(&(cpu->rq[rq].lock), true);
				continue;
			}

			thread_t *thread = NULL;

			/* Search rq from the back */
			link_t *link = cpu->rq[rq].rq.head.prev;

			while (link != &(cpu->rq[rq].rq.head)) {
				thread = (thread_t *) list_get_instance(link,
				    thread_t, rq_link);

				/*
				 * Do not steal CPU-wired threads, threads
				 * already stolen, threads for which migration
				 * was temporarily disabled or threads whose
				 * FPU context is still in the CPU.
				 */
				irq_spinlock_lock(&thread->lock, false);

				if ((!thread->wired) && (!thread->stolen) &&
				    (!thread->nomigrate) &&
				    (!thread->fpu_context_engaged)) {
					/*
					 * Remove thread from ready queue.
					 */
					irq_spinlock_unlock(&thread->lock,
					    false);

					atomic_dec(&cpu->nrdy);
					atomic_dec(&nrdy);

					cpu->rq[rq].n--;
					list_remove(&thread->rq_link);

					break;
				}

				irq_spinlock_unlock(&thread->lock, false);

				link = link->prev;
				thread = NULL;
			}

			if (thread) {
				/*
				 * Ready thread on local CPU
				 */

				irq_spinlock_pass(&(cpu->rq[rq].lock),
				    &thread->lock);

#ifdef KCPULB_VERBOSE
				printf("kcpulb%u: TID %" PRIu64 " -> cpu%u, "
				    "nrdy=%ld, avg=%ld\n", CPU->id, thread->tid,
				    CPU->id, atomic_get(&CPU->nrdy),
				    atomic_get(&nrdy) / config.cpu_active);
#endif

				thread->stolen = true;
				thread->state = Entering;

				irq_spinlock_unlock(&thread->lock, true);
				thread_ready(thread);

				if (--count == 0)
					goto satisfied;

				/*
				 * We are not satisfied yet, focus on another
				 * CPU next time.
				 *
				 */
				acpu_bias++;

				continue;
			} else
				irq_spinlock_unlock(&(cpu->rq[rq].lock), true);

		}
	}

	if (atomic_get(&CPU->nrdy)) {
		/*
		 * Be a little bit light-weight and let migrated threads run.
		 *
		 */
		scheduler();
	} else {
		/*
		 * We failed to migrate a single thread.
		 * Give up this turn.
		 *
		 */
		goto loop;
	}

	goto not_satisfied;

satisfied:
	goto loop;
}
#endif /* CONFIG_SMP */

/** Print information about threads & scheduler queues
 *
 */
void sched_print_list(void)
{
	size_t cpu;
	for (cpu = 0; cpu < config.cpu_count; cpu++) {
		if (!cpus[cpu].active)
			continue;

		irq_spinlock_lock(&cpus[cpu].lock, true);

		printf("cpu%u: address=%p, nrdy=%" PRIua ", needs_relink=%zu\n",
		    cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
		    cpus[cpu].needs_relink);

		unsigned int i;
		for (i = 0; i < RQ_COUNT; i++) {
			irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
			if (cpus[cpu].rq[i].n == 0) {
				irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
				continue;
			}

			printf("\trq[%u]: ", i);
			list_foreach(cpus[cpu].rq[i].rq, cur) {
				thread_t *thread = list_get_instance(cur,
				    thread_t, rq_link);
				printf("%" PRIu64 "(%s) ", thread->tid,
				    thread_states[thread->state]);
			}
			printf("\n");

			irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
		}

		irq_spinlock_unlock(&cpus[cpu].lock, true);
	}
}

/** @}
 */