source: mainline/kernel/generic/src/proc/scheduler.c@ 4e7d3dd

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 4e7d3dd was 4e7d3dd, checked in by Martin Decky <martin@…>, 14 years ago

cstyle (no change in functionality)

  • Property mode set to 100644
File size: 17.4 KB
Line 
1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup genericproc
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Scheduler and load balancing.
36 *
37 * This file contains the scheduler and kcpulb kernel thread which
38 * performs load-balancing of per-CPU run queues.
39 */
40
41#include <proc/scheduler.h>
42#include <proc/thread.h>
43#include <proc/task.h>
44#include <mm/frame.h>
45#include <mm/page.h>
46#include <mm/as.h>
47#include <time/timeout.h>
48#include <time/delay.h>
49#include <arch/asm.h>
50#include <arch/faddr.h>
51#include <arch/cycle.h>
52#include <atomic.h>
53#include <synch/spinlock.h>
54#include <config.h>
55#include <context.h>
56#include <fpu_context.h>
57#include <func.h>
58#include <arch.h>
59#include <adt/list.h>
60#include <panic.h>
61#include <cpu.h>
62#include <print.h>
63#include <debug.h>
64
/*
 * Forward declaration of the second part of scheduler(), which is defined
 * further down in this file and runs on the CPU's own stack.
 */
static void scheduler_separated_stack(void);

/*
 * System-wide counter of ready threads. It is decremented whenever a thread
 * is taken off a run queue in this file (find_best_thread(), kcpulb()).
 */
atomic_t nrdy; /**< Number of ready threads in the system. */
68
/** Run the hook executed before a newly selected task gets the CPU.
 *
 * The generic part has nothing to do on its own; all work is delegated
 * to the architecture-specific before_task_runs_arch().
 *
 */
static void before_task_runs(void)
{
	before_task_runs_arch();
}
74
75/** Take actions before new thread runs.
76 *
77 * Perform actions that need to be
78 * taken before the newly selected
79 * tread is passed control.
80 *
81 * THREAD->lock is locked on entry
82 *
83 */
84static void before_thread_runs(void)
85{
86 before_thread_runs_arch();
87
88#ifdef CONFIG_FPU_LAZY
89 if(THREAD == CPU->fpu_owner)
90 fpu_enable();
91 else
92 fpu_disable();
93#else
94 fpu_enable();
95 if (THREAD->fpu_context_exists)
96 fpu_context_restore(THREAD->saved_fpu_context);
97 else {
98 fpu_init();
99 THREAD->fpu_context_exists = 1;
100 }
101#endif
102}
103
/** Run the hook executed after THREAD has been taken off the CPU.
 *
 * Called from scheduler_separated_stack() once the scheduler runs on
 * its own stack; all work is delegated to the architecture-specific
 * after_thread_ran_arch().
 *
 * THREAD->lock is locked on entry.
 *
 */
static void after_thread_ran(void)
{
	after_thread_ran_arch();
}
117
118#ifdef CONFIG_FPU_LAZY
/** Make THREAD the FPU owner of the current CPU (lazy FPU switching).
 *
 * Saves the FPU context of the thread recorded in CPU->fpu_owner (if any),
 * then either restores THREAD's saved FPU context or initializes a fresh
 * one, and finally records THREAD as the new CPU->fpu_owner.
 *
 * NOTE(review): presumably invoked from the architecture's "FPU not
 * available" trap handler -- confirm against the callers.
 *
 */
void scheduler_fpu_lazy_request(void)
{
restart:
	fpu_enable();
	irq_spinlock_lock(&CPU->lock, false);
	
	/* Save old context */
	if (CPU->fpu_owner != NULL) {
		irq_spinlock_lock(&CPU->fpu_owner->lock, false);
		fpu_context_save(CPU->fpu_owner->saved_fpu_context);
		
		/* Don't prevent migration */
		CPU->fpu_owner->fpu_context_engaged = 0;
		irq_spinlock_unlock(&CPU->fpu_owner->lock, false);
		CPU->fpu_owner = NULL;
	}
	
	irq_spinlock_lock(&THREAD->lock, false);
	if (THREAD->fpu_context_exists) {
		fpu_context_restore(THREAD->saved_fpu_context);
	} else {
		/* Allocate FPU context */
		if (!THREAD->saved_fpu_context) {
			/*
			 * Might sleep, so both locks are dropped before
			 * the allocation.
			 */
			irq_spinlock_unlock(&THREAD->lock, false);
			irq_spinlock_unlock(&CPU->lock, false);
			THREAD->saved_fpu_context =
			    (fpu_context_t *) slab_alloc(fpu_context_slab, 0);
			
			/*
			 * We may have switched CPUs during slab_alloc,
			 * so start over and re-evaluate CPU->fpu_owner.
			 */
			goto restart;
		}
		fpu_init();
		THREAD->fpu_context_exists = 1;
	}
	
	/*
	 * THREAD now owns the FPU of this CPU. kcpulb() does not steal
	 * threads whose fpu_context_engaged is set.
	 */
	CPU->fpu_owner = THREAD;
	THREAD->fpu_context_engaged = 1;
	irq_spinlock_unlock(&THREAD->lock, false);
	
	irq_spinlock_unlock(&CPU->lock, false);
}
161#endif /* CONFIG_FPU_LAZY */
162
/** Initialize the kernel scheduler.
 *
 * The scheduler currently needs no initialization of its own,
 * so the body is intentionally empty.
 *
 */
void scheduler_init(void)
{
	/* Nothing to do. */
}
171
172/** Get thread to be scheduled
173 *
174 * Get the optimal thread to be scheduled
175 * according to thread accounting and scheduler
176 * policy.
177 *
178 * @return Thread to be scheduled.
179 *
180 */
181static thread_t *find_best_thread(void)
182{
183 ASSERT(CPU != NULL);
184
185loop:
186
187 if (atomic_get(&CPU->nrdy) == 0) {
188 /*
189 * For there was nothing to run, the CPU goes to sleep
190 * until a hardware interrupt or an IPI comes.
191 * This improves energy saving and hyperthreading.
192 */
193 irq_spinlock_lock(&CPU->lock, false);
194 CPU->idle = true;
195 irq_spinlock_unlock(&CPU->lock, false);
196 interrupts_enable();
197
198 /*
199 * An interrupt might occur right now and wake up a thread.
200 * In such case, the CPU will continue to go to sleep
201 * even though there is a runnable thread.
202 */
203 cpu_sleep();
204 interrupts_disable();
205 goto loop;
206 }
207
208 unsigned int i;
209 for (i = 0; i < RQ_COUNT; i++) {
210 irq_spinlock_lock(&(CPU->rq[i].lock), false);
211 if (CPU->rq[i].n == 0) {
212 /*
213 * If this queue is empty, try a lower-priority queue.
214 */
215 irq_spinlock_unlock(&(CPU->rq[i].lock), false);
216 continue;
217 }
218
219 atomic_dec(&CPU->nrdy);
220 atomic_dec(&nrdy);
221 CPU->rq[i].n--;
222
223 /*
224 * Take the first thread from the queue.
225 */
226 thread_t *thread =
227 list_get_instance(CPU->rq[i].rq_head.next, thread_t, rq_link);
228 list_remove(&thread->rq_link);
229
230 irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);
231
232 thread->cpu = CPU;
233 thread->ticks = us2ticks((i + 1) * 10000);
234 thread->priority = i; /* Correct rq index */
235
236 /*
237 * Clear the THREAD_FLAG_STOLEN flag so that t can be migrated
238 * when load balancing needs emerge.
239 */
240 thread->flags &= ~THREAD_FLAG_STOLEN;
241 irq_spinlock_unlock(&thread->lock, false);
242
243 return thread;
244 }
245
246 goto loop;
247}
248
249/** Prevent rq starvation
250 *
251 * Prevent low priority threads from starving in rq's.
252 *
253 * When the function decides to relink rq's, it reconnects
254 * respective pointers so that in result threads with 'pri'
255 * greater or equal start are moved to a higher-priority queue.
256 *
257 * @param start Threshold priority.
258 *
259 */
260static void relink_rq(int start)
261{
262 link_t head;
263
264 list_initialize(&head);
265 irq_spinlock_lock(&CPU->lock, false);
266
267 if (CPU->needs_relink > NEEDS_RELINK_MAX) {
268 int i;
269 for (i = start; i < RQ_COUNT - 1; i++) {
270 /* Remember and empty rq[i + 1] */
271
272 irq_spinlock_lock(&CPU->rq[i + 1].lock, false);
273 list_concat(&head, &CPU->rq[i + 1].rq_head);
274 size_t n = CPU->rq[i + 1].n;
275 CPU->rq[i + 1].n = 0;
276 irq_spinlock_unlock(&CPU->rq[i + 1].lock, false);
277
278 /* Append rq[i + 1] to rq[i] */
279
280 irq_spinlock_lock(&CPU->rq[i].lock, false);
281 list_concat(&CPU->rq[i].rq_head, &head);
282 CPU->rq[i].n += n;
283 irq_spinlock_unlock(&CPU->rq[i].lock, false);
284 }
285
286 CPU->needs_relink = 0;
287 }
288
289 irq_spinlock_unlock(&CPU->lock, false);
290}
291
/** The scheduler
 *
 * The thread scheduling procedure.
 *
 * Disables interrupts, saves the context of the current thread (if there
 * is one), copies THE onto the CPU's private stack and then passes
 * control directly to scheduler_separated_stack() running on that stack.
 *
 * A preempted thread later resumes inside this function, at the
 * context_save() call, and returns to its caller from there.
 *
 */
void scheduler(void)
{
	volatile ipl_t ipl;
	
	ASSERT(CPU != NULL);
	
	ipl = interrupts_disable();
	
	if (atomic_get(&haltstate))
		halt();
	
	if (THREAD) {
		irq_spinlock_lock(&THREAD->lock, false);
		
		/* Update thread kernel accounting */
		THREAD->kcycles += get_cycle() - THREAD->last_cycle;
		
#ifndef CONFIG_FPU_LAZY
		/* Without lazy FPU switching the context is saved eagerly. */
		fpu_context_save(THREAD->saved_fpu_context);
#endif
		if (!context_save(&THREAD->saved_context)) {
			/*
			 * This is the place where threads leave scheduler();
			 * it is reached when the saved context is restored by
			 * scheduler_separated_stack().
			 */
			
			/* Save current CPU cycle */
			THREAD->last_cycle = get_cycle();
			
			irq_spinlock_unlock(&THREAD->lock, false);
			interrupts_restore(THREAD->saved_context.ipl);
			
			return;
		}
		
		/*
		 * Interrupt priority level of preempted thread is recorded
		 * here to facilitate scheduler() invocations from
		 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
		 *
		 */
		THREAD->saved_context.ipl = ipl;
	}
	
	/*
	 * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, VM
	 * and preemption counter. At this point THE could be coming either
	 * from THREAD's or CPU's stack.
	 *
	 */
	the_copy(THE, (the_t *) CPU->stack);
	
	/*
	 * We may not keep the old stack.
	 * Reason: If we kept the old stack and got blocked, for instance, in
	 * find_best_thread(), the old thread could get rescheduled by another
	 * CPU and overwrite the part of its own stack that was also used by
	 * the scheduler on this CPU.
	 *
	 * Moreover, we have to bypass the compiler-generated POP sequence
	 * which is fooled by SP being set to the very top of the stack.
	 * Therefore the scheduler() function continues in
	 * scheduler_separated_stack().
	 *
	 */
	context_save(&CPU->saved_context);
	context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
	    (uintptr_t) CPU->stack, CPU_STACK_SIZE);
	context_restore(&CPU->saved_context);
	
	/* Not reached */
}
370
371/** Scheduler stack switch wrapper
372 *
373 * Second part of the scheduler() function
374 * using new stack. Handling the actual context
375 * switch to a new thread.
376 *
377 */
378void scheduler_separated_stack(void)
379{
380 DEADLOCK_PROBE_INIT(p_joinwq);
381 task_t *old_task = TASK;
382 as_t *old_as = AS;
383
384 ASSERT((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
385 ASSERT(CPU != NULL);
386
387 /*
388 * Hold the current task and the address space to prevent their
389 * possible destruction should thread_destroy() be called on this or any
390 * other processor while the scheduler is still using them.
391 */
392 if (old_task)
393 task_hold(old_task);
394
395 if (old_as)
396 as_hold(old_as);
397
398 if (THREAD) {
399 /* Must be run after the switch to scheduler stack */
400 after_thread_ran();
401
402 switch (THREAD->state) {
403 case Running:
404 irq_spinlock_unlock(&THREAD->lock, false);
405 thread_ready(THREAD);
406 break;
407
408 case Exiting:
409repeat:
410 if (THREAD->detached) {
411 thread_destroy(THREAD, false);
412 } else {
413 /*
414 * The thread structure is kept allocated until
415 * somebody calls thread_detach() on it.
416 */
417 if (!irq_spinlock_trylock(&THREAD->join_wq.lock)) {
418 /*
419 * Avoid deadlock.
420 */
421 irq_spinlock_unlock(&THREAD->lock, false);
422 delay(HZ);
423 irq_spinlock_lock(&THREAD->lock, false);
424 DEADLOCK_PROBE(p_joinwq,
425 DEADLOCK_THRESHOLD);
426 goto repeat;
427 }
428 _waitq_wakeup_unsafe(&THREAD->join_wq,
429 WAKEUP_FIRST);
430 irq_spinlock_unlock(&THREAD->join_wq.lock, false);
431
432 THREAD->state = Lingering;
433 irq_spinlock_unlock(&THREAD->lock, false);
434 }
435 break;
436
437 case Sleeping:
438 /*
439 * Prefer the thread after it's woken up.
440 */
441 THREAD->priority = -1;
442
443 /*
444 * We need to release wq->lock which we locked in
445 * waitq_sleep(). Address of wq->lock is kept in
446 * THREAD->sleep_queue.
447 */
448 irq_spinlock_unlock(&THREAD->sleep_queue->lock, false);
449
450 irq_spinlock_unlock(&THREAD->lock, false);
451 break;
452
453 default:
454 /*
455 * Entering state is unexpected.
456 */
457 panic("tid%" PRIu64 ": unexpected state %s.",
458 THREAD->tid, thread_states[THREAD->state]);
459 break;
460 }
461
462 THREAD = NULL;
463 }
464
465 THREAD = find_best_thread();
466
467 irq_spinlock_lock(&THREAD->lock, false);
468 int priority = THREAD->priority;
469 irq_spinlock_unlock(&THREAD->lock, false);
470
471 relink_rq(priority);
472
473 /*
474 * If both the old and the new task are the same,
475 * lots of work is avoided.
476 */
477 if (TASK != THREAD->task) {
478 as_t *new_as = THREAD->task->as;
479
480 /*
481 * Note that it is possible for two tasks
482 * to share one address space.
483 */
484 if (old_as != new_as) {
485 /*
486 * Both tasks and address spaces are different.
487 * Replace the old one with the new one.
488 */
489 as_switch(old_as, new_as);
490 }
491
492 TASK = THREAD->task;
493 before_task_runs();
494 }
495
496 if (old_task)
497 task_release(old_task);
498
499 if (old_as)
500 as_release(old_as);
501
502 irq_spinlock_lock(&THREAD->lock, false);
503 THREAD->state = Running;
504
505#ifdef SCHEDULER_VERBOSE
506 printf("cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
507 ", nrdy=%ld)\n", CPU->id, THREAD->tid, THREAD->priority,
508 THREAD->ticks, atomic_get(&CPU->nrdy));
509#endif
510
511 /*
512 * Some architectures provide late kernel PA2KA(identity)
513 * mapping in a page fault handler. However, the page fault
514 * handler uses the kernel stack of the running thread and
515 * therefore cannot be used to map it. The kernel stack, if
516 * necessary, is to be mapped in before_thread_runs(). This
517 * function must be executed before the switch to the new stack.
518 */
519 before_thread_runs();
520
521 /*
522 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
523 * thread's stack.
524 */
525 the_copy(THE, (the_t *) THREAD->kstack);
526
527 context_restore(&THREAD->saved_context);
528
529 /* Not reached */
530}
531
532#ifdef CONFIG_SMP
533/** Load balancing thread
534 *
535 * SMP load balancing thread, supervising thread supplies
536 * for the CPU it's wired to.
537 *
538 * @param arg Generic thread argument (unused).
539 *
540 */
541void kcpulb(void *arg)
542{
543 atomic_count_t average;
544 atomic_count_t rdy;
545
546 /*
547 * Detach kcpulb as nobody will call thread_join_timeout() on it.
548 */
549 thread_detach(THREAD);
550
551loop:
552 /*
553 * Work in 1s intervals.
554 */
555 thread_sleep(1);
556
557not_satisfied:
558 /*
559 * Calculate the number of threads that will be migrated/stolen from
560 * other CPU's. Note that situation can have changed between two
561 * passes. Each time get the most up to date counts.
562 *
563 */
564 average = atomic_get(&nrdy) / config.cpu_active + 1;
565 rdy = atomic_get(&CPU->nrdy);
566
567 if (average <= rdy)
568 goto satisfied;
569
570 atomic_count_t count = average - rdy;
571
572 /*
573 * Searching least priority queues on all CPU's first and most priority
574 * queues on all CPU's last.
575 *
576 */
577 size_t acpu;
578 size_t acpu_bias = 0;
579 int rq;
580
581 for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
582 for (acpu = 0; acpu < config.cpu_active; acpu++) {
583 cpu_t *cpu = &cpus[(acpu + acpu_bias) % config.cpu_active];
584
585 /*
586 * Not interested in ourselves.
587 * Doesn't require interrupt disabling for kcpulb has
588 * THREAD_FLAG_WIRED.
589 *
590 */
591 if (CPU == cpu)
592 continue;
593
594 if (atomic_get(&cpu->nrdy) <= average)
595 continue;
596
597 irq_spinlock_lock(&(cpu->rq[rq].lock), true);
598 if (cpu->rq[rq].n == 0) {
599 irq_spinlock_unlock(&(cpu->rq[rq].lock), true);
600 continue;
601 }
602
603 thread_t *thread = NULL;
604
605 /* Search rq from the back */
606 link_t *link = cpu->rq[rq].rq_head.prev;
607
608 while (link != &(cpu->rq[rq].rq_head)) {
609 thread = (thread_t *) list_get_instance(link, thread_t, rq_link);
610
611 /*
612 * We don't want to steal CPU-wired threads
613 * neither threads already stolen. The latter
614 * prevents threads from migrating between CPU's
615 * without ever being run. We don't want to
616 * steal threads whose FPU context is still in
617 * CPU.
618 *
619 */
620 irq_spinlock_lock(&thread->lock, false);
621
622 if ((!(thread->flags & (THREAD_FLAG_WIRED | THREAD_FLAG_STOLEN)))
623 && (!(thread->fpu_context_engaged))) {
624 /*
625 * Remove thread from ready queue.
626 */
627 irq_spinlock_unlock(&thread->lock, false);
628
629 atomic_dec(&cpu->nrdy);
630 atomic_dec(&nrdy);
631
632 cpu->rq[rq].n--;
633 list_remove(&thread->rq_link);
634
635 break;
636 }
637
638 irq_spinlock_unlock(&thread->lock, false);
639
640 link = link->prev;
641 thread = NULL;
642 }
643
644 if (thread) {
645 /*
646 * Ready thread on local CPU
647 *
648 */
649
650 irq_spinlock_pass(&(cpu->rq[rq].lock), &thread->lock);
651
652#ifdef KCPULB_VERBOSE
653 printf("kcpulb%u: TID %" PRIu64 " -> cpu%u, "
654 "nrdy=%ld, avg=%ld\n", CPU->id, t->tid,
655 CPU->id, atomic_get(&CPU->nrdy),
656 atomic_get(&nrdy) / config.cpu_active);
657#endif
658
659 thread->flags |= THREAD_FLAG_STOLEN;
660 thread->state = Entering;
661
662 irq_spinlock_unlock(&thread->lock, true);
663 thread_ready(thread);
664
665 if (--count == 0)
666 goto satisfied;
667
668 /*
669 * We are not satisfied yet, focus on another
670 * CPU next time.
671 *
672 */
673 acpu_bias++;
674
675 continue;
676 } else
677 irq_spinlock_unlock(&(cpu->rq[rq].lock), true);
678
679 }
680 }
681
682 if (atomic_get(&CPU->nrdy)) {
683 /*
684 * Be a little bit light-weight and let migrated threads run.
685 *
686 */
687 scheduler();
688 } else {
689 /*
690 * We failed to migrate a single thread.
691 * Give up this turn.
692 *
693 */
694 goto loop;
695 }
696
697 goto not_satisfied;
698
699satisfied:
700 goto loop;
701}
702#endif /* CONFIG_SMP */
703
704/** Print information about threads & scheduler queues
705 *
706 */
707void sched_print_list(void)
708{
709 size_t cpu;
710 for (cpu = 0; cpu < config.cpu_count; cpu++) {
711 if (!cpus[cpu].active)
712 continue;
713
714 irq_spinlock_lock(&cpus[cpu].lock, true);
715
716 printf("cpu%u: address=%p, nrdy=%" PRIua ", needs_relink=%zu\n",
717 cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
718 cpus[cpu].needs_relink);
719
720 unsigned int i;
721 for (i = 0; i < RQ_COUNT; i++) {
722 irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
723 if (cpus[cpu].rq[i].n == 0) {
724 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
725 continue;
726 }
727
728 printf("\trq[%u]: ", i);
729 link_t *cur;
730 for (cur = cpus[cpu].rq[i].rq_head.next;
731 cur != &(cpus[cpu].rq[i].rq_head);
732 cur = cur->next) {
733 thread_t *thread = list_get_instance(cur, thread_t, rq_link);
734 printf("%" PRIu64 "(%s) ", thread->tid,
735 thread_states[thread->state]);
736 }
737 printf("\n");
738
739 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
740 }
741
742 irq_spinlock_unlock(&cpus[cpu].lock, true);
743 }
744}
745
746/** @}
747 */
Note: See TracBrowser for help on using the repository browser.