source: mainline/generic/src/proc/scheduler.c@7509ddc

Last change on this file since 7509ddc was 2cb5e64, checked in by Jakub Jermar <jakub@…>, 20 years ago

Make use of thread_join_timeout() and thread_detach() in kernel.

Improved comments in slab.h.

  • Property mode set to 100644
File size: 15.8 KB
/*
 * Copyright (C) 2001-2004 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file scheduler.c
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and the kcpulb kernel thread, which
 * performs load balancing of per-CPU run queues.
 */

#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <func.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <typedefs.h>
#include <cpu.h>
#include <print.h>
#include <debug.h>

static void before_task_runs(void);
static void before_thread_runs(void);
static void after_thread_ran(void);
static void scheduler_separated_stack(void);

atomic_t nrdy;	/**< Number of ready threads in the system. */

/** Carry out actions before a new task runs. */
void before_task_runs(void)
{
	before_task_runs_arch();
}

/** Take actions before new thread runs.
 *
 * Perform actions that need to be
 * taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry
 *
 */
void before_thread_runs(void)
{
	before_thread_runs_arch();
#ifdef CONFIG_FPU_LAZY
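	/*
	 * With lazy FPU context switching, only enable the FPU if this
	 * thread already owns it; otherwise keep the FPU disabled so that
	 * the first FPU instruction traps and scheduler_fpu_lazy_request()
	 * can perform the context switch on demand.
	 */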
	if (THREAD == CPU->fpu_owner)
		fpu_enable();
	else
		fpu_disable();
#else
	fpu_enable();
	if (THREAD->fpu_context_exists)
		fpu_context_restore(THREAD->saved_fpu_context);
	else {
		fpu_init();
		THREAD->fpu_context_exists = 1;
	}
#endif
}

/** Take actions after THREAD had run.
 *
 * Perform actions that need to be
 * taken after the running thread
 * had been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry
 *
 */
void after_thread_ran(void)
{
	after_thread_ran_arch();
}

#ifdef CONFIG_FPU_LAZY
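/** Claim the FPU for the current thread (lazy FPU context switching).
 *
 * Invoked when the current thread touches the FPU while the unit is
 * disabled. Saves the context of the previous FPU owner, restores or
 * allocates and initializes the context of THREAD, and makes THREAD
 * the new owner of this CPU's FPU.
 */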
void scheduler_fpu_lazy_request(void)
{
restart:
	fpu_enable();
	spinlock_lock(&CPU->lock);

	/* Save old context */
	if (CPU->fpu_owner != NULL) {
		spinlock_lock(&CPU->fpu_owner->lock);
		fpu_context_save(CPU->fpu_owner->saved_fpu_context);
		/* don't prevent migration */
		CPU->fpu_owner->fpu_context_engaged = 0;
		spinlock_unlock(&CPU->fpu_owner->lock);
		CPU->fpu_owner = NULL;
	}

	spinlock_lock(&THREAD->lock);
	if (THREAD->fpu_context_exists) {
		fpu_context_restore(THREAD->saved_fpu_context);
	} else {
		/* Allocate FPU context */
		if (!THREAD->saved_fpu_context) {
			/* Might sleep */
			spinlock_unlock(&THREAD->lock);
			spinlock_unlock(&CPU->lock);
			THREAD->saved_fpu_context = slab_alloc(fpu_context_slab, 0);
			/* We may have switched CPUs during slab_alloc */
			goto restart;
		}
		fpu_init();
		THREAD->fpu_context_exists = 1;
	}
	CPU->fpu_owner = THREAD;
	THREAD->fpu_context_engaged = 1;
	spinlock_unlock(&THREAD->lock);

	spinlock_unlock(&CPU->lock);
}
#endif

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
	thread_t *t;
	runq_t *r;
	int i;

	ASSERT(CPU != NULL);

loop:
	interrupts_enable();

	if (atomic_get(&CPU->nrdy) == 0) {
		/*
		 * Since there was nothing to run, the CPU goes to sleep
		 * until a hardware interrupt or an IPI comes.
		 * This improves energy saving and hyperthreading.
		 */

		/*
		 * An interrupt might occur right now and wake up a thread.
		 * In such a case, the CPU will go to sleep anyway,
		 * even though there is a runnable thread.
		 */

		cpu_sleep();
		goto loop;
	}

	interrupts_disable();

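	/*
	 * Scan the run queues from the highest priority (rq[0]) down to the
	 * lowest. The first non-empty queue wins; its index also determines
	 * the length of the time quantum assigned below.
	 */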
	for (i = 0; i < RQ_COUNT; i++) {
		r = &CPU->rq[i];
		spinlock_lock(&r->lock);
		if (r->n == 0) {
			/*
			 * If this queue is empty, try a lower-priority queue.
			 */
			spinlock_unlock(&r->lock);
			continue;
		}

		atomic_dec(&CPU->nrdy);
		atomic_dec(&nrdy);
		r->n--;

		/*
		 * Take the first thread from the queue.
		 */
		t = list_get_instance(r->rq_head.next, thread_t, rq_link);
		list_remove(&t->rq_link);

		spinlock_unlock(&r->lock);

		spinlock_lock(&t->lock);
		t->cpu = CPU;

		/* 10 ms quantum for rq[0], growing with the queue index */
		t->ticks = us2ticks((i + 1) * 10000);
		t->priority = i;	/* correct rq index */

		/*
		 * Clear the X_STOLEN flag so that t can be migrated
		 * when load balancing needs emerge.
		 */
		t->flags &= ~X_STOLEN;
		spinlock_unlock(&t->lock);

		return t;
	}
	goto loop;

}

/** Prevent rq starvation
 *
 * Prevent low priority threads from starving in rq's.
 *
 * When the function decides to relink rq's, it reconnects
 * respective pointers so that, as a result, threads with priority
 * greater than or equal to @start are moved to a higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
static void relink_rq(int start)
{
	link_t head;
	runq_t *r;
	int i, n;

	list_initialize(&head);
	spinlock_lock(&CPU->lock);
	if (CPU->needs_relink > NEEDS_RELINK_MAX) {
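		/*
		 * Merge each rq[i + 1] into rq[i], starting at i == start,
		 * so that every affected thread is promoted by one
		 * priority level per relink.
		 */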
		for (i = start; i < RQ_COUNT - 1; i++) {
			/* remember and empty rq[i + 1] */
			r = &CPU->rq[i + 1];
			spinlock_lock(&r->lock);
			list_concat(&head, &r->rq_head);
			n = r->n;
			r->n = 0;
			spinlock_unlock(&r->lock);

			/* append rq[i + 1] to rq[i] */
			r = &CPU->rq[i];
			spinlock_lock(&r->lock);
			list_concat(&r->rq_head, &head);
			r->n += n;
			spinlock_unlock(&r->lock);
		}
		CPU->needs_relink = 0;
	}
	spinlock_unlock(&CPU->lock);

}

/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler(void)
{
	volatile ipl_t ipl;

	ASSERT(CPU != NULL);

	ipl = interrupts_disable();

	if (atomic_get(&haltstate))
		halt();

	if (THREAD) {
		spinlock_lock(&THREAD->lock);
#ifndef CONFIG_FPU_LAZY
		fpu_context_save(THREAD->saved_fpu_context);
#endif
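		/*
		 * context_save() returns 1 on the call that saves the
		 * context and 0 when the saved context is later resumed via
		 * context_restore(), so the branch below runs only when the
		 * thread is scheduled to run again.
		 */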
		if (!context_save(&THREAD->saved_context)) {
			/*
			 * This is the place where threads leave scheduler();
			 */
			spinlock_unlock(&THREAD->lock);
			interrupts_restore(THREAD->saved_context.ipl);

			return;
		}

		/*
		 * Interrupt priority level of preempted thread is recorded here
		 * to facilitate scheduler() invocations from interrupts_disable()'d
		 * code (e.g. waitq_sleep_timeout()).
		 */
		THREAD->saved_context.ipl = ipl;
	}

	/*
	 * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, VM
	 * and preemption counter. At this point THE could be coming either
	 * from THREAD's or CPU's stack.
	 */
	the_copy(THE, (the_t *) CPU->stack);

	/*
	 * We may not keep the old stack.
	 * Reason: If we kept the old stack and got blocked, for instance, in
	 * find_best_thread(), the old thread could get rescheduled by another
	 * CPU and overwrite the part of its own stack that was also used by
	 * the scheduler on this CPU.
	 *
	 * Moreover, we have to bypass the compiler-generated POP sequence
	 * which is fooled by SP being set to the very top of the stack.
	 * Therefore the scheduler() function continues in
	 * scheduler_separated_stack().
	 */
	context_save(&CPU->saved_context);
	context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
	    (__address) CPU->stack, CPU_STACK_SIZE);
	context_restore(&CPU->saved_context);
	/* not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function
 * using the new stack. It handles the actual
 * context switch to a new thread.
 *
 * Assume THREAD->lock is held.
 */
void scheduler_separated_stack(void)
{
	int priority;

	ASSERT(CPU != NULL);

	if (THREAD) {
		/* must be run after the switch to scheduler stack */
		after_thread_ran();

		switch (THREAD->state) {
		case Running:
			spinlock_unlock(&THREAD->lock);
			thread_ready(THREAD);
			break;

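		/*
		 * An exiting thread is destroyed right away if it is
		 * detached; otherwise it becomes Undead and waits until
		 * somebody calls thread_join_timeout() and thread_detach()
		 * on it.
		 */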
		case Exiting:
repeat:
			if (THREAD->detached) {
				thread_destroy(THREAD);
			} else {
				/*
				 * The thread structure is kept allocated until somebody
				 * calls thread_detach() on it.
				 */
				if (!spinlock_trylock(&THREAD->join_wq.lock)) {
					/*
					 * Avoid deadlock.
					 */
					spinlock_unlock(&THREAD->lock);
					delay(10);
					spinlock_lock(&THREAD->lock);
					goto repeat;
				}
				_waitq_wakeup_unsafe(&THREAD->join_wq, false);
				spinlock_unlock(&THREAD->join_wq.lock);

				THREAD->state = Undead;
				spinlock_unlock(&THREAD->lock);
			}
			break;

		case Sleeping:
			/*
			 * Prefer the thread after it's woken up.
			 */
			THREAD->priority = -1;

			/*
			 * We need to release wq->lock which we locked in waitq_sleep().
			 * Address of wq->lock is kept in THREAD->sleep_queue.
			 */
			spinlock_unlock(&THREAD->sleep_queue->lock);

			/*
			 * Check for possible requests for out-of-context invocation.
			 */
			if (THREAD->call_me) {
				THREAD->call_me(THREAD->call_me_with);
				THREAD->call_me = NULL;
				THREAD->call_me_with = NULL;
			}

			spinlock_unlock(&THREAD->lock);

			break;

		default:
			/*
			 * Entering state is unexpected.
			 */
			panic("tid%d: unexpected state %s\n", THREAD->tid,
			    thread_states[THREAD->state]);
			break;
		}

		THREAD = NULL;
	}

	THREAD = find_best_thread();

	spinlock_lock(&THREAD->lock);
	priority = THREAD->priority;
	spinlock_unlock(&THREAD->lock);

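	/*
	 * Prevent starvation of queues below the priority of the thread just
	 * picked; see relink_rq().
	 */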
	relink_rq(priority);

	/*
	 * If the old and the new task are the same, a lot of work is avoided.
	 */
	if (TASK != THREAD->task) {
		as_t *as1 = NULL;
		as_t *as2;

		if (TASK) {
			spinlock_lock(&TASK->lock);
			as1 = TASK->as;
			spinlock_unlock(&TASK->lock);
		}

		spinlock_lock(&THREAD->task->lock);
		as2 = THREAD->task->as;
		spinlock_unlock(&THREAD->task->lock);

		/*
		 * Note that it is possible for two tasks to share one address space.
		 */
		if (as1 != as2) {
			/*
			 * Both the tasks and their address spaces are different.
			 * Replace the old address space with the new one.
			 */
			as_switch(as1, as2);
		}
		TASK = THREAD->task;
		before_task_runs();
	}

	spinlock_lock(&THREAD->lock);
	THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
	printf("cpu%d: tid %d (priority=%d,ticks=%lld,nrdy=%ld)\n",
	    CPU->id, THREAD->tid, THREAD->priority, THREAD->ticks,
	    atomic_get(&CPU->nrdy));
#endif

	/*
	 * Some architectures provide late kernel PA2KA(identity)
	 * mapping in a page fault handler. However, the page fault
	 * handler uses the kernel stack of the running thread and
	 * therefore cannot be used to map it. The kernel stack, if
	 * necessary, is to be mapped in before_thread_runs(). This
	 * function must be executed before the switch to the new stack.
	 */
	before_thread_runs();

	/*
	 * Copy the knowledge of CPU, TASK, THREAD and preemption counter
	 * to thread's stack.
	 */
	the_copy(THE, (the_t *) THREAD->kstack);

	context_restore(&THREAD->saved_context);
	/* not reached */
}

#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * SMP load balancing thread, supervising the supply
 * of threads for the CPU it is wired to.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
	thread_t *t;
	int count, average, i, j, k = 0;
	ipl_t ipl;

	/*
	 * Detach kcpulb as nobody will call thread_join_timeout() on it.
	 */
	thread_detach(THREAD);

loop:
	/*
	 * Work in 1s intervals.
	 */
	thread_sleep(1);

not_satisfied:
	/*
	 * Calculate the number of threads that will be migrated/stolen from
	 * other CPUs. Note that the situation may have changed between two
	 * passes. Each time, get the most up-to-date counts.
	 */
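	/*
	 * 'average' is the wanted per-CPU share of ready threads (the global
	 * count divided by the number of active CPUs, plus one); 'count' is
	 * how many threads this CPU is missing to reach that share and
	 * therefore how many it will try to steal.
	 */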
	average = atomic_get(&nrdy) / config.cpu_active + 1;
	count = average - atomic_get(&CPU->nrdy);

	if (count <= 0)
		goto satisfied;

	/*
	 * Search the lowest-priority queues on all CPUs first and the
	 * highest-priority queues on all CPUs last.
	 */
	for (j = RQ_COUNT - 1; j >= 0; j--) {
		for (i = 0; i < config.cpu_active; i++) {
			link_t *l;
			runq_t *r;
			cpu_t *cpu;

			cpu = &cpus[(i + k) % config.cpu_active];
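			/*
			 * The offset k rotates which CPU is tried first;
			 * it is advanced after every successful migration.
			 */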

			/*
			 * Not interested in ourselves.
			 * Interrupt disabling is not required because kcpulb is X_WIRED.
			 */
			if (CPU == cpu)
				continue;
			if (atomic_get(&cpu->nrdy) <= average)
				continue;

			ipl = interrupts_disable();
			r = &cpu->rq[j];
			spinlock_lock(&r->lock);
			if (r->n == 0) {
				spinlock_unlock(&r->lock);
				interrupts_restore(ipl);
				continue;
			}

			t = NULL;
			l = r->rq_head.prev;	/* search rq from the back */
			while (l != &r->rq_head) {
				t = list_get_instance(l, thread_t, rq_link);
				/*
				 * We don't want to steal CPU-wired threads or threads
				 * that have already been stolen. The latter prevents
				 * threads from migrating between CPUs without ever
				 * being run. We also don't want to steal threads whose
				 * FPU context is still in the CPU.
				 */
				spinlock_lock(&t->lock);
				if (!(t->flags & (X_WIRED | X_STOLEN)) &&
				    !t->fpu_context_engaged) {
					/*
					 * Remove t from r.
					 */
					spinlock_unlock(&t->lock);

					atomic_dec(&cpu->nrdy);
					atomic_dec(&nrdy);

					r->n--;
					list_remove(&t->rq_link);

					break;
				}
				spinlock_unlock(&t->lock);
				l = l->prev;
				t = NULL;
			}
			spinlock_unlock(&r->lock);

			if (t) {
				/*
				 * Ready t on local CPU
				 */
				spinlock_lock(&t->lock);
#ifdef KCPULB_VERBOSE
				printf("kcpulb%d: TID %d -> cpu%d, nrdy=%ld, avg=%ld\n",
				    CPU->id, t->tid, CPU->id,
				    atomic_get(&CPU->nrdy),
				    atomic_get(&nrdy) / config.cpu_active);
#endif
				t->flags |= X_STOLEN;
				t->state = Entering;
				spinlock_unlock(&t->lock);

				thread_ready(t);

				interrupts_restore(ipl);

				if (--count == 0)
					goto satisfied;

				/*
				 * We are not satisfied yet, focus on another CPU next time.
				 */
				k++;

				continue;
			}
			interrupts_restore(ipl);
		}
	}

	if (atomic_get(&CPU->nrdy)) {
		/*
		 * Be a little bit light-weight and let migrated threads run.
		 */
		scheduler();
	} else {
		/*
		 * We failed to migrate a single thread.
		 * Give up this turn.
		 */
		goto loop;
	}

	goto not_satisfied;

satisfied:
	goto loop;
}

#endif /* CONFIG_SMP */


/** Print information about threads & scheduler queues */
void sched_print_list(void)
{
	ipl_t ipl;
	int cpu, i;
	runq_t *r;
	thread_t *t;
	link_t *cur;

	/*
	 * We are going to mess with scheduler structures,
	 * let's not be interrupted.
	 */
	ipl = interrupts_disable();
	for (cpu = 0; cpu < config.cpu_count; cpu++) {

		if (!cpus[cpu].active)
			continue;

		spinlock_lock(&cpus[cpu].lock);
		printf("cpu%d: address=%p, nrdy=%ld, needs_relink=%ld\n",
		    cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
		    cpus[cpu].needs_relink);

		for (i = 0; i < RQ_COUNT; i++) {
			r = &cpus[cpu].rq[i];
			spinlock_lock(&r->lock);
			if (!r->n) {
				spinlock_unlock(&r->lock);
				continue;
			}
			printf("\trq[%d]: ", i);
			for (cur = r->rq_head.next; cur != &r->rq_head;
			    cur = cur->next) {
				t = list_get_instance(cur, thread_t, rq_link);
				printf("%d(%s) ", t->tid,
				    thread_states[t->state]);
			}
			printf("\n");
			spinlock_unlock(&r->lock);
		}
		spinlock_unlock(&cpus[cpu].lock);
	}

	interrupts_restore(ipl);
}