source: mainline/kernel/generic/src/proc/scheduler.c@ cb4afd8b

Last change on this file since cb4afd8b was ea7890e7, checked in by Jakub Jermar <jakub@…>, 18 years ago

More efficient and simpler task termination.

Based on the assumption that, after its creation, only the task itself can create more threads for itself,
the last thread with userspace context to execute thread_exit() performs the futex and IPC cleanup. When
the task has no threads left, it is destroyed. Both the cleanup and the destruction are controlled by
reference counting.

As for userspace threads, even though there could be a global garbage collector for joining threads, it is
much simpler if the uinit thread detaches itself before switching to userspace.

task_kill() is now an idempotent operation. It just instructs the threads within a task to exit.

Change in the name of a thread state: Undead → JoinMe.
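
For orientation, the reference-counting scheme can be pictured with a minimal, self-contained sketch (hypothetical names and plain C11 userspace code, not the kernel's actual API): each attached thread holds one reference to its task, thread_exit() drops that reference, and whichever caller drops the last one tears the task down, which is also what makes repeated task_kill() requests harmless.

/* Minimal illustrative sketch -- hypothetical names, not HelenOS kernel code. */
#include <stdatomic.h>
#include <stdlib.h>

typedef struct example_task {
    atomic_int refcount;            /* one reference per attached thread */
} example_task_t;

static void example_task_destroy(example_task_t *task)
{
    free(task);                     /* stand-in for the real task teardown */
}

static void example_task_release(example_task_t *task)
{
    /* atomic_fetch_sub() returns the old value; whoever takes it to 0 wins. */
    if (atomic_fetch_sub(&task->refcount, 1) == 1)
        example_task_destroy(task);
}

In this picture, killing a task merely asks every thread to exit; each exiting thread calls the release function, and asking again changes nothing.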

  • Property mode set to 100644
File size: 16.4 KB
/*
 * Copyright (c) 2001-2007 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup genericproc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and the kcpulb kernel thread, which
 * performs load balancing of the per-CPU run queues.
 */

#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <func.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <print.h>
#include <debug.h>

static void before_task_runs(void);
static void before_thread_runs(void);
static void after_thread_ran(void);
static void scheduler_separated_stack(void);

atomic_t nrdy;    /**< Number of ready threads in the system. */

/** Carry out actions before a new task runs. */
void before_task_runs(void)
{
    before_task_runs_arch();
}

/** Take actions before a new thread runs.
 *
 * Perform actions that need to be taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry.
 *
 */
void before_thread_runs(void)
{
    before_thread_runs_arch();
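    /*
     * With CONFIG_FPU_LAZY the FPU is left disabled unless this CPU already
     * owns THREAD's FPU state; the thread's first FPU instruction is then
     * expected to trap and reach scheduler_fpu_lazy_request(), which performs
     * the actual FPU context switch. Without CONFIG_FPU_LAZY the FPU context
     * is switched eagerly right here.
     */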
#ifdef CONFIG_FPU_LAZY
    if (THREAD == CPU->fpu_owner)
        fpu_enable();
    else
        fpu_disable();
#else
    fpu_enable();
    if (THREAD->fpu_context_exists)
        fpu_context_restore(THREAD->saved_fpu_context);
    else {
        fpu_init();
        THREAD->fpu_context_exists = 1;
    }
#endif
}

/** Take actions after THREAD had run.
 *
 * Perform actions that need to be taken after the running thread
 * has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry.
 *
 */
void after_thread_ran(void)
{
    after_thread_ran_arch();
}

#ifdef CONFIG_FPU_LAZY
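/** Service a lazy FPU context switch request.
 *
 * Presumably invoked from the architecture's "FPU unavailable" fault path
 * when a thread that does not own this CPU's FPU state executes an FPU
 * instruction.
 */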
void scheduler_fpu_lazy_request(void)
{
restart:
    fpu_enable();
    spinlock_lock(&CPU->lock);

    /* Save old context */
    if (CPU->fpu_owner != NULL) {
        spinlock_lock(&CPU->fpu_owner->lock);
        fpu_context_save(CPU->fpu_owner->saved_fpu_context);
        /* don't prevent migration */
        CPU->fpu_owner->fpu_context_engaged = 0;
        spinlock_unlock(&CPU->fpu_owner->lock);
        CPU->fpu_owner = NULL;
    }

    spinlock_lock(&THREAD->lock);
    if (THREAD->fpu_context_exists) {
        fpu_context_restore(THREAD->saved_fpu_context);
    } else {
        /* Allocate FPU context */
        if (!THREAD->saved_fpu_context) {
            /* Might sleep */
            spinlock_unlock(&THREAD->lock);
            spinlock_unlock(&CPU->lock);
            THREAD->saved_fpu_context =
                (fpu_context_t *) slab_alloc(fpu_context_slab, 0);
            /* We may have switched CPUs during slab_alloc */
            goto restart;
        }
        fpu_init();
        THREAD->fpu_context_exists = 1;
    }
    CPU->fpu_owner = THREAD;
    THREAD->fpu_context_engaged = 1;
    spinlock_unlock(&THREAD->lock);

    spinlock_unlock(&CPU->lock);
}
#endif

/** Initialize scheduler
 *
 * Initialize the kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled according
 * to thread accounting and scheduler policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
    thread_t *t;
    runq_t *r;
    int i;

    ASSERT(CPU != NULL);

loop:
    interrupts_enable();

    if (atomic_get(&CPU->nrdy) == 0) {
        /*
         * Since there is nothing to run, the CPU sleeps
         * until a hardware interrupt or an IPI arrives.
         * This saves energy and helps hyperthreading.
         */

        /*
         * An interrupt might occur right before cpu_sleep() and wake up
         * a thread. In that case the CPU goes to sleep anyway, even
         * though a runnable thread exists.
         */

        cpu_sleep();
        goto loop;
    }

    interrupts_disable();

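    /*
     * Run queues are ordered by priority, rq[0] being the highest-priority
     * queue, so the first non-empty queue encountered below wins.
     */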
    for (i = 0; i < RQ_COUNT; i++) {
        r = &CPU->rq[i];
        spinlock_lock(&r->lock);
        if (r->n == 0) {
            /*
             * If this queue is empty, try a lower-priority queue.
             */
            spinlock_unlock(&r->lock);
            continue;
        }

        atomic_dec(&CPU->nrdy);
        atomic_dec(&nrdy);
        r->n--;

        /*
         * Take the first thread from the queue.
         */
        t = list_get_instance(r->rq_head.next, thread_t, rq_link);
        list_remove(&t->rq_link);

        spinlock_unlock(&r->lock);

        spinlock_lock(&t->lock);
        t->cpu = CPU;

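        /*
         * The time quantum is derived from the run queue index:
         * (i + 1) * 10 ms, so threads taken from lower-priority
         * queues receive a longer quantum.
         */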
        t->ticks = us2ticks((i + 1) * 10000);
        t->priority = i;    /* correct rq index */

        /*
         * Clear the THREAD_FLAG_STOLEN flag so that t can be migrated
         * when load balancing needs emerge.
         */
        t->flags &= ~THREAD_FLAG_STOLEN;
        spinlock_unlock(&t->lock);

        return t;
    }
    goto loop;

}

/** Prevent rq starvation
 *
 * Prevent low priority threads from starving in rq's.
 *
 * When the function decides to relink rq's, it reconnects the
 * respective pointers so that, as a result, threads with priority
 * greater than start are moved to the next higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
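/*
 * Example: with start == 0 and needs_relink above NEEDS_RELINK_MAX, the
 * contents of rq[1] are appended to rq[0], rq[2] to rq[1], and so on,
 * so every thread effectively moves up one priority level.
 */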
static void relink_rq(int start)
{
    link_t head;
    runq_t *r;
    int i, n;

    list_initialize(&head);
    spinlock_lock(&CPU->lock);
    if (CPU->needs_relink > NEEDS_RELINK_MAX) {
        for (i = start; i < RQ_COUNT - 1; i++) {
            /* remember and empty rq[i + 1] */
            r = &CPU->rq[i + 1];
            spinlock_lock(&r->lock);
            list_concat(&head, &r->rq_head);
            n = r->n;
            r->n = 0;
            spinlock_unlock(&r->lock);

            /* append rq[i + 1] to rq[i] */
            r = &CPU->rq[i];
            spinlock_lock(&r->lock);
            list_concat(&r->rq_head, &head);
            r->n += n;
            spinlock_unlock(&r->lock);
        }
        CPU->needs_relink = 0;
    }
    spinlock_unlock(&CPU->lock);

}

/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler(void)
{
    volatile ipl_t ipl;

    ASSERT(CPU != NULL);

    ipl = interrupts_disable();

    if (atomic_get(&haltstate))
        halt();

    if (THREAD) {
        spinlock_lock(&THREAD->lock);

        /* Update thread accounting */
        THREAD->cycles += get_cycle() - THREAD->last_cycle;

#ifndef CONFIG_FPU_LAZY
        fpu_context_save(THREAD->saved_fpu_context);
#endif
        if (!context_save(&THREAD->saved_context)) {
            /*
             * This is the place where threads leave scheduler();
             */

            /* Save current CPU cycle */
            THREAD->last_cycle = get_cycle();

            spinlock_unlock(&THREAD->lock);
            interrupts_restore(THREAD->saved_context.ipl);

            return;
        }

        /*
         * The interrupt priority level of the preempted thread is
         * recorded here to facilitate scheduler() invocations from
         * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
         */
        THREAD->saved_context.ipl = ipl;
    }

    /*
     * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, VM
     * and the preemption counter. At this point THE could be coming either
     * from THREAD's or CPU's stack.
     */
    the_copy(THE, (the_t *) CPU->stack);

    /*
     * We may not keep the old stack.
     * Reason: If we kept the old stack and got blocked, for instance, in
     * find_best_thread(), the old thread could get rescheduled by another
     * CPU and overwrite the part of its own stack that was also used by
     * the scheduler on this CPU.
     *
     * Moreover, we have to bypass the compiler-generated POP sequence
     * which is fooled by SP being set to the very top of the stack.
     * Therefore the scheduler() function continues in
     * scheduler_separated_stack().
     */
    context_save(&CPU->saved_context);
    context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
        (uintptr_t) CPU->stack, CPU_STACK_SIZE);
    context_restore(&CPU->saved_context);
    /* not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function, running on a new stack.
 * It handles the actual context switch to a new thread.
 *
 * Assume THREAD->lock is held.
 */
void scheduler_separated_stack(void)
{
    int priority;
    DEADLOCK_PROBE_INIT(p_joinwq);

    ASSERT(CPU != NULL);

    if (THREAD) {
        /* Must be run after the switch to the scheduler stack */
        after_thread_ran();

        switch (THREAD->state) {
        case Running:
            spinlock_unlock(&THREAD->lock);
            thread_ready(THREAD);
            break;

        case Exiting:
repeat:
            if (THREAD->detached) {
                thread_destroy(THREAD);
            } else {
                /*
                 * The thread structure is kept allocated until
                 * somebody calls thread_detach() on it.
                 */
                if (!spinlock_trylock(&THREAD->join_wq.lock)) {
                    /*
                     * Avoid deadlock.
                     */
                    spinlock_unlock(&THREAD->lock);
                    delay(HZ);
                    spinlock_lock(&THREAD->lock);
                    DEADLOCK_PROBE(p_joinwq,
                        DEADLOCK_THRESHOLD);
                    goto repeat;
                }
                _waitq_wakeup_unsafe(&THREAD->join_wq,
                    WAKEUP_FIRST);
                spinlock_unlock(&THREAD->join_wq.lock);

                THREAD->state = JoinMe;
                spinlock_unlock(&THREAD->lock);
            }
            break;

        case Sleeping:
            /*
             * Prefer the thread after it's woken up.
             */
            THREAD->priority = -1;

            /*
             * We need to release wq->lock which we locked in
             * waitq_sleep(). The address of wq->lock is kept in
             * THREAD->sleep_queue.
             */
            spinlock_unlock(&THREAD->sleep_queue->lock);

            /*
             * Check for possible requests for out-of-context
             * invocation.
             */
            if (THREAD->call_me) {
                THREAD->call_me(THREAD->call_me_with);
                THREAD->call_me = NULL;
                THREAD->call_me_with = NULL;
            }

            spinlock_unlock(&THREAD->lock);

            break;

        default:
            /*
             * Entering state is unexpected.
             */
            panic("tid%llu: unexpected state %s\n", THREAD->tid,
                thread_states[THREAD->state]);
            break;
        }

        THREAD = NULL;
    }

    THREAD = find_best_thread();

    spinlock_lock(&THREAD->lock);
    priority = THREAD->priority;
    spinlock_unlock(&THREAD->lock);

    relink_rq(priority);

    /*
     * If the old and the new task are the same, a lot of work is avoided.
     */
    if (TASK != THREAD->task) {
        as_t *as1 = NULL;
        as_t *as2;

        if (TASK) {
            spinlock_lock(&TASK->lock);
            as1 = TASK->as;
            spinlock_unlock(&TASK->lock);
        }

        spinlock_lock(&THREAD->task->lock);
        as2 = THREAD->task->as;
        spinlock_unlock(&THREAD->task->lock);

        /*
         * Note that it is possible for two tasks to share one address
         * space.
         */
        if (as1 != as2) {
            /*
             * Both tasks and address spaces are different.
             * Replace the old one with the new one.
             */
            as_switch(as1, as2);
        }
        TASK = THREAD->task;
        before_task_runs();
    }

    spinlock_lock(&THREAD->lock);
    THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
    printf("cpu%d: tid %llu (priority=%d, ticks=%llu, nrdy=%ld)\n",
        CPU->id, THREAD->tid, THREAD->priority, THREAD->ticks,
        atomic_get(&CPU->nrdy));
#endif

    /*
     * Some architectures provide late kernel PA2KA(identity)
     * mapping in a page fault handler. However, the page fault
     * handler uses the kernel stack of the running thread and
     * therefore cannot be used to map it. The kernel stack, if
     * necessary, is to be mapped in before_thread_runs(). This
     * function must be executed before the switch to the new stack.
     */
    before_thread_runs();

    /*
     * Copy the knowledge of CPU, TASK, THREAD and the preemption counter
     * to the thread's stack.
     */
    the_copy(THE, (the_t *) THREAD->kstack);

    context_restore(&THREAD->saved_context);
    /* not reached */
}

#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * The SMP load balancing thread. It supervises the supply of threads
 * for the CPU it is wired to.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
    thread_t *t;
    int count, average, j, k = 0;
    unsigned int i;
    ipl_t ipl;

    /*
     * Detach kcpulb as nobody will call thread_join_timeout() on it.
     */
    thread_detach(THREAD);

loop:
    /*
     * Work in 1s intervals.
     */
    thread_sleep(1);

not_satisfied:
    /*
     * Calculate the number of threads to migrate (steal) from other
     * CPUs. Note that the situation may have changed between two
     * passes; always use the most up-to-date counts.
     */
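    /*
     * Worked example: with 9 ready threads system-wide and 3 active CPUs,
     * average == 9 / 3 + 1 == 4, so a CPU that currently has only one
     * ready thread will try to steal count == 4 - 1 == 3 threads.
     */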
    average = atomic_get(&nrdy) / config.cpu_active + 1;
    count = average - atomic_get(&CPU->nrdy);

    if (count <= 0)
        goto satisfied;

    /*
     * Search the lowest-priority queues on all CPUs first and the
     * highest-priority queues on all CPUs last.
     */
    for (j = RQ_COUNT - 1; j >= 0; j--) {
        for (i = 0; i < config.cpu_active; i++) {
            link_t *l;
            runq_t *r;
            cpu_t *cpu;

            cpu = &cpus[(i + k) % config.cpu_active];

            /*
             * Not interested in ourselves.
             * No interrupt disabling is needed here because kcpulb
             * has THREAD_FLAG_WIRED and thus cannot migrate.
             */
            if (CPU == cpu)
                continue;
            if (atomic_get(&cpu->nrdy) <= average)
                continue;

            ipl = interrupts_disable();
            r = &cpu->rq[j];
            spinlock_lock(&r->lock);
            if (r->n == 0) {
                spinlock_unlock(&r->lock);
                interrupts_restore(ipl);
                continue;
            }

            t = NULL;
            l = r->rq_head.prev;    /* search rq from the back */
            while (l != &r->rq_head) {
                t = list_get_instance(l, thread_t, rq_link);
                /*
                 * We don't want to steal CPU-wired threads or
                 * threads that have already been stolen. The
                 * latter prevents threads from bouncing between
                 * CPUs without ever being run. We also don't
                 * want to steal threads whose FPU context is
                 * still in the CPU.
                 */
                spinlock_lock(&t->lock);
                if ((!(t->flags & (THREAD_FLAG_WIRED |
                    THREAD_FLAG_STOLEN))) &&
                    (!(t->fpu_context_engaged))) {
                    /*
                     * Remove t from r.
                     */
                    spinlock_unlock(&t->lock);

                    atomic_dec(&cpu->nrdy);
                    atomic_dec(&nrdy);

                    r->n--;
                    list_remove(&t->rq_link);

                    break;
                }
                spinlock_unlock(&t->lock);
                l = l->prev;
                t = NULL;
            }
            spinlock_unlock(&r->lock);

            if (t) {
                /*
                 * Ready t on local CPU
                 */
                spinlock_lock(&t->lock);
#ifdef KCPULB_VERBOSE
                printf("kcpulb%d: TID %llu -> cpu%d, nrdy=%ld, "
                    "avg=%ld\n", CPU->id, t->tid, CPU->id,
                    atomic_get(&CPU->nrdy),
                    atomic_get(&nrdy) / config.cpu_active);
#endif
                t->flags |= THREAD_FLAG_STOLEN;
                t->state = Entering;
                spinlock_unlock(&t->lock);

                thread_ready(t);

                interrupts_restore(ipl);

                if (--count == 0)
                    goto satisfied;

                /*
                 * We are not satisfied yet, focus on another
                 * CPU next time.
                 */
                k++;

                continue;
            }
            interrupts_restore(ipl);
        }
    }

    if (atomic_get(&CPU->nrdy)) {
        /*
         * Be a little bit light-weight and let migrated threads run.
         */
        scheduler();
    } else {
        /*
         * We failed to migrate a single thread.
         * Give up this turn.
         */
        goto loop;
    }

    goto not_satisfied;

satisfied:
    goto loop;
}

#endif /* CONFIG_SMP */

/** Print information about threads & scheduler queues */
void sched_print_list(void)
{
    ipl_t ipl;
    unsigned int cpu, i;
    runq_t *r;
    thread_t *t;
    link_t *cur;

    /*
     * We are going to mess with scheduler structures,
     * so let us not be interrupted.
     */
    ipl = interrupts_disable();
    for (cpu = 0; cpu < config.cpu_count; cpu++) {

        if (!cpus[cpu].active)
            continue;

        spinlock_lock(&cpus[cpu].lock);
        printf("cpu%d: address=%p, nrdy=%ld, needs_relink=%ld\n",
            cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
            cpus[cpu].needs_relink);

        for (i = 0; i < RQ_COUNT; i++) {
            r = &cpus[cpu].rq[i];
            spinlock_lock(&r->lock);
            if (!r->n) {
                spinlock_unlock(&r->lock);
                continue;
            }
            printf("\trq[%d]: ", i);
            for (cur = r->rq_head.next; cur != &r->rq_head;
                cur = cur->next) {
                t = list_get_instance(cur, thread_t, rq_link);
                printf("%llu(%s) ", t->tid,
                    thread_states[t->state]);
            }
            printf("\n");
            spinlock_unlock(&r->lock);
        }
        spinlock_unlock(&cpus[cpu].lock);
    }

    interrupts_restore(ipl);
}

/** @}
 */