source: mainline/kernel/generic/src/proc/scheduler.c@1624aae

Last change on this file since 1624aae was 481d4751, checked in by Jakub Jermar <jakub@…>, 15 years ago

Fix a race condition between the scheduler and as_destroy().

It was possible for the scheduler to use page tables of an address space which
was already destroyed. Prevent this from happening by holding extra references
to the current task and the current address space in
scheduler_separated_stack().
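
In outline, the fix brackets the scheduler's use of the outgoing task and address space with explicit references (a condensed excerpt of the pattern as it appears in scheduler_separated_stack() below):

    task_t *old_task = TASK;
    as_t *old_as = AS;

    if (old_task)
        task_hold(old_task);
    if (old_as)
        as_hold(old_as);

    /* ... select the next thread, possibly switching address spaces ... */

    if (old_task)
        task_release(old_task);
    if (old_as)
        as_release(old_as);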

/*
 * Copyright (c) 2010 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup genericproc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and kcpulb kernel thread which
 * performs load-balancing of per-CPU run queues.
 */

#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <func.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <print.h>
#include <debug.h>

static void before_task_runs(void);
static void before_thread_runs(void);
static void after_thread_ran(void);
static void scheduler_separated_stack(void);

atomic_t nrdy;  /**< Number of ready threads in the system. */

/** Carry out actions before new task runs. */
void before_task_runs(void)
{
    before_task_runs_arch();
}

/** Take actions before a new thread runs.
 *
 * Perform actions that need to be taken before
 * the newly selected thread is passed control.
 *
 * THREAD->lock is locked on entry.
 *
 */
void before_thread_runs(void)
{
    before_thread_runs_arch();
#ifdef CONFIG_FPU_LAZY
    if (THREAD == CPU->fpu_owner)
        fpu_enable();
    else
        fpu_disable();
#else
    fpu_enable();
    if (THREAD->fpu_context_exists)
        fpu_context_restore(THREAD->saved_fpu_context);
    else {
        fpu_init();
        THREAD->fpu_context_exists = 1;
    }
#endif
}

/** Take actions after THREAD has run.
 *
 * Perform actions that need to be taken after
 * the running thread has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry.
 *
 */
void after_thread_ran(void)
{
    after_thread_ran_arch();
}

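/*
 * Lazy FPU context switching (a descriptive note, not verified against every
 * architecture): with CONFIG_FPU_LAZY the FPU is kept disabled for every
 * thread except CPU->fpu_owner. The first FPU instruction executed by another
 * thread is expected to trap, and the trap handler calls
 * scheduler_fpu_lazy_request() below, which saves the previous owner's
 * context and restores (or lazily allocates and initializes) the context of
 * the current thread.
 */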
#ifdef CONFIG_FPU_LAZY
void scheduler_fpu_lazy_request(void)
{
restart:
    fpu_enable();
    spinlock_lock(&CPU->lock);

    /* Save the old context. */
    if (CPU->fpu_owner != NULL) {
        spinlock_lock(&CPU->fpu_owner->lock);
        fpu_context_save(CPU->fpu_owner->saved_fpu_context);
        /* Don't prevent migration. */
        CPU->fpu_owner->fpu_context_engaged = 0;
        spinlock_unlock(&CPU->fpu_owner->lock);
        CPU->fpu_owner = NULL;
    }

    spinlock_lock(&THREAD->lock);
    if (THREAD->fpu_context_exists) {
        fpu_context_restore(THREAD->saved_fpu_context);
    } else {
        /* Allocate FPU context. */
        if (!THREAD->saved_fpu_context) {
            /* Might sleep */
            spinlock_unlock(&THREAD->lock);
            spinlock_unlock(&CPU->lock);
            THREAD->saved_fpu_context =
                (fpu_context_t *) slab_alloc(fpu_context_slab, 0);
            /* We may have switched CPUs during slab_alloc */
            goto restart;
        }
        fpu_init();
        THREAD->fpu_context_exists = 1;
    }
    CPU->fpu_owner = THREAD;
    THREAD->fpu_context_engaged = 1;
    spinlock_unlock(&THREAD->lock);

    spinlock_unlock(&CPU->lock);
}
#endif

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
    thread_t *t;
    runq_t *r;
    int i;

    ASSERT(CPU != NULL);

loop:

    if (atomic_get(&CPU->nrdy) == 0) {
        /*
         * Since there is nothing to run, the CPU goes to sleep
         * until a hardware interrupt or an IPI arrives.
         * This improves energy saving and hyperthreading.
         */

        /* Mark the CPU as idle for this clock tick. */
        spinlock_lock(&CPU->lock);
        CPU->idle = true;
        spinlock_unlock(&CPU->lock);

        interrupts_enable();
        /*
         * An interrupt might occur right now and wake up a thread.
         * In such a case, the CPU will still go to sleep
         * even though there is a runnable thread.
         */
        cpu_sleep();
        interrupts_disable();
        goto loop;
    }

    for (i = 0; i < RQ_COUNT; i++) {
        r = &CPU->rq[i];
        spinlock_lock(&r->lock);
        if (r->n == 0) {
            /*
             * If this queue is empty, try a lower-priority queue.
             */
            spinlock_unlock(&r->lock);
            continue;
        }

        atomic_dec(&CPU->nrdy);
        atomic_dec(&nrdy);
        r->n--;

        /*
         * Take the first thread from the queue.
         */
        t = list_get_instance(r->rq_head.next, thread_t, rq_link);
        list_remove(&t->rq_link);

        spinlock_unlock(&r->lock);

        spinlock_lock(&t->lock);
        t->cpu = CPU;

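        /*
         * Assign the time quantum: a thread taken from rq[i] gets
         * (i + 1) * 10 ms, so lower-priority (higher-index) queues run
         * in longer, but less frequent, time slices.
         */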
        t->ticks = us2ticks((i + 1) * 10000);
        t->priority = i;  /* Correct rq index */

        /*
         * Clear the THREAD_FLAG_STOLEN flag so that t can be migrated
         * when the need for load balancing arises.
         */
        t->flags &= ~THREAD_FLAG_STOLEN;
        spinlock_unlock(&t->lock);

        return t;
    }
    goto loop;
}

/** Prevent rq starvation
 *
 * Prevent low-priority threads from starving in run queues.
 *
 * When the function decides to relink the run queues, it reconnects
 * the respective list pointers so that, as a result, threads with
 * priority greater than or equal to 'start' are moved to a
 * higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
static void relink_rq(int start)
{
    link_t head;
    runq_t *r;
    int i, n;

    list_initialize(&head);
    spinlock_lock(&CPU->lock);
    if (CPU->needs_relink > NEEDS_RELINK_MAX) {
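        /*
         * Each pass moves the entire contents of rq[i + 1] up into rq[i]
         * for every i >= start, so threads stuck in low-priority queues
         * are promoted by one level whenever a relink is due.
         */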
        for (i = start; i < RQ_COUNT - 1; i++) {
            /* Remember and empty rq[i + 1]. */
            r = &CPU->rq[i + 1];
            spinlock_lock(&r->lock);
            list_concat(&head, &r->rq_head);
            n = r->n;
            r->n = 0;
            spinlock_unlock(&r->lock);

            /* Append rq[i + 1] to rq[i]. */
            r = &CPU->rq[i];
            spinlock_lock(&r->lock);
            list_concat(&r->rq_head, &head);
            r->n += n;
            spinlock_unlock(&r->lock);
        }
        CPU->needs_relink = 0;
    }
    spinlock_unlock(&CPU->lock);
}

/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler(void)
{
    volatile ipl_t ipl;

    ASSERT(CPU != NULL);

    ipl = interrupts_disable();

    if (atomic_get(&haltstate))
        halt();

    if (THREAD) {
        spinlock_lock(&THREAD->lock);

        /* Update thread kernel accounting. */
        THREAD->kcycles += get_cycle() - THREAD->last_cycle;

#ifndef CONFIG_FPU_LAZY
        fpu_context_save(THREAD->saved_fpu_context);
#endif
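        /*
         * Note (based on the usual HelenOS context_save() semantics, stated
         * here as an assumption): context_save() returns a nonzero value
         * right after it stores the current context, and appears to return
         * zero when control comes back through context_restore(), i.e. when
         * this thread is scheduled to run again. The branch below is
         * therefore the resume path.
         */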
        if (!context_save(&THREAD->saved_context)) {
            /*
             * This is the place where threads leave scheduler().
             */

            /* Save the current CPU cycle count. */
            THREAD->last_cycle = get_cycle();

            spinlock_unlock(&THREAD->lock);
            interrupts_restore(THREAD->saved_context.ipl);

            return;
        }

        /*
         * The interrupt priority level of the preempted thread is recorded
         * here to facilitate scheduler() invocations from
         * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
         */
        THREAD->saved_context.ipl = ipl;
    }

    /*
     * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, VM
     * and the preemption counter. At this point THE could be coming either
     * from THREAD's or CPU's stack.
     */
    the_copy(THE, (the_t *) CPU->stack);

    /*
     * We may not keep the old stack.
     * Reason: If we kept the old stack and got blocked, for instance, in
     * find_best_thread(), the old thread could get rescheduled by another
     * CPU and overwrite the part of its own stack that was also used by
     * the scheduler on this CPU.
     *
     * Moreover, we have to bypass the compiler-generated POP sequence
     * which is fooled by SP being set to the very top of the stack.
     * Therefore the scheduler() function continues in
     * scheduler_separated_stack().
     */
    context_save(&CPU->saved_context);
    context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
        (uintptr_t) CPU->stack, CPU_STACK_SIZE);
    context_restore(&CPU->saved_context);
    /* not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function,
 * executed on the new stack. It handles the actual
 * context switch to a new thread.
 *
 * Assumes THREAD->lock is held on entry.
 */
void scheduler_separated_stack(void)
{
    int priority;
    DEADLOCK_PROBE_INIT(p_joinwq);
    task_t *old_task = TASK;
    as_t *old_as = AS;

    ASSERT(CPU != NULL);

    /*
     * Hold the current task and the address space to prevent their
     * possible destruction should thread_destroy() be called on this or any
     * other processor while the scheduler is still using them.
     */
    if (old_task)
        task_hold(old_task);
    if (old_as)
        as_hold(old_as);

    if (THREAD) {
        /* Must be run after the switch to the scheduler stack. */
        after_thread_ran();

        switch (THREAD->state) {
        case Running:
            spinlock_unlock(&THREAD->lock);
            thread_ready(THREAD);
            break;

        case Exiting:
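            /*
             * A detached thread can be destroyed right away. A joinable
             * thread must first wake up a possible joiner sleeping in its
             * join wait queue and then linger until somebody calls
             * thread_detach() on it (see the comment below).
             */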
repeat:
            if (THREAD->detached) {
                thread_destroy(THREAD);
            } else {
                /*
                 * The thread structure is kept allocated until
                 * somebody calls thread_detach() on it.
                 */
                if (!spinlock_trylock(&THREAD->join_wq.lock)) {
                    /*
                     * Avoid deadlock.
                     */
                    spinlock_unlock(&THREAD->lock);
                    delay(HZ);
                    spinlock_lock(&THREAD->lock);
                    DEADLOCK_PROBE(p_joinwq,
                        DEADLOCK_THRESHOLD);
                    goto repeat;
                }
                _waitq_wakeup_unsafe(&THREAD->join_wq,
                    WAKEUP_FIRST);
                spinlock_unlock(&THREAD->join_wq.lock);

                THREAD->state = Lingering;
                spinlock_unlock(&THREAD->lock);
            }
            break;

        case Sleeping:
            /*
             * Prefer the thread after it's woken up.
             */
            THREAD->priority = -1;

            /*
             * We need to release wq->lock which we locked in
             * waitq_sleep(). Address of wq->lock is kept in
             * THREAD->sleep_queue.
             */
            spinlock_unlock(&THREAD->sleep_queue->lock);

            /*
             * Check for possible requests for out-of-context
             * invocation.
             */
            if (THREAD->call_me) {
                THREAD->call_me(THREAD->call_me_with);
                THREAD->call_me = NULL;
                THREAD->call_me_with = NULL;
            }

            spinlock_unlock(&THREAD->lock);

            break;

        default:
            /*
             * Entering state is unexpected.
             */
            panic("tid%" PRIu64 ": unexpected state %s.",
                THREAD->tid, thread_states[THREAD->state]);
            break;
        }

        THREAD = NULL;
    }

    THREAD = find_best_thread();

    spinlock_lock(&THREAD->lock);
    priority = THREAD->priority;
    spinlock_unlock(&THREAD->lock);

    relink_rq(priority);

    /*
     * If the old and the new task are the same, a lot of work is
     * avoided.
     */
    if (TASK != THREAD->task) {
        as_t *new_as = THREAD->task->as;

        /*
         * Note that it is possible for two tasks to share one address
         * space.
         */
        if (old_as != new_as) {
            /*
             * Both tasks and address spaces are different.
             * Replace the old one with the new one.
             */
            as_switch(old_as, new_as);
        }

        TASK = THREAD->task;
        before_task_runs();
    }

    if (old_task)
        task_release(old_task);
    if (old_as)
        as_release(old_as);

    spinlock_lock(&THREAD->lock);
    THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
    printf("cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
        ", nrdy=%ld)\n", CPU->id, THREAD->tid, THREAD->priority,
        THREAD->ticks, atomic_get(&CPU->nrdy));
#endif

    /*
     * Some architectures provide late kernel PA2KA(identity)
     * mapping in a page fault handler. However, the page fault
     * handler uses the kernel stack of the running thread and
     * therefore cannot be used to map it. The kernel stack, if
     * necessary, is to be mapped in before_thread_runs(). This
     * function must be executed before the switch to the new stack.
     */
    before_thread_runs();

    /*
     * Copy the knowledge of CPU, TASK, THREAD and the preemption counter
     * to the thread's stack.
     */
    the_copy(THE, (the_t *) THREAD->kstack);

    context_restore(&THREAD->saved_context);
    /* not reached */
}

#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * SMP load-balancing thread that supervises the supply of
 * threads for the CPU it is wired to.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
    thread_t *t;
    int count;
    atomic_count_t average;
    unsigned int i;
    int j;
    int k = 0;
    ipl_t ipl;

    /*
     * Detach kcpulb as nobody will call thread_join_timeout() on it.
     */
    thread_detach(THREAD);

loop:
    /*
     * Work in 1 s intervals.
     */
    thread_sleep(1);

not_satisfied:
    /*
     * Calculate the number of threads that will be migrated/stolen from
     * other CPUs. Note that the situation may have changed between two
     * passes. Get the most up-to-date counts each time.
     */
    average = atomic_get(&nrdy) / config.cpu_active + 1;
    count = average - atomic_get(&CPU->nrdy);
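    /*
     * 'count' is how many threads this CPU is short of the system-wide
     * average; the loops below try to steal exactly that many.
     */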

    if (count <= 0)
        goto satisfied;

    /*
     * Search the lowest-priority queues on all CPUs first and the
     * highest-priority queues on all CPUs last.
     */
    for (j = RQ_COUNT - 1; j >= 0; j--) {
        for (i = 0; i < config.cpu_active; i++) {
            link_t *l;
            runq_t *r;
            cpu_t *cpu;

            cpu = &cpus[(i + k) % config.cpu_active];

            /*
             * We are not interested in our own CPU.
             * No interrupt disabling is required because kcpulb
             * has THREAD_FLAG_WIRED.
             */
            if (CPU == cpu)
                continue;
            if (atomic_get(&cpu->nrdy) <= average)
                continue;

            ipl = interrupts_disable();
            r = &cpu->rq[j];
            spinlock_lock(&r->lock);
            if (r->n == 0) {
                spinlock_unlock(&r->lock);
                interrupts_restore(ipl);
                continue;
            }

            t = NULL;
            l = r->rq_head.prev;  /* search rq from the back */
            while (l != &r->rq_head) {
                t = list_get_instance(l, thread_t, rq_link);
                /*
                 * We don't want to steal CPU-wired threads or
                 * threads that have already been stolen. The
                 * latter prevents threads from migrating between
                 * CPUs without ever being run. We also don't want
                 * to steal threads whose FPU context is still in
                 * the CPU.
                 */
                spinlock_lock(&t->lock);
                if ((!(t->flags & (THREAD_FLAG_WIRED |
                    THREAD_FLAG_STOLEN))) &&
                    (!(t->fpu_context_engaged))) {
                    /*
                     * Remove t from r.
                     */
                    spinlock_unlock(&t->lock);

                    atomic_dec(&cpu->nrdy);
                    atomic_dec(&nrdy);

                    r->n--;
                    list_remove(&t->rq_link);

                    break;
                }
                spinlock_unlock(&t->lock);
                l = l->prev;
                t = NULL;
            }
            spinlock_unlock(&r->lock);

            if (t) {
                /*
                 * Ready t on the local CPU.
                 */
                spinlock_lock(&t->lock);
#ifdef KCPULB_VERBOSE
                printf("kcpulb%u: TID %" PRIu64 " -> cpu%u, "
                    "nrdy=%ld, avg=%ld\n", CPU->id, t->tid,
                    CPU->id, atomic_get(&CPU->nrdy),
                    atomic_get(&nrdy) / config.cpu_active);
#endif
                t->flags |= THREAD_FLAG_STOLEN;
                t->state = Entering;
                spinlock_unlock(&t->lock);

                thread_ready(t);

                interrupts_restore(ipl);

                if (--count == 0)
                    goto satisfied;

                /*
                 * We are not satisfied yet, so focus on another
                 * CPU next time.
                 */
                k++;

                continue;
            }
            interrupts_restore(ipl);
        }
    }

    if (atomic_get(&CPU->nrdy)) {
        /*
         * Be a little bit lightweight and let the migrated threads run.
         */
        scheduler();
    } else {
        /*
         * We failed to migrate a single thread.
         * Give up this turn.
         */
        goto loop;
    }

    goto not_satisfied;

satisfied:
    goto loop;
}

#endif /* CONFIG_SMP */


/** Print information about threads & scheduler queues. */
void sched_print_list(void)
{
    ipl_t ipl;
    unsigned int cpu, i;
    runq_t *r;
    thread_t *t;
    link_t *cur;

    /*
     * We are going to mess with scheduler structures,
     * so let's not be interrupted.
     */
    ipl = interrupts_disable();
    for (cpu = 0; cpu < config.cpu_count; cpu++) {

        if (!cpus[cpu].active)
            continue;

        spinlock_lock(&cpus[cpu].lock);
        printf("cpu%u: address=%p, nrdy=%ld, needs_relink=%" PRIs "\n",
            cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
            cpus[cpu].needs_relink);

        for (i = 0; i < RQ_COUNT; i++) {
            r = &cpus[cpu].rq[i];
            spinlock_lock(&r->lock);
            if (!r->n) {
                spinlock_unlock(&r->lock);
                continue;
            }
            printf("\trq[%u]: ", i);
            for (cur = r->rq_head.next; cur != &r->rq_head;
                cur = cur->next) {
                t = list_get_instance(cur, thread_t, rq_link);
                printf("%" PRIu64 "(%s) ", t->tid,
                    thread_states[t->state]);
            }
            printf("\n");
            spinlock_unlock(&r->lock);
        }
        spinlock_unlock(&cpus[cpu].lock);
    }

    interrupts_restore(ipl);
}

/** @}
 */