source: mainline/generic/src/proc/scheduler.c@ 248fc1a

Last change on this file since 248fc1a was 248fc1a, checked in by Ondrej Palkovsky <ondrap@…>, 19 years ago

Fixed some typos in the slab allocator.
The scheduler now has a better load-balancing algorithm.
Unfortunately, it reveals a deadlock in the slab allocator :-/

/*
 * Copyright (C) 2001-2004 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/heap.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <func.h>
#include <arch.h>
#include <list.h>
#include <panic.h>
#include <typedefs.h>
#include <cpu.h>
#include <print.h>
#include <debug.h>

atomic_t nrdy;

/** Take actions before new thread runs
 *
 * Perform actions that need to be
 * taken before the newly selected
 * thread is passed control.
 *
 */
void before_thread_runs(void)
{
    before_thread_runs_arch();
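    /*
     * With CONFIG_FPU_LAZY, the FPU is enabled only if this thread already
     * owns the CPU's FPU state; otherwise it is disabled, so the thread's
     * first FPU use is expected to trap and be handled by
     * scheduler_fpu_lazy_request(). Without lazy switching, the FPU context
     * is restored (or freshly initialized) eagerly right here.
     */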
#ifdef CONFIG_FPU_LAZY
    if (THREAD == CPU->fpu_owner)
        fpu_enable();
    else
        fpu_disable();
#else
    fpu_enable();
    if (THREAD->fpu_context_exists)
        fpu_context_restore(&(THREAD->saved_fpu_context));
    else {
        fpu_init();
        THREAD->fpu_context_exists = 1;
    }
#endif
}

#ifdef CONFIG_FPU_LAZY
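/** Handle a lazy FPU switch request
 *
 * Save the FPU context of the CPU's previous FPU owner (if any), then
 * restore or initialize the context of the current THREAD and make it
 * the new owner. The owner is marked fpu_context_engaged so that the
 * load balancer will not migrate it while its state lives in this CPU.
 */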
void scheduler_fpu_lazy_request(void)
{
    fpu_enable();
    if (CPU->fpu_owner != NULL) {
        fpu_context_save(&CPU->fpu_owner->saved_fpu_context);
        /* don't prevent migration */
        CPU->fpu_owner->fpu_context_engaged = 0;
    }
    if (THREAD->fpu_context_exists)
        fpu_context_restore(&THREAD->saved_fpu_context);
    else {
        fpu_init();
        THREAD->fpu_context_exists = 1;
    }
    CPU->fpu_owner = THREAD;
    THREAD->fpu_context_engaged = 1;
}
#endif

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}


/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
    thread_t *t;
    runq_t *r;
    int i;

    ASSERT(CPU != NULL);

loop:
    interrupts_enable();

    if (atomic_get(&CPU->nrdy) == 0) {
        /*
         * Since there was nothing to run, the CPU goes to sleep
         * until a hardware interrupt or an IPI comes.
         * This saves energy and is friendly to hyperthreading.
         */
        cpu_sleep();
        goto loop;
    }

    interrupts_disable();

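    /*
     * Scan the run queues from the highest priority (index 0) downwards.
     * The time quantum handed out below grows with the queue index, so
     * lower-priority threads are picked less often but run for longer.
     */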
    i = 0;
    for (; i < RQ_COUNT; i++) {
        r = &CPU->rq[i];
        spinlock_lock(&r->lock);
        if (r->n == 0) {
            /*
             * If this queue is empty, try a lower-priority queue.
             */
            spinlock_unlock(&r->lock);
            continue;
        }

        atomic_dec(&CPU->nrdy);
        atomic_dec(&nrdy);
        r->n--;

        /*
         * Take the first thread from the queue.
         */
        t = list_get_instance(r->rq_head.next, thread_t, rq_link);
        list_remove(&t->rq_link);

        spinlock_unlock(&r->lock);

        spinlock_lock(&t->lock);
        t->cpu = CPU;

        t->ticks = us2ticks((i + 1) * 10000);
        t->priority = i; /* update to the correct rq index */

        /*
         * Clear the X_STOLEN flag so that t can be migrated again
         * when the need for load balancing arises.
         */
        t->flags &= ~X_STOLEN;
        spinlock_unlock(&t->lock);

        return t;
    }
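    /*
     * All run queues turned out to be empty even though nrdy suggested
     * otherwise; the queues and counters may have changed under us, so
     * start over.
     */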
    goto loop;
}


/** Prevent rq starvation
 *
 * Prevent low-priority threads from starving in run queues.
 *
 * When the function decides to relink the run queues, it reconnects
 * the respective pointers so that, as a result, threads with priority
 * greater than or equal to 'start' are moved to a higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
static void relink_rq(int start)
{
    link_t head;
    runq_t *r;
    int i, n;

    list_initialize(&head);
    spinlock_lock(&CPU->lock);
    if (CPU->needs_relink > NEEDS_RELINK_MAX) {
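        /*
         * Each pass below empties rq[i + 1] into rq[i], so every thread
         * sitting in a queue below 'start' is promoted one priority level.
         */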
        for (i = start; i < RQ_COUNT - 1; i++) {
            /* remember and empty rq[i + 1] */
            r = &CPU->rq[i + 1];
            spinlock_lock(&r->lock);
            list_concat(&head, &r->rq_head);
            n = r->n;
            r->n = 0;
            spinlock_unlock(&r->lock);

            /* append rq[i + 1] to rq[i] */
            r = &CPU->rq[i];
            spinlock_lock(&r->lock);
            list_concat(&r->rq_head, &head);
            r->n += n;
            spinlock_unlock(&r->lock);
        }
        CPU->needs_relink = 0;
    }
    spinlock_unlock(&CPU->lock);
}


/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function,
 * running on the new stack. It handles the
 * actual context switch to a new thread.
 *
 */
static void scheduler_separated_stack(void)
{
    int priority;

    ASSERT(CPU != NULL);

    if (THREAD) {
        switch (THREAD->state) {
        case Running:
            THREAD->state = Ready;
            spinlock_unlock(&THREAD->lock);
            thread_ready(THREAD);
            break;

        case Exiting:
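            /*
             * Release everything the exiting thread still holds: its kernel
             * (and, if present, user) stack frames, its links in the task
             * and in the global list of threads, the CPU's FPU-owner
             * reference if it points at this thread, and finally the
             * thread structure itself.
             */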
            frame_free((__address) THREAD->kstack);
            if (THREAD->ustack) {
                frame_free((__address) THREAD->ustack);
            }

            /*
             * Detach from the containing task.
             */
            spinlock_lock(&TASK->lock);
            list_remove(&THREAD->th_link);
            spinlock_unlock(&TASK->lock);

            spinlock_unlock(&THREAD->lock);

            spinlock_lock(&threads_lock);
            list_remove(&THREAD->threads_link);
            spinlock_unlock(&threads_lock);

            spinlock_lock(&CPU->lock);
            if (CPU->fpu_owner == THREAD)
                CPU->fpu_owner = NULL;
            spinlock_unlock(&CPU->lock);

            free(THREAD);

            break;

        case Sleeping:
            /*
             * Prefer the thread after it's woken up.
             */
            THREAD->priority = -1;

            /*
             * We need to release wq->lock which we locked in waitq_sleep().
             * The address of wq->lock is kept in THREAD->sleep_queue.
             */
            spinlock_unlock(&THREAD->sleep_queue->lock);

            /*
             * Check for possible requests for out-of-context invocation.
             */
            if (THREAD->call_me) {
                THREAD->call_me(THREAD->call_me_with);
                THREAD->call_me = NULL;
                THREAD->call_me_with = NULL;
            }

            spinlock_unlock(&THREAD->lock);

            break;

        default:
            /*
             * The thread is in an unexpected state.
             */
            panic("tid%d: unexpected state %s\n", THREAD->tid, thread_states[THREAD->state]);
            break;
        }
        THREAD = NULL;
    }

    THREAD = find_best_thread();

    spinlock_lock(&THREAD->lock);
    priority = THREAD->priority;
    spinlock_unlock(&THREAD->lock);

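    /*
     * Guard against starvation: if the CPU's needs_relink counter has
     * exceeded its threshold, every queue below the priority of the
     * thread we are about to run is promoted by one level.
     */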
    relink_rq(priority);

    spinlock_lock(&THREAD->lock);

    /*
     * If the old and the new task are the same, a lot of work is avoided.
     */
    if (TASK != THREAD->task) {
        as_t *as1 = NULL;
        as_t *as2;

        if (TASK) {
            spinlock_lock(&TASK->lock);
            as1 = TASK->as;
            spinlock_unlock(&TASK->lock);
        }

        spinlock_lock(&THREAD->task->lock);
        as2 = THREAD->task->as;
        spinlock_unlock(&THREAD->task->lock);

        /*
         * Note that it is possible for two tasks to share one address space.
         */
        if (as1 != as2) {
            /*
             * Both the tasks and the address spaces are different.
             * Replace the old address space with the new one.
             */
            as_install(as2);
        }
        TASK = THREAD->task;
    }

    THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
    printf("cpu%d: tid %d (priority=%d,ticks=%d,nrdy=%d)\n", CPU->id, THREAD->tid, THREAD->priority, THREAD->ticks, atomic_get(&CPU->nrdy));
#endif

    /*
     * Copy the knowledge of CPU, TASK, THREAD and preemption counter to thread's stack.
     */
    the_copy(THE, (the_t *) THREAD->kstack);

    context_restore(&THREAD->saved_context);
    /* not reached */
}


/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler(void)
{
    volatile ipl_t ipl;

    ASSERT(CPU != NULL);

    ipl = interrupts_disable();

    if (atomic_get(&haltstate))
        halt();

    if (THREAD) {
        spinlock_lock(&THREAD->lock);
#ifndef CONFIG_FPU_LAZY
        fpu_context_save(&(THREAD->saved_fpu_context));
#endif
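        /*
         * context_save() returns a nonzero value on the direct call and
         * zero when the saved context is resumed by context_restore();
         * the zero branch below is therefore where a preempted thread
         * eventually wakes up and leaves scheduler().
         */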
        if (!context_save(&THREAD->saved_context)) {
            /*
             * This is the place where threads leave scheduler();
             */
            before_thread_runs();
            spinlock_unlock(&THREAD->lock);
            interrupts_restore(THREAD->saved_context.ipl);
            return;
        }

        /*
         * Interrupt priority level of preempted thread is recorded here
         * to facilitate scheduler() invocations from interrupts_disable()'d
         * code (e.g. waitq_sleep_timeout()).
         */
        THREAD->saved_context.ipl = ipl;
    }

    /*
     * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, VM
     * and preemption counter. At this point THE could be coming either
     * from THREAD's or CPU's stack.
     */
    the_copy(THE, (the_t *) CPU->stack);

    /*
     * We may not keep the old stack.
     * Reason: If we kept the old stack and got blocked, for instance, in
     * find_best_thread(), the old thread could get rescheduled by another
     * CPU and overwrite the part of its own stack that was also used by
     * the scheduler on this CPU.
     *
     * Moreover, we have to bypass the compiler-generated POP sequence
     * which is fooled by SP being set to the very top of the stack.
     * Therefore the scheduler() function continues in
     * scheduler_separated_stack().
     */
    context_save(&CPU->saved_context);
    context_set(&CPU->saved_context, FADDR(scheduler_separated_stack), (__address) CPU->stack, CPU_STACK_SIZE);
    context_restore(&CPU->saved_context);
    /* not reached */
}


#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * SMP load balancing thread, supervising the supply
 * of threads for the CPU it's wired to.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
    thread_t *t;
    int count, average, i, j, k = 0;
    ipl_t ipl;

loop:
    /*
     * Work in 1s intervals.
     */
    thread_sleep(1);

not_satisfied:
    /*
     * Calculate the number of threads that will be migrated/stolen from
     * other CPUs. Note that the situation can have changed between two
     * passes. Each time, get the most up-to-date counts.
     */
    average = atomic_get(&nrdy) / config.cpu_active;
    count = average - atomic_get(&CPU->nrdy);

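    /*
     * A negative count means this CPU already has more ready threads than
     * the system-wide average, so there is nothing worth stealing.
     */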
    if (count < 0)
        goto satisfied;

    if (!count) { /* Try to steal threads from CPUs that have more than the average count */
        count = 1;
        average += 1;
    }

    /*
     * Search the lowest-priority queues on all CPUs first and the
     * highest-priority queues on all CPUs last.
     */
    for (j = RQ_COUNT - 1; j >= 0; j--) {
        for (i = 0; i < config.cpu_active; i++) {
            link_t *l;
            runq_t *r;
            cpu_t *cpu;

            cpu = &cpus[(i + k) % config.cpu_active];

            /*
             * Not interested in ourselves.
             * No interrupt disabling is needed because kcpulb is X_WIRED.
             */
            if (CPU == cpu)
                continue;
            if (atomic_get(&cpu->nrdy) <= average)
                continue;

restart:
            ipl = interrupts_disable();
            r = &cpu->rq[j];
            spinlock_lock(&r->lock);
            if (r->n == 0) {
                spinlock_unlock(&r->lock);
                interrupts_restore(ipl);
                continue;
            }

            t = NULL;
            l = r->rq_head.prev; /* search rq from the back */
            while (l != &r->rq_head) {
                t = list_get_instance(l, thread_t, rq_link);
                /*
                 * We don't want to steal CPU-wired threads or threads that
                 * have already been stolen. The latter prevents threads from
                 * migrating between CPUs without ever being run. We also
                 * don't want to steal threads whose FPU context is still in
                 * the CPU.
                 */
                spinlock_lock(&t->lock);
                if ((!(t->flags & (X_WIRED | X_STOLEN))) && (!(t->fpu_context_engaged))) {

                    /*
                     * Remove t from r.
                     */

                    spinlock_unlock(&t->lock);

                    /*
                     * Here we have to avoid deadlock with relink_rq(),
                     * because it locks cpu and r in a different order than we do.
                     */
                    if (!spinlock_trylock(&cpu->lock)) {
                        /* Release all locks and try again. */
                        spinlock_unlock(&r->lock);
                        interrupts_restore(ipl);
                        goto restart;
                    }
                    atomic_dec(&cpu->nrdy);
                    spinlock_unlock(&cpu->lock);

                    atomic_dec(&nrdy);

                    r->n--;
                    list_remove(&t->rq_link);

                    break;
                }
                spinlock_unlock(&t->lock);
                l = l->prev;
                t = NULL;
            }
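            /*
             * At this point t is either a thread we have just removed from
             * cpu's run queue, or NULL if no suitable candidate was found.
             */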
            spinlock_unlock(&r->lock);

            if (t) {
                /*
                 * Ready t on the local CPU.
                 */
                spinlock_lock(&t->lock);
#ifdef KCPULB_VERBOSE
                printf("kcpulb%d: TID %d -> cpu%d, nrdy=%d, avg=%d\n", CPU->id, t->tid, CPU->id, atomic_get(&CPU->nrdy), atomic_get(&nrdy) / config.cpu_active);
#endif
                t->flags |= X_STOLEN;
                spinlock_unlock(&t->lock);

                thread_ready(t);

                interrupts_restore(ipl);

                if (--count == 0)
                    goto satisfied;

                /*
                 * We are not satisfied yet, focus on another CPU next time.
                 */
                k++;

                continue;
            }
            interrupts_restore(ipl);
        }
    }

    if (atomic_get(&CPU->nrdy)) {
        /*
         * Be a little bit light-weight and let the migrated threads run.
         */
        scheduler();
    } else {
        /*
         * We failed to migrate a single thread.
         * Give up this turn.
         */
        goto loop;
    }

    goto not_satisfied;

satisfied:
    goto loop;
}

#endif /* CONFIG_SMP */


/** Print information about threads & scheduler queues */
void sched_print_list(void)
{
    ipl_t ipl;
    int cpu, i;
    runq_t *r;
    thread_t *t;
    link_t *cur;

    /* We are going to mess with scheduler structures,
     * let's not be interrupted */
    ipl = interrupts_disable();
    printf("*********** Scheduler dump ***********\n");
    for (cpu = 0; cpu < config.cpu_count; cpu++) {
        if (!cpus[cpu].active)
            continue;
        spinlock_lock(&cpus[cpu].lock);
        printf("cpu%d: nrdy: %d needs_relink: %d\n",
               cpus[cpu].id, atomic_get(&cpus[cpu].nrdy), cpus[cpu].needs_relink);

        for (i = 0; i < RQ_COUNT; i++) {
            r = &cpus[cpu].rq[i];
            spinlock_lock(&r->lock);
            if (!r->n) {
                spinlock_unlock(&r->lock);
                continue;
            }
            printf("\tRq %d: ", i);
            for (cur = r->rq_head.next; cur != &r->rq_head; cur = cur->next) {
                t = list_get_instance(cur, thread_t, rq_link);
                printf("%d(%s) ", t->tid, thread_states[t->state]);
            }
            printf("\n");
            spinlock_unlock(&r->lock);
        }
        spinlock_unlock(&cpus[cpu].lock);
    }

    interrupts_restore(ipl);
}