source: mainline/kernel/generic/src/proc/scheduler.c@ 6e49dab

Last change on this file since 6e49dab was 6e49dab, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 18 months ago

Extract post-thread cleanup into a separate function

  • Property mode set to 100644
File size: 16.9 KB
Line 
1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup kernel_generic_proc
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Scheduler and load balancing.
36 *
37 * This file contains the scheduler and kcpulb kernel thread which
38 * performs load-balancing of per-CPU run queues.
39 */
40
41#include <assert.h>
42#include <atomic.h>
43#include <proc/scheduler.h>
44#include <proc/thread.h>
45#include <proc/task.h>
46#include <mm/frame.h>
47#include <mm/page.h>
48#include <mm/as.h>
49#include <time/timeout.h>
50#include <time/delay.h>
51#include <arch/asm.h>
52#include <arch/faddr.h>
53#include <arch/cycle.h>
54#include <atomic.h>
55#include <synch/spinlock.h>
56#include <config.h>
57#include <context.h>
58#include <fpu_context.h>
59#include <halt.h>
60#include <arch.h>
61#include <adt/list.h>
62#include <panic.h>
63#include <cpu.h>
64#include <stdio.h>
65#include <log.h>
66#include <stacktrace.h>
67
68static void scheduler_separated_stack(void);
69static void fpu_restore(void);
70
71atomic_size_t nrdy; /**< Number of ready threads in the system. */
72
/** Prepare the newly selected thread for execution.
 *
 * Runs the architecture-specific hook and sets up the FPU state for
 * THREAD. With CONFIG_UDEBUG enabled, a pending stack trace request
 * (THREAD->btrace) is served and cleared here as well.
 *
 * THREAD->lock is locked on entry.
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();
	fpu_restore();

#ifdef CONFIG_UDEBUG
	if (!THREAD->btrace)
		return;

	istate_t *uspace_state = THREAD->udebug.uspace_state;
	if (uspace_state != NULL) {
		printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
		stack_trace_istate(uspace_state);
	}

	/* The request is cleared even when there was no state to print. */
	THREAD->btrace = false;
#endif
}
100
101#ifdef CONFIG_FPU_LAZY
102void scheduler_fpu_lazy_request(void)
103{
104 fpu_enable();
105
106 /* We need this lock to ensure synchronization with thread destructor. */
107 irq_spinlock_lock(&CPU->fpu_lock, false);
108
109 /* Save old context */
110 thread_t *owner = atomic_load_explicit(&CPU->fpu_owner, memory_order_relaxed);
111 if (owner != NULL) {
112 fpu_context_save(&owner->fpu_context);
113 atomic_store_explicit(&CPU->fpu_owner, NULL, memory_order_relaxed);
114 }
115
116 irq_spinlock_unlock(&CPU->fpu_lock, false);
117
118 if (THREAD->fpu_context_exists) {
119 fpu_context_restore(&THREAD->fpu_context);
120 } else {
121 fpu_init();
122 THREAD->fpu_context_exists = true;
123 }
124
125 atomic_store_explicit(&CPU->fpu_owner, THREAD, memory_order_relaxed);
126}
127#endif /* CONFIG_FPU_LAZY */
128
/** Initialize the kernel scheduler.
 *
 * Currently there is nothing to set up; the function is an empty hook.
 *
 */
void scheduler_init(void)
{
	/* Intentionally empty. */
}
137
138/** Get thread to be scheduled
139 *
140 * Get the optimal thread to be scheduled
141 * according to thread accounting and scheduler
142 * policy.
143 *
144 * @return Thread to be scheduled.
145 *
146 */
147static thread_t *try_find_thread(int *rq_index)
148{
149 assert(interrupts_disabled());
150 assert(CPU != NULL);
151
152 if (atomic_load(&CPU->nrdy) == 0)
153 return NULL;
154
155 for (int i = 0; i < RQ_COUNT; i++) {
156 irq_spinlock_lock(&(CPU->rq[i].lock), false);
157 if (CPU->rq[i].n == 0) {
158 /*
159 * If this queue is empty, try a lower-priority queue.
160 */
161 irq_spinlock_unlock(&(CPU->rq[i].lock), false);
162 continue;
163 }
164
165 atomic_dec(&CPU->nrdy);
166 atomic_dec(&nrdy);
167 CPU->rq[i].n--;
168
169 /*
170 * Take the first thread from the queue.
171 */
172 thread_t *thread = list_get_instance(
173 list_first(&CPU->rq[i].rq), thread_t, rq_link);
174 list_remove(&thread->rq_link);
175
176 irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);
177
178 thread->cpu = CPU;
179 thread->priority = i; /* Correct rq index */
180
181 /* Time allocation in microseconds. */
182 uint64_t time_to_run = (i + 1) * 10000;
183
184 /* This is safe because interrupts are disabled. */
185 CPU_LOCAL->preempt_deadline =
186 CPU_LOCAL->current_clock_tick + us2ticks(time_to_run);
187
188 /*
189 * Clear the stolen flag so that it can be migrated
190 * when load balancing needs emerge.
191 */
192 thread->stolen = false;
193 irq_spinlock_unlock(&thread->lock, false);
194
195 *rq_index = i;
196 return thread;
197 }
198
199 return NULL;
200}
201
202/** Get thread to be scheduled
203 *
204 * Get the optimal thread to be scheduled
205 * according to thread accounting and scheduler
206 * policy.
207 *
208 * @return Thread to be scheduled.
209 *
210 */
211static thread_t *find_best_thread(int *rq_index)
212{
213 assert(interrupts_disabled());
214 assert(CPU != NULL);
215
216 while (true) {
217 thread_t *thread = try_find_thread(rq_index);
218
219 if (thread != NULL)
220 return thread;
221
222 /*
223 * For there was nothing to run, the CPU goes to sleep
224 * until a hardware interrupt or an IPI comes.
225 * This improves energy saving and hyperthreading.
226 */
227 CPU_LOCAL->idle = true;
228
229 /*
230 * Go to sleep with interrupts enabled.
231 * Ideally, this should be atomic, but this is not guaranteed on
232 * all platforms yet, so it is possible we will go sleep when
233 * a thread has just become available.
234 */
235 cpu_interruptible_sleep();
236 }
237}
238
239static void switch_task(task_t *task)
240{
241 /* If the task stays the same, a lot of work is avoided. */
242 if (TASK == task)
243 return;
244
245 as_t *old_as = AS;
246 as_t *new_as = task->as;
247
248 /* It is possible for two tasks to share one address space. */
249 if (old_as != new_as)
250 as_switch(old_as, new_as);
251
252 if (TASK)
253 task_release(TASK);
254
255 TASK = task;
256
257 task_hold(TASK);
258
259 before_task_runs_arch();
260}
261
262/** Prevent rq starvation
263 *
264 * Prevent low priority threads from starving in rq's.
265 *
266 * When the function decides to relink rq's, it reconnects
267 * respective pointers so that in result threads with 'pri'
268 * greater or equal start are moved to a higher-priority queue.
269 *
270 * @param start Threshold priority.
271 *
272 */
273static void relink_rq(int start)
274{
275 if (CPU_LOCAL->current_clock_tick < CPU_LOCAL->relink_deadline)
276 return;
277
278 CPU_LOCAL->relink_deadline = CPU_LOCAL->current_clock_tick + NEEDS_RELINK_MAX;
279
280 /* Temporary cache for lists we are moving. */
281 list_t list;
282 list_initialize(&list);
283
284 size_t n = 0;
285
286 /* Move every list (except the one with highest priority) one level up. */
287 for (int i = RQ_COUNT - 1; i > start; i--) {
288 irq_spinlock_lock(&CPU->rq[i].lock, false);
289
290 /* Swap lists. */
291 list_swap(&CPU->rq[i].rq, &list);
292
293 /* Swap number of items. */
294 size_t tmpn = CPU->rq[i].n;
295 CPU->rq[i].n = n;
296 n = tmpn;
297
298 irq_spinlock_unlock(&CPU->rq[i].lock, false);
299 }
300
301 /* Append the contents of rq[start + 1] to rq[start]. */
302 if (n != 0) {
303 irq_spinlock_lock(&CPU->rq[start].lock, false);
304 list_concat(&CPU->rq[start].rq, &list);
305 CPU->rq[start].n += n;
306 irq_spinlock_unlock(&CPU->rq[start].lock, false);
307 }
308}
309
/**
 * Handle the FPU state of the current thread before switching away from it.
 *
 * With eager FPU switching (CONFIG_FPU without CONFIG_FPU_LAZY) the FPU
 * context is saved into THREAD; in all other configurations this is a
 * no-op.
 */
static void fpu_cleanup(void)
{
#if defined(CONFIG_FPU) && !defined(CONFIG_FPU_LAZY)
	fpu_context_save(&THREAD->fpu_context);
#endif
}
320
/**
 * Set up the FPU state for THREAD after switching to it.
 *
 * With CONFIG_FPU_LAZY the FPU is merely enabled or disabled, depending on
 * whether THREAD is the recorded owner of this CPU's FPU. With plain
 * CONFIG_FPU the context of THREAD is restored, or initialized if THREAD
 * has none yet. Without FPU support nothing is done.
 */
static void fpu_restore(void)
{
#ifdef CONFIG_FPU_LAZY
	/*
	 * The only concurrent modification possible for fpu_owner here is
	 * another thread changing it from itself to NULL in its destructor.
	 */
	thread_t *fpu_owner =
	    atomic_load_explicit(&CPU->fpu_owner, memory_order_relaxed);

	if (fpu_owner != THREAD)
		fpu_disable();
	else
		fpu_enable();

#elif defined CONFIG_FPU
	fpu_enable();

	if (!THREAD->fpu_context_exists) {
		fpu_init();
		THREAD->fpu_context_exists = true;
	} else {
		fpu_context_restore(&THREAD->fpu_context);
	}
#endif
}
349
350void scheduler(void)
351{
352 ipl_t ipl = interrupts_disable();
353
354 if (atomic_load(&haltstate))
355 halt();
356
357 if (THREAD) {
358 irq_spinlock_lock(&THREAD->lock, false);
359 }
360
361 scheduler_locked(ipl);
362}
363
364static void cleanup_after_thread(thread_t *thread, state_t out_state)
365{
366 assert(CURRENT->mutex_locks == 0);
367 assert(interrupts_disabled());
368
369 int expected;
370
371 switch (out_state) {
372 case Running:
373 thread_ready(thread);
374 break;
375
376 case Exiting:
377 waitq_close(&thread->join_wq);
378
379 /*
380 * Release the reference CPU has for the thread.
381 * If there are no other references (e.g. threads calling join),
382 * the thread structure is deallocated.
383 */
384 thread_put(thread);
385 break;
386
387 case Sleeping:
388 expected = SLEEP_INITIAL;
389
390 /* Only set SLEEP_ASLEEP in sleep pad if it's still in initial state */
391 if (!atomic_compare_exchange_strong_explicit(&thread->sleep_state,
392 &expected, SLEEP_ASLEEP,
393 memory_order_acq_rel, memory_order_acquire)) {
394
395 assert(expected == SLEEP_WOKE);
396 /* The thread has already been woken up, requeue immediately. */
397 thread_ready(thread);
398 }
399 break;
400
401 default:
402 /*
403 * Entering state is unexpected.
404 */
405 panic("tid%" PRIu64 ": unexpected state %s.",
406 thread->tid, thread_states[thread->state]);
407 break;
408 }
409}
410
411/** The scheduler
412 *
413 * The thread scheduling procedure.
414 * Passes control directly to
415 * scheduler_separated_stack().
416 *
417 */
418void scheduler_locked(ipl_t ipl)
419{
420 assert(CPU != NULL);
421
422 if (THREAD) {
423 /* Update thread kernel accounting */
424 THREAD->kcycles += get_cycle() - THREAD->last_cycle;
425
426 fpu_cleanup();
427
428 if (!context_save(&THREAD->saved_context)) {
429 /*
430 * This is the place where threads leave scheduler();
431 */
432
433 /* Save current CPU cycle */
434 THREAD->last_cycle = get_cycle();
435
436 irq_spinlock_unlock(&THREAD->lock, false);
437 interrupts_restore(THREAD->saved_ipl);
438
439 return;
440 }
441
442 /*
443 * Interrupt priority level of preempted thread is recorded
444 * here to facilitate scheduler() invocations from
445 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
446 *
447 */
448 THREAD->saved_ipl = ipl;
449 }
450
451 /*
452 * Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS
453 * and preemption counter. At this point CURRENT could be coming either
454 * from THREAD's or CPU's stack.
455 *
456 */
457 current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);
458
459 /*
460 * We may not keep the old stack.
461 * Reason: If we kept the old stack and got blocked, for instance, in
462 * find_best_thread(), the old thread could get rescheduled by another
463 * CPU and overwrite the part of its own stack that was also used by
464 * the scheduler on this CPU.
465 *
466 * Moreover, we have to bypass the compiler-generated POP sequence
467 * which is fooled by SP being set to the very top of the stack.
468 * Therefore the scheduler() function continues in
469 * scheduler_separated_stack().
470 *
471 */
472 context_t ctx;
473 context_save(&ctx);
474 context_set(&ctx, FADDR(scheduler_separated_stack),
475 (uintptr_t) CPU_LOCAL->stack, STACK_SIZE);
476 context_restore(&ctx);
477
478 /* Not reached */
479}
480
481/** Scheduler stack switch wrapper
482 *
483 * Second part of the scheduler() function
484 * using new stack. Handling the actual context
485 * switch to a new thread.
486 *
487 */
488void scheduler_separated_stack(void)
489{
490 assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
491 assert(CPU != NULL);
492 assert(interrupts_disabled());
493
494 if (THREAD) {
495 after_thread_ran_arch();
496
497 state_t state = THREAD->state;
498
499 if (state == Sleeping) {
500 /* Prefer the thread after it's woken up. */
501 THREAD->priority = -1;
502 }
503
504 irq_spinlock_unlock(&THREAD->lock, false);
505
506 cleanup_after_thread(THREAD, state);
507
508 THREAD = NULL;
509 }
510
511 int rq_index;
512 THREAD = find_best_thread(&rq_index);
513
514 relink_rq(rq_index);
515
516 switch_task(THREAD->task);
517
518 irq_spinlock_lock(&THREAD->lock, false);
519 THREAD->state = Running;
520
521#ifdef SCHEDULER_VERBOSE
522 log(LF_OTHER, LVL_DEBUG,
523 "cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
524 ", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,
525 THREAD->ticks, atomic_load(&CPU->nrdy));
526#endif
527
528 /*
529 * Some architectures provide late kernel PA2KA(identity)
530 * mapping in a page fault handler. However, the page fault
531 * handler uses the kernel stack of the running thread and
532 * therefore cannot be used to map it. The kernel stack, if
533 * necessary, is to be mapped in before_thread_runs(). This
534 * function must be executed before the switch to the new stack.
535 */
536 before_thread_runs();
537
538 /*
539 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
540 * thread's stack.
541 */
542 current_copy(CURRENT, (current_t *) THREAD->kstack);
543
544 context_restore(&THREAD->saved_context);
545
546 /* Not reached */
547}
548
549#ifdef CONFIG_SMP
550
551static thread_t *steal_thread_from(cpu_t *old_cpu, int i)
552{
553 runq_t *old_rq = &old_cpu->rq[i];
554 runq_t *new_rq = &CPU->rq[i];
555
556 ipl_t ipl = interrupts_disable();
557
558 irq_spinlock_lock(&old_rq->lock, false);
559
560 /*
561 * If fpu_owner is any thread in the list, its store is seen here thanks to
562 * the runqueue lock.
563 */
564 thread_t *fpu_owner = atomic_load_explicit(&old_cpu->fpu_owner,
565 memory_order_relaxed);
566
567 /* Search rq from the back */
568 list_foreach_rev(old_rq->rq, rq_link, thread_t, thread) {
569
570 irq_spinlock_lock(&thread->lock, false);
571
572 /*
573 * Do not steal CPU-wired threads, threads
574 * already stolen, threads for which migration
575 * was temporarily disabled or threads whose
576 * FPU context is still in the CPU.
577 */
578 if (thread->stolen || thread->nomigrate ||
579 thread == fpu_owner) {
580 irq_spinlock_unlock(&thread->lock, false);
581 continue;
582 }
583
584 thread->stolen = true;
585 thread->cpu = CPU;
586
587 irq_spinlock_unlock(&thread->lock, false);
588
589 /*
590 * Ready thread on local CPU
591 */
592
593#ifdef KCPULB_VERBOSE
594 log(LF_OTHER, LVL_DEBUG,
595 "kcpulb%u: TID %" PRIu64 " -> cpu%u, "
596 "nrdy=%ld, avg=%ld", CPU->id, thread->tid,
597 CPU->id, atomic_load(&CPU->nrdy),
598 atomic_load(&nrdy) / config.cpu_active);
599#endif
600
601 /* Remove thread from ready queue. */
602 old_rq->n--;
603 list_remove(&thread->rq_link);
604 irq_spinlock_unlock(&old_rq->lock, false);
605
606 /* Append thread to local queue. */
607 irq_spinlock_lock(&new_rq->lock, false);
608 list_append(&thread->rq_link, &new_rq->rq);
609 new_rq->n++;
610 irq_spinlock_unlock(&new_rq->lock, false);
611
612 atomic_dec(&old_cpu->nrdy);
613 atomic_inc(&CPU->nrdy);
614 interrupts_restore(ipl);
615 return thread;
616 }
617
618 irq_spinlock_unlock(&old_rq->lock, false);
619 interrupts_restore(ipl);
620 return NULL;
621}
622
623/** Load balancing thread
624 *
625 * SMP load balancing thread, supervising thread supplies
626 * for the CPU it's wired to.
627 *
628 * @param arg Generic thread argument (unused).
629 *
630 */
631void kcpulb(void *arg)
632{
633 size_t average;
634 size_t rdy;
635
636loop:
637 /*
638 * Work in 1s intervals.
639 */
640 thread_sleep(1);
641
642not_satisfied:
643 /*
644 * Calculate the number of threads that will be migrated/stolen from
645 * other CPU's. Note that situation can have changed between two
646 * passes. Each time get the most up to date counts.
647 *
648 */
649 average = atomic_load(&nrdy) / config.cpu_active + 1;
650 rdy = atomic_load(&CPU->nrdy);
651
652 if (average <= rdy)
653 goto satisfied;
654
655 size_t count = average - rdy;
656
657 /*
658 * Searching least priority queues on all CPU's first and most priority
659 * queues on all CPU's last.
660 */
661 size_t acpu;
662 int rq;
663
664 for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
665 for (acpu = 0; acpu < config.cpu_active; acpu++) {
666 cpu_t *cpu = &cpus[acpu];
667
668 /*
669 * Not interested in ourselves.
670 * Doesn't require interrupt disabling for kcpulb has
671 * THREAD_FLAG_WIRED.
672 *
673 */
674 if (CPU == cpu)
675 continue;
676
677 if (atomic_load(&cpu->nrdy) <= average)
678 continue;
679
680 if (steal_thread_from(cpu, rq) && --count == 0)
681 goto satisfied;
682 }
683 }
684
685 if (atomic_load(&CPU->nrdy)) {
686 /*
687 * Be a little bit light-weight and let migrated threads run.
688 *
689 */
690 scheduler();
691 } else {
692 /*
693 * We failed to migrate a single thread.
694 * Give up this turn.
695 *
696 */
697 goto loop;
698 }
699
700 goto not_satisfied;
701
702satisfied:
703 goto loop;
704}
705#endif /* CONFIG_SMP */
706
707/** Print information about threads & scheduler queues
708 *
709 */
710void sched_print_list(void)
711{
712 size_t cpu;
713 for (cpu = 0; cpu < config.cpu_count; cpu++) {
714 if (!cpus[cpu].active)
715 continue;
716
717 printf("cpu%u: address=%p, nrdy=%zu\n",
718 cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy));
719
720 unsigned int i;
721 for (i = 0; i < RQ_COUNT; i++) {
722 irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
723 if (cpus[cpu].rq[i].n == 0) {
724 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
725 continue;
726 }
727
728 printf("\trq[%u]: ", i);
729 list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
730 thread) {
731 printf("%" PRIu64 "(%s) ", thread->tid,
732 thread_states[thread->state]);
733 }
734 printf("\n");
735
736 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
737 }
738 }
739}
740
741/** @}
742 */
Note: See TracBrowser for help on using the repository browser.