source: mainline/kernel/generic/src/proc/scheduler.c@ 4760793

Last change on this file since 4760793 was 4760793, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 18 months ago

Add CPU_LOCAL alongside CPU and segregate fields that are only used locally

This makes it more clear which fields can be used without synchronization
and which need more care.

  • Property mode set to 100644
File size: 16.4 KB
Line 
1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup kernel_generic_proc
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Scheduler and load balancing.
36 *
37 * This file contains the scheduler and kcpulb kernel thread which
38 * performs load-balancing of per-CPU run queues.
39 */
40
41#include <assert.h>
42#include <atomic.h>
43#include <proc/scheduler.h>
44#include <proc/thread.h>
45#include <proc/task.h>
46#include <mm/frame.h>
47#include <mm/page.h>
48#include <mm/as.h>
49#include <time/timeout.h>
50#include <time/delay.h>
51#include <arch/asm.h>
52#include <arch/faddr.h>
53#include <arch/cycle.h>
54#include <atomic.h>
55#include <synch/spinlock.h>
56#include <config.h>
57#include <context.h>
58#include <fpu_context.h>
59#include <halt.h>
60#include <arch.h>
61#include <adt/list.h>
62#include <panic.h>
63#include <cpu.h>
64#include <stdio.h>
65#include <log.h>
66#include <stacktrace.h>
67
68static void scheduler_separated_stack(void);
69
70atomic_size_t nrdy; /**< Number of ready threads in the system. */
71
72/** Take actions before new thread runs.
73 *
74 * Perform actions that need to be
75 * taken before the newly selected
76 * thread is passed control.
77 *
78 * THREAD->lock is locked on entry
79 *
80 */
81static void before_thread_runs(void)
82{
83 before_thread_runs_arch();
84
85#ifdef CONFIG_FPU_LAZY
86 /*
87 * The only concurrent modification possible for fpu_owner here is
88 * another thread changing it from itself to NULL in its destructor.
89 */
90 thread_t *owner = atomic_load_explicit(&CPU->fpu_owner,
91 memory_order_relaxed);
92
93 if (THREAD == owner)
94 fpu_enable();
95 else
96 fpu_disable();
97#elif defined CONFIG_FPU
98 fpu_enable();
99 if (THREAD->fpu_context_exists)
100 fpu_context_restore(&THREAD->fpu_context);
101 else {
102 fpu_init();
103 THREAD->fpu_context_exists = true;
104 }
105#endif
106
107#ifdef CONFIG_UDEBUG
108 if (THREAD->btrace) {
109 istate_t *istate = THREAD->udebug.uspace_state;
110 if (istate != NULL) {
111 printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
112 stack_trace_istate(istate);
113 }
114
115 THREAD->btrace = false;
116 }
117#endif
118}
119
120/** Take actions after THREAD had run.
121 *
122 * Perform actions that need to be
123 * taken after the running thread
124 * had been preempted by the scheduler.
125 *
126 * THREAD->lock is locked on entry
127 *
128 */
129static void after_thread_ran(void)
130{
131 after_thread_ran_arch();
132}
133
134#ifdef CONFIG_FPU_LAZY
135void scheduler_fpu_lazy_request(void)
136{
137 fpu_enable();
138
139 /* We need this lock to ensure synchronization with thread destructor. */
140 irq_spinlock_lock(&CPU->fpu_lock, false);
141
142 /* Save old context */
143 thread_t *owner = atomic_load_explicit(&CPU->fpu_owner, memory_order_relaxed);
144 if (owner != NULL) {
145 fpu_context_save(&owner->fpu_context);
146 atomic_store_explicit(&CPU->fpu_owner, NULL, memory_order_relaxed);
147 }
148
149 irq_spinlock_unlock(&CPU->fpu_lock, false);
150
151 if (THREAD->fpu_context_exists) {
152 fpu_context_restore(&THREAD->fpu_context);
153 } else {
154 fpu_init();
155 THREAD->fpu_context_exists = true;
156 }
157
158 atomic_store_explicit(&CPU->fpu_owner, THREAD, memory_order_relaxed);
159}
160#endif /* CONFIG_FPU_LAZY */
161
162/** Initialize scheduler
163 *
164 * Initialize kernel scheduler.
165 *
166 */
167void scheduler_init(void)
168{
169}
170
171/** Get thread to be scheduled
172 *
173 * Get the optimal thread to be scheduled
174 * according to thread accounting and scheduler
175 * policy.
176 *
177 * @return Thread to be scheduled.
178 *
179 */
180static thread_t *try_find_thread(int *rq_index)
181{
182 assert(interrupts_disabled());
183 assert(CPU != NULL);
184
185 if (atomic_load(&CPU->nrdy) == 0)
186 return NULL;
187
188 for (int i = 0; i < RQ_COUNT; i++) {
189 irq_spinlock_lock(&(CPU->rq[i].lock), false);
190 if (CPU->rq[i].n == 0) {
191 /*
192 * If this queue is empty, try a lower-priority queue.
193 */
194 irq_spinlock_unlock(&(CPU->rq[i].lock), false);
195 continue;
196 }
197
198 atomic_dec(&CPU->nrdy);
199 atomic_dec(&nrdy);
200 CPU->rq[i].n--;
201
202 /*
203 * Take the first thread from the queue.
204 */
205 thread_t *thread = list_get_instance(
206 list_first(&CPU->rq[i].rq), thread_t, rq_link);
207 list_remove(&thread->rq_link);
208
209 irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);
210
211 thread->cpu = CPU;
212 thread->priority = i; /* Correct rq index */
213
214 /* Time allocation in microseconds. */
215 uint64_t time_to_run = (i + 1) * 10000;
216
217 /* This is safe because interrupts are disabled. */
218 CPU_LOCAL->preempt_deadline =
219 CPU_LOCAL->current_clock_tick + us2ticks(time_to_run);
220
221 /*
222 * Clear the stolen flag so that it can be migrated
223 * when load balancing needs emerge.
224 */
225 thread->stolen = false;
226 irq_spinlock_unlock(&thread->lock, false);
227
228 *rq_index = i;
229 return thread;
230 }
231
232 return NULL;
233}
234
235/** Get thread to be scheduled
236 *
237 * Get the optimal thread to be scheduled
238 * according to thread accounting and scheduler
239 * policy.
240 *
241 * @return Thread to be scheduled.
242 *
243 */
244static thread_t *find_best_thread(int *rq_index)
245{
246 assert(interrupts_disabled());
247 assert(CPU != NULL);
248
249 while (true) {
250 thread_t *thread = try_find_thread(rq_index);
251
252 if (thread != NULL)
253 return thread;
254
255 /*
256 * For there was nothing to run, the CPU goes to sleep
257 * until a hardware interrupt or an IPI comes.
258 * This improves energy saving and hyperthreading.
259 */
260 CPU_LOCAL->idle = true;
261
262 /*
263 * Go to sleep with interrupts enabled.
264 * Ideally, this should be atomic, but this is not guaranteed on
265 * all platforms yet, so it is possible we will go sleep when
266 * a thread has just become available.
267 */
268 cpu_interruptible_sleep();
269 }
270}
271
272static void switch_task(task_t *task)
273{
274 /* If the task stays the same, a lot of work is avoided. */
275 if (TASK == task)
276 return;
277
278 as_t *old_as = AS;
279 as_t *new_as = task->as;
280
281 /* It is possible for two tasks to share one address space. */
282 if (old_as != new_as)
283 as_switch(old_as, new_as);
284
285 if (TASK)
286 task_release(TASK);
287
288 TASK = task;
289
290 task_hold(TASK);
291
292 before_task_runs_arch();
293}
294
295/** Prevent rq starvation
296 *
297 * Prevent low priority threads from starving in rq's.
298 *
299 * When the function decides to relink rq's, it reconnects
300 * respective pointers so that in result threads with 'pri'
301 * greater or equal start are moved to a higher-priority queue.
302 *
303 * @param start Threshold priority.
304 *
305 */
306static void relink_rq(int start)
307{
308 if (CPU_LOCAL->current_clock_tick < CPU_LOCAL->relink_deadline)
309 return;
310
311 CPU_LOCAL->relink_deadline = CPU_LOCAL->current_clock_tick + NEEDS_RELINK_MAX;
312
313 /* Temporary cache for lists we are moving. */
314 list_t list;
315 list_initialize(&list);
316
317 size_t n = 0;
318
319 /* Move every list (except the one with highest priority) one level up. */
320 for (int i = RQ_COUNT - 1; i > start; i--) {
321 irq_spinlock_lock(&CPU->rq[i].lock, false);
322
323 /* Swap lists. */
324 list_swap(&CPU->rq[i].rq, &list);
325
326 /* Swap number of items. */
327 size_t tmpn = CPU->rq[i].n;
328 CPU->rq[i].n = n;
329 n = tmpn;
330
331 irq_spinlock_unlock(&CPU->rq[i].lock, false);
332 }
333
334 /* Append the contents of rq[start + 1] to rq[start]. */
335 if (n != 0) {
336 irq_spinlock_lock(&CPU->rq[start].lock, false);
337 list_concat(&CPU->rq[start].rq, &list);
338 CPU->rq[start].n += n;
339 irq_spinlock_unlock(&CPU->rq[start].lock, false);
340 }
341}
342
343void scheduler(void)
344{
345 ipl_t ipl = interrupts_disable();
346
347 if (atomic_load(&haltstate))
348 halt();
349
350 if (THREAD) {
351 irq_spinlock_lock(&THREAD->lock, false);
352 }
353
354 scheduler_locked(ipl);
355}
356
357/** The scheduler
358 *
359 * The thread scheduling procedure.
360 * Passes control directly to
361 * scheduler_separated_stack().
362 *
363 */
364void scheduler_locked(ipl_t ipl)
365{
366 assert(CPU != NULL);
367
368 if (THREAD) {
369 /* Update thread kernel accounting */
370 THREAD->kcycles += get_cycle() - THREAD->last_cycle;
371
372#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
373 fpu_context_save(&THREAD->fpu_context);
374#endif
375 if (!context_save(&THREAD->saved_context)) {
376 /*
377 * This is the place where threads leave scheduler();
378 */
379
380 /* Save current CPU cycle */
381 THREAD->last_cycle = get_cycle();
382
383 irq_spinlock_unlock(&THREAD->lock, false);
384 interrupts_restore(THREAD->saved_ipl);
385
386 return;
387 }
388
389 /*
390 * Interrupt priority level of preempted thread is recorded
391 * here to facilitate scheduler() invocations from
392 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
393 *
394 */
395 THREAD->saved_ipl = ipl;
396 }
397
398 /*
399 * Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS
400 * and preemption counter. At this point CURRENT could be coming either
401 * from THREAD's or CPU's stack.
402 *
403 */
404 current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);
405
406 /*
407 * We may not keep the old stack.
408 * Reason: If we kept the old stack and got blocked, for instance, in
409 * find_best_thread(), the old thread could get rescheduled by another
410 * CPU and overwrite the part of its own stack that was also used by
411 * the scheduler on this CPU.
412 *
413 * Moreover, we have to bypass the compiler-generated POP sequence
414 * which is fooled by SP being set to the very top of the stack.
415 * Therefore the scheduler() function continues in
416 * scheduler_separated_stack().
417 *
418 */
419 context_t ctx;
420 context_save(&ctx);
421 context_set(&ctx, FADDR(scheduler_separated_stack),
422 (uintptr_t) CPU_LOCAL->stack, STACK_SIZE);
423 context_restore(&ctx);
424
425 /* Not reached */
426}
427
428/** Scheduler stack switch wrapper
429 *
430 * Second part of the scheduler() function
431 * using new stack. Handling the actual context
432 * switch to a new thread.
433 *
434 */
435void scheduler_separated_stack(void)
436{
437 assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
438 assert(CPU != NULL);
439 assert(interrupts_disabled());
440
441 if (THREAD) {
442 /* Must be run after the switch to scheduler stack */
443 after_thread_ran();
444
445 switch (THREAD->state) {
446 case Running:
447 irq_spinlock_unlock(&THREAD->lock, false);
448 thread_ready(THREAD);
449 break;
450
451 case Exiting:
452 irq_spinlock_unlock(&THREAD->lock, false);
453 waitq_close(&THREAD->join_wq);
454
455 /*
456 * Release the reference CPU has for the thread.
457 * If there are no other references (e.g. threads calling join),
458 * the thread structure is deallocated.
459 */
460 thread_put(THREAD);
461 break;
462
463 case Sleeping:
464 /*
465 * Prefer the thread after it's woken up.
466 */
467 THREAD->priority = -1;
468 irq_spinlock_unlock(&THREAD->lock, false);
469 break;
470
471 default:
472 /*
473 * Entering state is unexpected.
474 */
475 panic("tid%" PRIu64 ": unexpected state %s.",
476 THREAD->tid, thread_states[THREAD->state]);
477 break;
478 }
479
480 THREAD = NULL;
481 }
482
483 int rq_index;
484 THREAD = find_best_thread(&rq_index);
485
486 relink_rq(rq_index);
487
488 switch_task(THREAD->task);
489
490 irq_spinlock_lock(&THREAD->lock, false);
491 THREAD->state = Running;
492
493#ifdef SCHEDULER_VERBOSE
494 log(LF_OTHER, LVL_DEBUG,
495 "cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
496 ", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,
497 THREAD->ticks, atomic_load(&CPU->nrdy));
498#endif
499
500 /*
501 * Some architectures provide late kernel PA2KA(identity)
502 * mapping in a page fault handler. However, the page fault
503 * handler uses the kernel stack of the running thread and
504 * therefore cannot be used to map it. The kernel stack, if
505 * necessary, is to be mapped in before_thread_runs(). This
506 * function must be executed before the switch to the new stack.
507 */
508 before_thread_runs();
509
510 /*
511 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
512 * thread's stack.
513 */
514 current_copy(CURRENT, (current_t *) THREAD->kstack);
515
516 context_restore(&THREAD->saved_context);
517
518 /* Not reached */
519}
520
521#ifdef CONFIG_SMP
522
523static thread_t *steal_thread_from(cpu_t *old_cpu, int i)
524{
525 runq_t *old_rq = &old_cpu->rq[i];
526 runq_t *new_rq = &CPU->rq[i];
527
528 ipl_t ipl = interrupts_disable();
529
530 irq_spinlock_lock(&old_rq->lock, false);
531
532 /*
533 * If fpu_owner is any thread in the list, its store is seen here thanks to
534 * the runqueue lock.
535 */
536 thread_t *fpu_owner = atomic_load_explicit(&old_cpu->fpu_owner,
537 memory_order_relaxed);
538
539 /* Search rq from the back */
540 list_foreach_rev(old_rq->rq, rq_link, thread_t, thread) {
541
542 irq_spinlock_lock(&thread->lock, false);
543
544 /*
545 * Do not steal CPU-wired threads, threads
546 * already stolen, threads for which migration
547 * was temporarily disabled or threads whose
548 * FPU context is still in the CPU.
549 */
550 if (thread->stolen || thread->nomigrate ||
551 thread == fpu_owner) {
552 irq_spinlock_unlock(&thread->lock, false);
553 continue;
554 }
555
556 thread->stolen = true;
557 thread->cpu = CPU;
558
559 irq_spinlock_unlock(&thread->lock, false);
560
561 /*
562 * Ready thread on local CPU
563 */
564
565#ifdef KCPULB_VERBOSE
566 log(LF_OTHER, LVL_DEBUG,
567 "kcpulb%u: TID %" PRIu64 " -> cpu%u, "
568 "nrdy=%ld, avg=%ld", CPU->id, thread->tid,
569 CPU->id, atomic_load(&CPU->nrdy),
570 atomic_load(&nrdy) / config.cpu_active);
571#endif
572
573 /* Remove thread from ready queue. */
574 old_rq->n--;
575 list_remove(&thread->rq_link);
576 irq_spinlock_unlock(&old_rq->lock, false);
577
578 /* Append thread to local queue. */
579 irq_spinlock_lock(&new_rq->lock, false);
580 list_append(&thread->rq_link, &new_rq->rq);
581 new_rq->n++;
582 irq_spinlock_unlock(&new_rq->lock, false);
583
584 atomic_dec(&old_cpu->nrdy);
585 atomic_inc(&CPU->nrdy);
586 interrupts_restore(ipl);
587 return thread;
588 }
589
590 irq_spinlock_unlock(&old_rq->lock, false);
591 interrupts_restore(ipl);
592 return NULL;
593}
594
595/** Load balancing thread
596 *
597 * SMP load balancing thread, supervising thread supplies
598 * for the CPU it's wired to.
599 *
600 * @param arg Generic thread argument (unused).
601 *
602 */
603void kcpulb(void *arg)
604{
605 size_t average;
606 size_t rdy;
607
608loop:
609 /*
610 * Work in 1s intervals.
611 */
612 thread_sleep(1);
613
614not_satisfied:
615 /*
616 * Calculate the number of threads that will be migrated/stolen from
617 * other CPU's. Note that situation can have changed between two
618 * passes. Each time get the most up to date counts.
619 *
620 */
621 average = atomic_load(&nrdy) / config.cpu_active + 1;
622 rdy = atomic_load(&CPU->nrdy);
623
624 if (average <= rdy)
625 goto satisfied;
626
627 size_t count = average - rdy;
628
629 /*
630 * Searching least priority queues on all CPU's first and most priority
631 * queues on all CPU's last.
632 */
633 size_t acpu;
634 int rq;
635
636 for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
637 for (acpu = 0; acpu < config.cpu_active; acpu++) {
638 cpu_t *cpu = &cpus[acpu];
639
640 /*
641 * Not interested in ourselves.
642 * Doesn't require interrupt disabling for kcpulb has
643 * THREAD_FLAG_WIRED.
644 *
645 */
646 if (CPU == cpu)
647 continue;
648
649 if (atomic_load(&cpu->nrdy) <= average)
650 continue;
651
652 if (steal_thread_from(cpu, rq) && --count == 0)
653 goto satisfied;
654 }
655 }
656
657 if (atomic_load(&CPU->nrdy)) {
658 /*
659 * Be a little bit light-weight and let migrated threads run.
660 *
661 */
662 scheduler();
663 } else {
664 /*
665 * We failed to migrate a single thread.
666 * Give up this turn.
667 *
668 */
669 goto loop;
670 }
671
672 goto not_satisfied;
673
674satisfied:
675 goto loop;
676}
677#endif /* CONFIG_SMP */
678
679/** Print information about threads & scheduler queues
680 *
681 */
682void sched_print_list(void)
683{
684 size_t cpu;
685 for (cpu = 0; cpu < config.cpu_count; cpu++) {
686 if (!cpus[cpu].active)
687 continue;
688
689 printf("cpu%u: address=%p, nrdy=%zu\n",
690 cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy));
691
692 unsigned int i;
693 for (i = 0; i < RQ_COUNT; i++) {
694 irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
695 if (cpus[cpu].rq[i].n == 0) {
696 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
697 continue;
698 }
699
700 printf("\trq[%u]: ", i);
701 list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
702 thread) {
703 printf("%" PRIu64 "(%s) ", thread->tid,
704 thread_states[thread->state]);
705 }
706 printf("\n");
707
708 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
709 }
710 }
711}
712
713/** @}
714 */
Note: See TracBrowser for help on using the repository browser.