source: mainline/kernel/generic/src/proc/scheduler.c@ 06f81c4

ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 06f81c4 was 06f81c4, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 years ago

Check cpu_t::fpu_owner directly instead of thread_t::fpu_context_engaged

This results in net reduction in locking.

  • Property mode set to 100644
File size: 16.2 KB
Line 
1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup kernel_generic_proc
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Scheduler and load balancing.
36 *
37 * This file contains the scheduler and kcpulb kernel thread which
38 * performs load-balancing of per-CPU run queues.
39 */
40
41#include <assert.h>
42#include <atomic.h>
43#include <proc/scheduler.h>
44#include <proc/thread.h>
45#include <proc/task.h>
46#include <mm/frame.h>
47#include <mm/page.h>
48#include <mm/as.h>
49#include <time/timeout.h>
50#include <time/delay.h>
51#include <arch/asm.h>
52#include <arch/faddr.h>
53#include <arch/cycle.h>
54#include <atomic.h>
55#include <synch/spinlock.h>
56#include <config.h>
57#include <context.h>
58#include <fpu_context.h>
59#include <halt.h>
60#include <arch.h>
61#include <adt/list.h>
62#include <panic.h>
63#include <cpu.h>
64#include <stdio.h>
65#include <log.h>
66#include <stacktrace.h>
67
68static void scheduler_separated_stack(void);
69
70atomic_size_t nrdy; /**< Number of ready threads in the system. */
71
/** Prepare the CPU for the thread that is about to run.
 *
 * Runs the architecture hook, sets up FPU state according to the
 * configured FPU policy and, when udebug support is compiled in,
 * prints a pending user-space stack trace for the thread.
 *
 * THREAD->lock is held by the caller on entry.
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();

#if defined(CONFIG_FPU_LAZY)
	/*
	 * Lazy policy: the FPU stays usable only if this thread is the
	 * one whose context currently lives in the FPU.
	 */
	irq_spinlock_lock(&CPU->fpu_lock, true);

	if (CPU->fpu_owner != THREAD) {
		fpu_disable();
	} else {
		fpu_enable();
	}

	irq_spinlock_unlock(&CPU->fpu_lock, true);
#elif defined(CONFIG_FPU)
	/* Eager policy: bring in (or create) the thread's FPU context now. */
	fpu_enable();

	if (!THREAD->fpu_context_exists) {
		fpu_init();
		THREAD->fpu_context_exists = true;
	} else {
		fpu_context_restore(&THREAD->fpu_context);
	}
#endif

#ifdef CONFIG_UDEBUG
	if (THREAD->btrace) {
		istate_t *uspace_state = THREAD->udebug.uspace_state;

		/* Without a saved user-space state there is nothing to trace. */
		if (uspace_state != NULL) {
			printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
			stack_trace_istate(uspace_state);
		}

		/* The request is one-shot; clear it regardless of the outcome. */
		THREAD->btrace = false;
	}
#endif
}
116
/** Clean up after THREAD has stopped running.
 *
 * Called once the running thread has entered the scheduler; only the
 * architecture-specific hook is invoked here.
 *
 * THREAD->lock is held by the caller on entry.
 *
 */
static void after_thread_ran(void)
{
	/* All the work is architecture specific. */
	after_thread_ran_arch();
}
130
131#ifdef CONFIG_FPU_LAZY
132void scheduler_fpu_lazy_request(void)
133{
134 fpu_enable();
135 irq_spinlock_lock(&CPU->fpu_lock, false);
136
137 /* Save old context */
138 if (CPU->fpu_owner != NULL) {
139 fpu_context_save(&CPU->fpu_owner->fpu_context);
140 CPU->fpu_owner = NULL;
141 }
142
143 if (THREAD->fpu_context_exists) {
144 fpu_context_restore(&THREAD->fpu_context);
145 } else {
146 fpu_init();
147 THREAD->fpu_context_exists = true;
148 }
149
150 CPU->fpu_owner = THREAD;
151
152 irq_spinlock_unlock(&CPU->fpu_lock, false);
153}
154#endif /* CONFIG_FPU_LAZY */
155
/** Initialize the kernel scheduler.
 *
 * Currently there is nothing to set up, so the function is empty.
 *
 */
void scheduler_init(void)
{
	/* Intentionally empty. */
}
164
165/** Get thread to be scheduled
166 *
167 * Get the optimal thread to be scheduled
168 * according to thread accounting and scheduler
169 * policy.
170 *
171 * @return Thread to be scheduled.
172 *
173 */
174static thread_t *find_best_thread(void)
175{
176 assert(CPU != NULL);
177
178loop:
179 if (atomic_load(&CPU->nrdy) == 0) {
180 /*
181 * For there was nothing to run, the CPU goes to sleep
182 * until a hardware interrupt or an IPI comes.
183 * This improves energy saving and hyperthreading.
184 */
185 CPU->idle = true;
186
187 /*
188 * Go to sleep with interrupts enabled.
189 * Ideally, this should be atomic, but this is not guaranteed on
190 * all platforms yet, so it is possible we will go sleep when
191 * a thread has just become available.
192 */
193 cpu_interruptible_sleep();
194
195 /* Interrupts are disabled again. */
196 goto loop;
197 }
198
199 assert(!CPU->idle);
200
201 unsigned int i;
202 for (i = 0; i < RQ_COUNT; i++) {
203 irq_spinlock_lock(&(CPU->rq[i].lock), false);
204 if (CPU->rq[i].n == 0) {
205 /*
206 * If this queue is empty, try a lower-priority queue.
207 */
208 irq_spinlock_unlock(&(CPU->rq[i].lock), false);
209 continue;
210 }
211
212 atomic_dec(&CPU->nrdy);
213 atomic_dec(&nrdy);
214 CPU->rq[i].n--;
215
216 /*
217 * Take the first thread from the queue.
218 */
219 thread_t *thread = list_get_instance(
220 list_first(&CPU->rq[i].rq), thread_t, rq_link);
221 list_remove(&thread->rq_link);
222
223 irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);
224
225 thread->cpu = CPU;
226 thread->priority = i; /* Correct rq index */
227
228 /* Time allocation in microseconds. */
229 uint64_t time_to_run = (i + 1) * 10000;
230
231 /* This is safe because interrupts are disabled. */
232 CPU->preempt_deadline = CPU->current_clock_tick + us2ticks(time_to_run);
233
234 /*
235 * Clear the stolen flag so that it can be migrated
236 * when load balancing needs emerge.
237 */
238 thread->stolen = false;
239 irq_spinlock_unlock(&thread->lock, false);
240
241 return thread;
242 }
243
244 goto loop;
245}
246
247static void switch_task(task_t *task)
248{
249 /* If the task stays the same, a lot of work is avoided. */
250 if (TASK == task)
251 return;
252
253 as_t *old_as = AS;
254 as_t *new_as = task->as;
255
256 /* It is possible for two tasks to share one address space. */
257 if (old_as != new_as)
258 as_switch(old_as, new_as);
259
260 if (TASK)
261 task_release(TASK);
262
263 TASK = task;
264
265 task_hold(TASK);
266
267 before_task_runs_arch();
268}
269
270/** Prevent rq starvation
271 *
272 * Prevent low priority threads from starving in rq's.
273 *
274 * When the function decides to relink rq's, it reconnects
275 * respective pointers so that in result threads with 'pri'
276 * greater or equal start are moved to a higher-priority queue.
277 *
278 * @param start Threshold priority.
279 *
280 */
281static void relink_rq(int start)
282{
283 if (CPU->current_clock_tick < CPU->relink_deadline)
284 return;
285
286 CPU->relink_deadline = CPU->current_clock_tick + NEEDS_RELINK_MAX;
287
288 /* Temporary cache for lists we are moving. */
289 list_t list;
290 list_initialize(&list);
291
292 size_t n = 0;
293
294 /* Move every list (except the one with highest priority) one level up. */
295 for (int i = RQ_COUNT - 1; i > start; i--) {
296 irq_spinlock_lock(&CPU->rq[i].lock, false);
297
298 /* Swap lists. */
299 list_swap(&CPU->rq[i].rq, &list);
300
301 /* Swap number of items. */
302 size_t tmpn = CPU->rq[i].n;
303 CPU->rq[i].n = n;
304 n = tmpn;
305
306 irq_spinlock_unlock(&CPU->rq[i].lock, false);
307 }
308
309 /* Append the contents of rq[start + 1] to rq[start]. */
310 if (n != 0) {
311 irq_spinlock_lock(&CPU->rq[start].lock, false);
312 list_concat(&CPU->rq[start].rq, &list);
313 CPU->rq[start].n += n;
314 irq_spinlock_unlock(&CPU->rq[start].lock, false);
315 }
316}
317
318void scheduler(void)
319{
320 ipl_t ipl = interrupts_disable();
321
322 if (atomic_load(&haltstate))
323 halt();
324
325 if (THREAD) {
326 irq_spinlock_lock(&THREAD->lock, false);
327 }
328
329 scheduler_locked(ipl);
330}
331
332/** The scheduler
333 *
334 * The thread scheduling procedure.
335 * Passes control directly to
336 * scheduler_separated_stack().
337 *
338 */
339void scheduler_locked(ipl_t ipl)
340{
341 assert(CPU != NULL);
342
343 if (THREAD) {
344 /* Update thread kernel accounting */
345 THREAD->kcycles += get_cycle() - THREAD->last_cycle;
346
347#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
348 fpu_context_save(&THREAD->fpu_context);
349#endif
350 if (!context_save(&THREAD->saved_context)) {
351 /*
352 * This is the place where threads leave scheduler();
353 */
354
355 /* Save current CPU cycle */
356 THREAD->last_cycle = get_cycle();
357
358 irq_spinlock_unlock(&THREAD->lock, false);
359 interrupts_restore(THREAD->saved_ipl);
360
361 return;
362 }
363
364 /*
365 * Interrupt priority level of preempted thread is recorded
366 * here to facilitate scheduler() invocations from
367 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
368 *
369 */
370 THREAD->saved_ipl = ipl;
371 }
372
373 /*
374 * Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS
375 * and preemption counter. At this point CURRENT could be coming either
376 * from THREAD's or CPU's stack.
377 *
378 */
379 current_copy(CURRENT, (current_t *) CPU->stack);
380
381 /*
382 * We may not keep the old stack.
383 * Reason: If we kept the old stack and got blocked, for instance, in
384 * find_best_thread(), the old thread could get rescheduled by another
385 * CPU and overwrite the part of its own stack that was also used by
386 * the scheduler on this CPU.
387 *
388 * Moreover, we have to bypass the compiler-generated POP sequence
389 * which is fooled by SP being set to the very top of the stack.
390 * Therefore the scheduler() function continues in
391 * scheduler_separated_stack().
392 *
393 */
394 context_t ctx;
395 context_save(&ctx);
396 context_set(&ctx, FADDR(scheduler_separated_stack),
397 (uintptr_t) CPU->stack, STACK_SIZE);
398 context_restore(&ctx);
399
400 /* Not reached */
401}
402
403/** Scheduler stack switch wrapper
404 *
405 * Second part of the scheduler() function
406 * using new stack. Handling the actual context
407 * switch to a new thread.
408 *
409 */
410void scheduler_separated_stack(void)
411{
412 assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
413 assert(CPU != NULL);
414 assert(interrupts_disabled());
415
416 if (THREAD) {
417 /* Must be run after the switch to scheduler stack */
418 after_thread_ran();
419
420 switch (THREAD->state) {
421 case Running:
422 irq_spinlock_unlock(&THREAD->lock, false);
423 thread_ready(THREAD);
424 break;
425
426 case Exiting:
427 irq_spinlock_unlock(&THREAD->lock, false);
428 waitq_close(&THREAD->join_wq);
429
430 /*
431 * Release the reference CPU has for the thread.
432 * If there are no other references (e.g. threads calling join),
433 * the thread structure is deallocated.
434 */
435 thread_put(THREAD);
436 break;
437
438 case Sleeping:
439 /*
440 * Prefer the thread after it's woken up.
441 */
442 THREAD->priority = -1;
443 irq_spinlock_unlock(&THREAD->lock, false);
444 break;
445
446 default:
447 /*
448 * Entering state is unexpected.
449 */
450 panic("tid%" PRIu64 ": unexpected state %s.",
451 THREAD->tid, thread_states[THREAD->state]);
452 break;
453 }
454
455 THREAD = NULL;
456 }
457
458 THREAD = find_best_thread();
459
460 irq_spinlock_lock(&THREAD->lock, false);
461 int priority = THREAD->priority;
462 irq_spinlock_unlock(&THREAD->lock, false);
463
464 relink_rq(priority);
465
466 switch_task(THREAD->task);
467
468 irq_spinlock_lock(&THREAD->lock, false);
469 THREAD->state = Running;
470
471#ifdef SCHEDULER_VERBOSE
472 log(LF_OTHER, LVL_DEBUG,
473 "cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
474 ", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,
475 THREAD->ticks, atomic_load(&CPU->nrdy));
476#endif
477
478 /*
479 * Some architectures provide late kernel PA2KA(identity)
480 * mapping in a page fault handler. However, the page fault
481 * handler uses the kernel stack of the running thread and
482 * therefore cannot be used to map it. The kernel stack, if
483 * necessary, is to be mapped in before_thread_runs(). This
484 * function must be executed before the switch to the new stack.
485 */
486 before_thread_runs();
487
488 /*
489 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
490 * thread's stack.
491 */
492 current_copy(CURRENT, (current_t *) THREAD->kstack);
493
494 context_restore(&THREAD->saved_context);
495
496 /* Not reached */
497}
498
499#ifdef CONFIG_SMP
500
501static inline void fpu_owner_lock(cpu_t *cpu)
502{
503#ifdef CONFIG_FPU_LAZY
504 irq_spinlock_lock(&cpu->fpu_lock, false);
505#endif
506}
507
508static inline void fpu_owner_unlock(cpu_t *cpu)
509{
510#ifdef CONFIG_FPU_LAZY
511 irq_spinlock_unlock(&cpu->fpu_lock, false);
512#endif
513}
514
515static inline thread_t *fpu_owner(cpu_t *cpu)
516{
517#ifdef CONFIG_FPU_LAZY
518 assert(irq_spinlock_locked(&cpu->fpu_lock));
519 return cpu->fpu_owner;
520#else
521 return NULL;
522#endif
523}
524
525static thread_t *steal_thread_from(cpu_t *old_cpu, int i)
526{
527 runq_t *old_rq = &old_cpu->rq[i];
528 runq_t *new_rq = &CPU->rq[i];
529
530 ipl_t ipl = interrupts_disable();
531
532 fpu_owner_lock(old_cpu);
533 irq_spinlock_lock(&old_rq->lock, false);
534
535 /* Search rq from the back */
536 list_foreach_rev(old_rq->rq, rq_link, thread_t, thread) {
537
538 irq_spinlock_lock(&thread->lock, false);
539
540 /*
541 * Do not steal CPU-wired threads, threads
542 * already stolen, threads for which migration
543 * was temporarily disabled or threads whose
544 * FPU context is still in the CPU.
545 */
546 if (thread->stolen || thread->nomigrate ||
547 thread == fpu_owner(old_cpu)) {
548 irq_spinlock_unlock(&thread->lock, false);
549 continue;
550 }
551
552 fpu_owner_unlock(old_cpu);
553
554 thread->stolen = true;
555 thread->cpu = CPU;
556
557 irq_spinlock_unlock(&thread->lock, false);
558
559 /*
560 * Ready thread on local CPU
561 */
562
563#ifdef KCPULB_VERBOSE
564 log(LF_OTHER, LVL_DEBUG,
565 "kcpulb%u: TID %" PRIu64 " -> cpu%u, "
566 "nrdy=%ld, avg=%ld", CPU->id, thread->tid,
567 CPU->id, atomic_load(&CPU->nrdy),
568 atomic_load(&nrdy) / config.cpu_active);
569#endif
570
571 /* Remove thread from ready queue. */
572 old_rq->n--;
573 list_remove(&thread->rq_link);
574 irq_spinlock_unlock(&old_rq->lock, false);
575
576 /* Append thread to local queue. */
577 irq_spinlock_lock(&new_rq->lock, false);
578 list_append(&thread->rq_link, &new_rq->rq);
579 new_rq->n++;
580 irq_spinlock_unlock(&new_rq->lock, false);
581
582 atomic_dec(&old_cpu->nrdy);
583 atomic_inc(&CPU->nrdy);
584 interrupts_restore(ipl);
585 return thread;
586 }
587
588 irq_spinlock_unlock(&old_rq->lock, false);
589 fpu_owner_unlock(old_cpu);
590 interrupts_restore(ipl);
591 return NULL;
592}
593
594/** Load balancing thread
595 *
596 * SMP load balancing thread, supervising thread supplies
597 * for the CPU it's wired to.
598 *
599 * @param arg Generic thread argument (unused).
600 *
601 */
602void kcpulb(void *arg)
603{
604 size_t average;
605 size_t rdy;
606
607loop:
608 /*
609 * Work in 1s intervals.
610 */
611 thread_sleep(1);
612
613not_satisfied:
614 /*
615 * Calculate the number of threads that will be migrated/stolen from
616 * other CPU's. Note that situation can have changed between two
617 * passes. Each time get the most up to date counts.
618 *
619 */
620 average = atomic_load(&nrdy) / config.cpu_active + 1;
621 rdy = atomic_load(&CPU->nrdy);
622
623 if (average <= rdy)
624 goto satisfied;
625
626 size_t count = average - rdy;
627
628 /*
629 * Searching least priority queues on all CPU's first and most priority
630 * queues on all CPU's last.
631 */
632 size_t acpu;
633 int rq;
634
635 for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
636 for (acpu = 0; acpu < config.cpu_active; acpu++) {
637 cpu_t *cpu = &cpus[acpu];
638
639 /*
640 * Not interested in ourselves.
641 * Doesn't require interrupt disabling for kcpulb has
642 * THREAD_FLAG_WIRED.
643 *
644 */
645 if (CPU == cpu)
646 continue;
647
648 if (atomic_load(&cpu->nrdy) <= average)
649 continue;
650
651 if (steal_thread_from(cpu, rq) && --count == 0)
652 goto satisfied;
653 }
654 }
655
656 if (atomic_load(&CPU->nrdy)) {
657 /*
658 * Be a little bit light-weight and let migrated threads run.
659 *
660 */
661 scheduler();
662 } else {
663 /*
664 * We failed to migrate a single thread.
665 * Give up this turn.
666 *
667 */
668 goto loop;
669 }
670
671 goto not_satisfied;
672
673satisfied:
674 goto loop;
675}
676#endif /* CONFIG_SMP */
677
678/** Print information about threads & scheduler queues
679 *
680 */
681void sched_print_list(void)
682{
683 size_t cpu;
684 for (cpu = 0; cpu < config.cpu_count; cpu++) {
685 if (!cpus[cpu].active)
686 continue;
687
688 /* Technically a data race, but we don't really care in this case. */
689 int needs_relink = cpus[cpu].relink_deadline - cpus[cpu].current_clock_tick;
690
691 printf("cpu%u: address=%p, nrdy=%zu, needs_relink=%d\n",
692 cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy),
693 needs_relink);
694
695 unsigned int i;
696 for (i = 0; i < RQ_COUNT; i++) {
697 irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
698 if (cpus[cpu].rq[i].n == 0) {
699 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
700 continue;
701 }
702
703 printf("\trq[%u]: ", i);
704 list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
705 thread) {
706 printf("%" PRIu64 "(%s) ", thread->tid,
707 thread_states[thread->state]);
708 }
709 printf("\n");
710
711 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
712 }
713 }
714}
715
716/** @}
717 */
Note: See TracBrowser for help on using the repository browser.