Context Navigation

scheduler.c@ 4760793

Visit:

Last change on this file since 4760793 was 4760793, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 18 months ago

Add CPU_LOCAL alongside CPU and segregate fields that are only used locally

This makes it more clear which fields can be used without synchronization
and which need more care.

Property mode set to 100644

File size: 16.4 KB

Line
1	/*
2	* Copyright (c) 2010 Jakub Jermar
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/** @addtogroup kernel_generic_proc
30	* @{
31	*/
32
33	/**
34	* @file
35	* @brief Scheduler and load balancing.
36	*
37	* This file contains the scheduler and kcpulb kernel thread which
38	* performs load-balancing of per-CPU run queues.
39	*/
40
41	#include <assert.h>
42	#include <atomic.h>
43	#include <proc/scheduler.h>
44	#include <proc/thread.h>
45	#include <proc/task.h>
46	#include <mm/frame.h>
47	#include <mm/page.h>
48	#include <mm/as.h>
49	#include <time/timeout.h>
50	#include <time/delay.h>
51	#include <arch/asm.h>
52	#include <arch/faddr.h>
53	#include <arch/cycle.h>
54	#include <atomic.h>
55	#include <synch/spinlock.h>
56	#include <config.h>
57	#include <context.h>
58	#include <fpu_context.h>
59	#include <halt.h>
60	#include <arch.h>
61	#include <adt/list.h>
62	#include <panic.h>
63	#include <cpu.h>
64	#include <stdio.h>
65	#include <log.h>
66	#include <stacktrace.h>
67
/* Second half of the scheduler; runs on the per-CPU scheduler stack. */
static void scheduler_separated_stack(void);

/** Number of ready threads in the system. */
atomic_size_t nrdy;
71
/** Prepare the CPU for the thread that is about to run.
 *
 * Runs the architecture-specific hook first, then sets up the FPU
 * according to the configured FPU policy and, when udebug support
 * is compiled in, prints a pending user-space stack trace.
 *
 * THREAD->lock is held by the caller.
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();

#ifdef CONFIG_FPU_LAZY
	/*
	 * fpu_owner can only be changed concurrently by another thread
	 * that resets it from itself to NULL in its destructor.
	 */
	thread_t *fpu_holder = atomic_load_explicit(&CPU->fpu_owner,
	    memory_order_relaxed);

	/* Keep the FPU enabled only if its contents belong to THREAD. */
	if (fpu_holder != THREAD) {
		fpu_disable();
	} else {
		fpu_enable();
	}
#elif defined CONFIG_FPU
	fpu_enable();

	if (!THREAD->fpu_context_exists) {
		/* First run of this thread: start with a fresh FPU state. */
		fpu_init();
		THREAD->fpu_context_exists = true;
	} else {
		fpu_context_restore(&THREAD->fpu_context);
	}
#endif

#ifdef CONFIG_UDEBUG
	if (THREAD->btrace) {
		istate_t *uspace_istate = THREAD->udebug.uspace_state;

		if (uspace_istate != NULL) {
			printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
			stack_trace_istate(uspace_istate);
		}

		/* The request is one-shot. */
		THREAD->btrace = false;
	}
#endif
}
119
/** Clean up after THREAD has been taken off the CPU.
 *
 * Delegates to the architecture-specific hook; no generic work
 * is done here.
 *
 * THREAD->lock is held by the caller.
 *
 */
static void after_thread_ran(void)
{
	after_thread_ran_arch();
}
133
#ifdef CONFIG_FPU_LAZY
/** Hand the FPU over to the current thread (lazy FPU switching).
 *
 * Enables the FPU, saves the FPU context of the previous owner
 * (if there is one), then restores or initializes the context of
 * THREAD and records THREAD as the new owner of this CPU's FPU.
 *
 */
void scheduler_fpu_lazy_request(void)
{
	fpu_enable();

	/* The lock synchronizes us with the thread destructor. */
	irq_spinlock_lock(&CPU->fpu_lock, false);

	/* Evict the previous owner's context, if any. */
	thread_t *prev_owner = atomic_load_explicit(&CPU->fpu_owner,
	    memory_order_relaxed);
	if (prev_owner != NULL) {
		fpu_context_save(&prev_owner->fpu_context);
		atomic_store_explicit(&CPU->fpu_owner, NULL,
		    memory_order_relaxed);
	}

	irq_spinlock_unlock(&CPU->fpu_lock, false);

	if (!THREAD->fpu_context_exists) {
		/* THREAD has never used the FPU: give it a clean state. */
		fpu_init();
		THREAD->fpu_context_exists = true;
	} else {
		fpu_context_restore(&THREAD->fpu_context);
	}

	atomic_store_explicit(&CPU->fpu_owner, THREAD, memory_order_relaxed);
}
#endif /* CONFIG_FPU_LAZY */
161
/** Initialize the kernel scheduler.
 *
 * Currently there is nothing to set up, so the body is empty.
 *
 */
void scheduler_init(void)
{
}
170
171	/** Get thread to be scheduled
172	*
173	* Get the optimal thread to be scheduled
174	* according to thread accounting and scheduler
175	* policy.
176	*
177	* @return Thread to be scheduled.
178	*
179	*/
180	static thread_t try_find_thread(int rq_index)
181	{
182	assert(interrupts_disabled());
183	assert(CPU != NULL);
184
185	if (atomic_load(&CPU->nrdy) == 0)
186	return NULL;
187
188	for (int i = 0; i < RQ_COUNT; i++) {
189	irq_spinlock_lock(&(CPU->rq[i].lock), false);
190	if (CPU->rq[i].n == 0) {
191	/*
192	* If this queue is empty, try a lower-priority queue.
193	*/
194	irq_spinlock_unlock(&(CPU->rq[i].lock), false);
195	continue;
196	}
197
198	atomic_dec(&CPU->nrdy);
199	atomic_dec(&nrdy);
200	CPU->rq[i].n--;
201
202	/*
203	* Take the first thread from the queue.
204	*/
205	thread_t *thread = list_get_instance(
206	list_first(&CPU->rq[i].rq), thread_t, rq_link);
207	list_remove(&thread->rq_link);
208
209	irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);
210
211	thread->cpu = CPU;
212	thread->priority = i; /* Correct rq index */
213
214	/* Time allocation in microseconds. */
215	uint64_t time_to_run = (i + 1) * 10000;
216
217	/* This is safe because interrupts are disabled. */
218	CPU_LOCAL->preempt_deadline =
219	CPU_LOCAL->current_clock_tick + us2ticks(time_to_run);
220
221	/*
222	* Clear the stolen flag so that it can be migrated
223	* when load balancing needs emerge.
224	*/
225	thread->stolen = false;
226	irq_spinlock_unlock(&thread->lock, false);
227
228	*rq_index = i;
229	return thread;
230	}
231
232	return NULL;
233	}
234
235	/** Get thread to be scheduled
236	*
237	* Get the optimal thread to be scheduled
238	* according to thread accounting and scheduler
239	* policy.
240	*
241	* @return Thread to be scheduled.
242	*
243	*/
244	static thread_t find_best_thread(int rq_index)
245	{
246	assert(interrupts_disabled());
247	assert(CPU != NULL);
248
249	while (true) {
250	thread_t *thread = try_find_thread(rq_index);
251
252	if (thread != NULL)
253	return thread;
254
255	/*
256	* For there was nothing to run, the CPU goes to sleep
257	* until a hardware interrupt or an IPI comes.
258	* This improves energy saving and hyperthreading.
259	*/
260	CPU_LOCAL->idle = true;
261
262	/*
263	* Go to sleep with interrupts enabled.
264	* Ideally, this should be atomic, but this is not guaranteed on
265	* all platforms yet, so it is possible we will go sleep when
266	* a thread has just become available.
267	*/
268	cpu_interruptible_sleep();
269	}
270	}
271
272	static void switch_task(task_t *task)
273	{
274	/* If the task stays the same, a lot of work is avoided. */
275	if (TASK == task)
276	return;
277
278	as_t *old_as = AS;
279	as_t *new_as = task->as;
280
281	/* It is possible for two tasks to share one address space. */
282	if (old_as != new_as)
283	as_switch(old_as, new_as);
284
285	if (TASK)
286	task_release(TASK);
287
288	TASK = task;
289
290	task_hold(TASK);
291
292	before_task_runs_arch();
293	}
294
295	/** Prevent rq starvation
296	*
297	* Prevent low priority threads from starving in rq's.
298	*
299	* When the function decides to relink rq's, it reconnects
300	* respective pointers so that in result threads with 'pri'
301	* greater or equal start are moved to a higher-priority queue.
302	*
303	* @param start Threshold priority.
304	*
305	*/
306	static void relink_rq(int start)
307	{
308	if (CPU_LOCAL->current_clock_tick < CPU_LOCAL->relink_deadline)
309	return;
310
311	CPU_LOCAL->relink_deadline = CPU_LOCAL->current_clock_tick + NEEDS_RELINK_MAX;
312
313	/* Temporary cache for lists we are moving. */
314	list_t list;
315	list_initialize(&list);
316
317	size_t n = 0;
318
319	/* Move every list (except the one with highest priority) one level up. */
320	for (int i = RQ_COUNT - 1; i > start; i--) {
321	irq_spinlock_lock(&CPU->rq[i].lock, false);
322
323	/* Swap lists. */
324	list_swap(&CPU->rq[i].rq, &list);
325
326	/* Swap number of items. */
327	size_t tmpn = CPU->rq[i].n;
328	CPU->rq[i].n = n;
329	n = tmpn;
330
331	irq_spinlock_unlock(&CPU->rq[i].lock, false);
332	}
333
334	/* Append the contents of rq[start + 1] to rq[start]. */
335	if (n != 0) {
336	irq_spinlock_lock(&CPU->rq[start].lock, false);
337	list_concat(&CPU->rq[start].rq, &list);
338	CPU->rq[start].n += n;
339	irq_spinlock_unlock(&CPU->rq[start].lock, false);
340	}
341	}
342
343	void scheduler(void)
344	{
345	ipl_t ipl = interrupts_disable();
346
347	if (atomic_load(&haltstate))
348	halt();
349
350	if (THREAD) {
351	irq_spinlock_lock(&THREAD->lock, false);
352	}
353
354	scheduler_locked(ipl);
355	}
356
357	/** The scheduler
358	*
359	* The thread scheduling procedure.
360	* Passes control directly to
361	* scheduler_separated_stack().
362	*
363	*/
364	void scheduler_locked(ipl_t ipl)
365	{
366	assert(CPU != NULL);
367
368	if (THREAD) {
369	/* Update thread kernel accounting */
370	THREAD->kcycles += get_cycle() - THREAD->last_cycle;
371
372	#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
373	fpu_context_save(&THREAD->fpu_context);
374	#endif
375	if (!context_save(&THREAD->saved_context)) {
376	/*
377	* This is the place where threads leave scheduler();
378	*/
379
380	/* Save current CPU cycle */
381	THREAD->last_cycle = get_cycle();
382
383	irq_spinlock_unlock(&THREAD->lock, false);
384	interrupts_restore(THREAD->saved_ipl);
385
386	return;
387	}
388
389	/*
390	* Interrupt priority level of preempted thread is recorded
391	* here to facilitate scheduler() invocations from
392	* interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
393	*
394	*/
395	THREAD->saved_ipl = ipl;
396	}
397
398	/*
399	* Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS
400	* and preemption counter. At this point CURRENT could be coming either
401	* from THREAD's or CPU's stack.
402	*
403	*/
404	current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);
405
406	/*
407	* We may not keep the old stack.
408	* Reason: If we kept the old stack and got blocked, for instance, in
409	* find_best_thread(), the old thread could get rescheduled by another
410	* CPU and overwrite the part of its own stack that was also used by
411	* the scheduler on this CPU.
412	*
413	* Moreover, we have to bypass the compiler-generated POP sequence
414	* which is fooled by SP being set to the very top of the stack.
415	* Therefore the scheduler() function continues in
416	* scheduler_separated_stack().
417	*
418	*/
419	context_t ctx;
420	context_save(&ctx);
421	context_set(&ctx, FADDR(scheduler_separated_stack),
422	(uintptr_t) CPU_LOCAL->stack, STACK_SIZE);
423	context_restore(&ctx);
424
425	/* Not reached */
426	}
427
428	/** Scheduler stack switch wrapper
429	*
430	* Second part of the scheduler() function
431	* using new stack. Handling the actual context
432	* switch to a new thread.
433	*
434	*/
435	void scheduler_separated_stack(void)
436	{
437	assert((!THREAD) \|\| (irq_spinlock_locked(&THREAD->lock)));
438	assert(CPU != NULL);
439	assert(interrupts_disabled());
440
441	if (THREAD) {
442	/* Must be run after the switch to scheduler stack */
443	after_thread_ran();
444
445	switch (THREAD->state) {
446	case Running:
447	irq_spinlock_unlock(&THREAD->lock, false);
448	thread_ready(THREAD);
449	break;
450
451	case Exiting:
452	irq_spinlock_unlock(&THREAD->lock, false);
453	waitq_close(&THREAD->join_wq);
454
455	/*
456	* Release the reference CPU has for the thread.
457	* If there are no other references (e.g. threads calling join),
458	* the thread structure is deallocated.
459	*/
460	thread_put(THREAD);
461	break;
462
463	case Sleeping:
464	/*
465	* Prefer the thread after it's woken up.
466	*/
467	THREAD->priority = -1;
468	irq_spinlock_unlock(&THREAD->lock, false);
469	break;
470
471	default:
472	/*
473	* Entering state is unexpected.
474	*/
475	panic("tid%" PRIu64 ": unexpected state %s.",
476	THREAD->tid, thread_states[THREAD->state]);
477	break;
478	}
479
480	THREAD = NULL;
481	}
482
483	int rq_index;
484	THREAD = find_best_thread(&rq_index);
485
486	relink_rq(rq_index);
487
488	switch_task(THREAD->task);
489
490	irq_spinlock_lock(&THREAD->lock, false);
491	THREAD->state = Running;
492
493	#ifdef SCHEDULER_VERBOSE
494	log(LF_OTHER, LVL_DEBUG,
495	"cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
496	", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,
497	THREAD->ticks, atomic_load(&CPU->nrdy));
498	#endif
499
500	/*
501	* Some architectures provide late kernel PA2KA(identity)
502	* mapping in a page fault handler. However, the page fault
503	* handler uses the kernel stack of the running thread and
504	* therefore cannot be used to map it. The kernel stack, if
505	* necessary, is to be mapped in before_thread_runs(). This
506	* function must be executed before the switch to the new stack.
507	*/
508	before_thread_runs();
509
510	/*
511	* Copy the knowledge of CPU, TASK, THREAD and preemption counter to
512	* thread's stack.
513	*/
514	current_copy(CURRENT, (current_t *) THREAD->kstack);
515
516	context_restore(&THREAD->saved_context);
517
518	/* Not reached */
519	}
520
521	#ifdef CONFIG_SMP
522
523	static thread_t steal_thread_from(cpu_t old_cpu, int i)
524	{
525	runq_t *old_rq = &old_cpu->rq[i];
526	runq_t *new_rq = &CPU->rq[i];
527
528	ipl_t ipl = interrupts_disable();
529
530	irq_spinlock_lock(&old_rq->lock, false);
531
532	/*
533	* If fpu_owner is any thread in the list, its store is seen here thanks to
534	* the runqueue lock.
535	*/
536	thread_t *fpu_owner = atomic_load_explicit(&old_cpu->fpu_owner,
537	memory_order_relaxed);
538
539	/* Search rq from the back */
540	list_foreach_rev(old_rq->rq, rq_link, thread_t, thread) {
541
542	irq_spinlock_lock(&thread->lock, false);
543
544	/*
545	* Do not steal CPU-wired threads, threads
546	* already stolen, threads for which migration
547	* was temporarily disabled or threads whose
548	* FPU context is still in the CPU.
549	*/
550	if (thread->stolen \|\| thread->nomigrate \|\|
551	thread == fpu_owner) {
552	irq_spinlock_unlock(&thread->lock, false);
553	continue;
554	}
555
556	thread->stolen = true;
557	thread->cpu = CPU;
558
559	irq_spinlock_unlock(&thread->lock, false);
560
561	/*
562	* Ready thread on local CPU
563	*/
564
565	#ifdef KCPULB_VERBOSE
566	log(LF_OTHER, LVL_DEBUG,
567	"kcpulb%u: TID %" PRIu64 " -> cpu%u, "
568	"nrdy=%ld, avg=%ld", CPU->id, thread->tid,
569	CPU->id, atomic_load(&CPU->nrdy),
570	atomic_load(&nrdy) / config.cpu_active);
571	#endif
572
573	/* Remove thread from ready queue. */
574	old_rq->n--;
575	list_remove(&thread->rq_link);
576	irq_spinlock_unlock(&old_rq->lock, false);
577
578	/* Append thread to local queue. */
579	irq_spinlock_lock(&new_rq->lock, false);
580	list_append(&thread->rq_link, &new_rq->rq);
581	new_rq->n++;
582	irq_spinlock_unlock(&new_rq->lock, false);
583
584	atomic_dec(&old_cpu->nrdy);
585	atomic_inc(&CPU->nrdy);
586	interrupts_restore(ipl);
587	return thread;
588	}
589
590	irq_spinlock_unlock(&old_rq->lock, false);
591	interrupts_restore(ipl);
592	return NULL;
593	}
594
595	/** Load balancing thread
596	*
597	* SMP load balancing thread, supervising thread supplies
598	* for the CPU it's wired to.
599	*
600	* @param arg Generic thread argument (unused).
601	*
602	*/
603	void kcpulb(void *arg)
604	{
605	size_t average;
606	size_t rdy;
607
608	loop:
609	/*
610	* Work in 1s intervals.
611	*/
612	thread_sleep(1);
613
614	not_satisfied:
615	/*
616	* Calculate the number of threads that will be migrated/stolen from
617	* other CPU's. Note that situation can have changed between two
618	* passes. Each time get the most up to date counts.
619	*
620	*/
621	average = atomic_load(&nrdy) / config.cpu_active + 1;
622	rdy = atomic_load(&CPU->nrdy);
623
624	if (average <= rdy)
625	goto satisfied;
626
627	size_t count = average - rdy;
628
629	/*
630	* Searching least priority queues on all CPU's first and most priority
631	* queues on all CPU's last.
632	*/
633	size_t acpu;
634	int rq;
635
636	for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
637	for (acpu = 0; acpu < config.cpu_active; acpu++) {
638	cpu_t *cpu = &cpus[acpu];
639
640	/*
641	* Not interested in ourselves.
642	* Doesn't require interrupt disabling for kcpulb has
643	* THREAD_FLAG_WIRED.
644	*
645	*/
646	if (CPU == cpu)
647	continue;
648
649	if (atomic_load(&cpu->nrdy) <= average)
650	continue;
651
652	if (steal_thread_from(cpu, rq) && --count == 0)
653	goto satisfied;
654	}
655	}
656
657	if (atomic_load(&CPU->nrdy)) {
658	/*
659	* Be a little bit light-weight and let migrated threads run.
660	*
661	*/
662	scheduler();
663	} else {
664	/*
665	* We failed to migrate a single thread.
666	* Give up this turn.
667	*
668	*/
669	goto loop;
670	}
671
672	goto not_satisfied;
673
674	satisfied:
675	goto loop;
676	}
677	#endif /* CONFIG_SMP */
678
679	/** Print information about threads & scheduler queues
680	*
681	*/
682	void sched_print_list(void)
683	{
684	size_t cpu;
685	for (cpu = 0; cpu < config.cpu_count; cpu++) {
686	if (!cpus[cpu].active)
687	continue;
688
689	printf("cpu%u: address=%p, nrdy=%zu\n",
690	cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy));
691
692	unsigned int i;
693	for (i = 0; i < RQ_COUNT; i++) {
694	irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
695	if (cpus[cpu].rq[i].n == 0) {
696	irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
697	continue;
698	}
699
700	printf("\trq[%u]: ", i);
701	list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
702	thread) {
703	printf("%" PRIu64 "(%s) ", thread->tid,
704	thread_states[thread->state]);
705	}
706	printf("\n");
707
708	irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
709	}
710	}
711	}
712
713	/** @}
714	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/kernel/generic/src/proc/scheduler.c@ 4760793

Download in other formats: