source: mainline/kernel/generic/src/proc/scheduler.c@3efc35a

Last change on this file since 3efc35a was ef9a2a8, checked in by Jakub Klama <jakub.klama@…>, 12 years ago

Introduce early MMU support in kernel. At current state, it
is possible to create initial kernel address space, map kernel
identity into it and take over MMU control. ASID FIFO support
should also work.

/*
 * Copyright (c) 2010 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup genericproc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and kcpulb kernel thread which
 * performs load-balancing of per-CPU run queues.
 */

#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <func.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <print.h>
#include <debug.h>
#include <stacktrace.h>

//#define SCHEDULER_VERBOSE 1

static void scheduler_separated_stack(void);

atomic_t nrdy; /**< Number of ready threads in the system. */

/** Carry out actions before new task runs. */
static void before_task_runs(void)
{
	before_task_runs_arch();
}

/** Take actions before new thread runs.
 *
 * Perform actions that need to be
 * taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();

#ifdef CONFIG_FPU_LAZY
	if (THREAD == CPU->fpu_owner)
		fpu_enable();
	else
		fpu_disable();
#elif defined CONFIG_FPU
	fpu_enable();
	if (THREAD->fpu_context_exists)
		fpu_context_restore(THREAD->saved_fpu_context);
	else {
		fpu_init();
		THREAD->fpu_context_exists = true;
	}
#endif

#ifdef CONFIG_UDEBUG
	if (THREAD->btrace) {
		istate_t *istate = THREAD->udebug.uspace_state;
		if (istate != NULL) {
			printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
			stack_trace_istate(istate);
		}

		THREAD->btrace = false;
	}
#endif
}

/** Take actions after THREAD has run.
 *
 * Perform actions that need to be
 * taken after the running thread
 * has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry
 *
 */
static void after_thread_ran(void)
{
	after_thread_ran_arch();
}

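/*
 * With CONFIG_FPU_LAZY, before_thread_runs() leaves the FPU disabled for
 * every thread except CPU->fpu_owner. The first FPU instruction executed by
 * any other thread is therefore expected to raise an "FPU unavailable" fault
 * whose architecture-specific handler calls scheduler_fpu_lazy_request() to
 * save the owner's context and restore (or lazily allocate and initialize)
 * the context of the current thread.
 */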
#ifdef CONFIG_FPU_LAZY
void scheduler_fpu_lazy_request(void)
{
restart:
	fpu_enable();
	irq_spinlock_lock(&CPU->lock, false);

	/* Save old context */
	if (CPU->fpu_owner != NULL) {
		irq_spinlock_lock(&CPU->fpu_owner->lock, false);
		fpu_context_save(CPU->fpu_owner->saved_fpu_context);

		/* Don't prevent migration */
		CPU->fpu_owner->fpu_context_engaged = false;
		irq_spinlock_unlock(&CPU->fpu_owner->lock, false);
		CPU->fpu_owner = NULL;
	}

	irq_spinlock_lock(&THREAD->lock, false);
	if (THREAD->fpu_context_exists) {
		fpu_context_restore(THREAD->saved_fpu_context);
	} else {
		/* Allocate FPU context */
		if (!THREAD->saved_fpu_context) {
			/* Might sleep */
			irq_spinlock_unlock(&THREAD->lock, false);
			irq_spinlock_unlock(&CPU->lock, false);
			THREAD->saved_fpu_context =
			    (fpu_context_t *) slab_alloc(fpu_context_slab, 0);

			/* We may have switched CPUs during slab_alloc */
			goto restart;
		}
		fpu_init();
		THREAD->fpu_context_exists = true;
	}

	CPU->fpu_owner = THREAD;
	THREAD->fpu_context_engaged = true;
	irq_spinlock_unlock(&THREAD->lock, false);

	irq_spinlock_unlock(&CPU->lock, false);
}
#endif /* CONFIG_FPU_LAZY */

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
	ASSERT(CPU != NULL);

loop:

	if (atomic_get(&CPU->nrdy) == 0) {
		/*
		 * Since there is nothing to run, the CPU goes to sleep
		 * until a hardware interrupt or an IPI arrives.
		 * This improves energy saving and hyperthreading.
		 */
		irq_spinlock_lock(&CPU->lock, false);
		CPU->idle = true;
		irq_spinlock_unlock(&CPU->lock, false);
		interrupts_enable();

		/*
		 * An interrupt might occur right now and wake up a thread.
		 * In such a case, the CPU will still go to sleep
		 * even though there is a runnable thread.
		 */
		cpu_sleep();
		interrupts_disable();
		goto loop;
	}

	unsigned int i;
	for (i = 0; i < RQ_COUNT; i++) {
		irq_spinlock_lock(&(CPU->rq[i].lock), false);
		if (CPU->rq[i].n == 0) {
			/*
			 * If this queue is empty, try a lower-priority queue.
			 */
			irq_spinlock_unlock(&(CPU->rq[i].lock), false);
			continue;
		}

		atomic_dec(&CPU->nrdy);
		atomic_dec(&nrdy);
		CPU->rq[i].n--;

		/*
		 * Take the first thread from the queue.
		 */
		thread_t *thread = list_get_instance(
		    list_first(&CPU->rq[i].rq), thread_t, rq_link);
		list_remove(&thread->rq_link);

		irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);

		thread->cpu = CPU;
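		/*
		 * The time quantum scales with the queue index: a thread taken
		 * from rq[i] receives (i + 1) * 10 ms worth of ticks, so lower
		 * priority threads are picked less often but run for longer
		 * stretches once selected.
		 */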
		thread->ticks = us2ticks((i + 1) * 10000);
		thread->priority = i; /* Correct rq index */

		/*
		 * Clear the stolen flag so that the thread can be migrated
		 * again when the need for load balancing arises.
		 */
		thread->stolen = false;
		irq_spinlock_unlock(&thread->lock, false);

		return thread;
	}

	goto loop;
}

/** Prevent rq starvation
 *
 * Prevent low priority threads from starving in rq's.
 *
 * When the function decides to relink rq's, it reconnects
 * the respective pointers so that, as a result, threads with
 * priority greater than or equal to 'start' are moved to a
 * higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
static void relink_rq(int start)
{
	list_t list;

	list_initialize(&list);
	irq_spinlock_lock(&CPU->lock, false);

	if (CPU->needs_relink > NEEDS_RELINK_MAX) {
		int i;
		for (i = start; i < RQ_COUNT - 1; i++) {
			/* Remember and empty rq[i + 1] */

			irq_spinlock_lock(&CPU->rq[i + 1].lock, false);
			list_concat(&list, &CPU->rq[i + 1].rq);
			size_t n = CPU->rq[i + 1].n;
			CPU->rq[i + 1].n = 0;
			irq_spinlock_unlock(&CPU->rq[i + 1].lock, false);

			/* Append rq[i + 1] to rq[i] */

			irq_spinlock_lock(&CPU->rq[i].lock, false);
			list_concat(&CPU->rq[i].rq, &list);
			CPU->rq[i].n += n;
			irq_spinlock_unlock(&CPU->rq[i].lock, false);
		}

		CPU->needs_relink = 0;
	}

	irq_spinlock_unlock(&CPU->lock, false);
}

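/*
 * Example: once CPU->needs_relink exceeds NEEDS_RELINK_MAX and relink_rq() is
 * called with start == 2, the contents of rq[3] are appended to rq[2], rq[4]
 * to rq[3], and so on up to the last queue, so every waiting thread below the
 * threshold is promoted by one priority level.
 */
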
/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler(void)
{
	volatile ipl_t ipl;

	ASSERT(CPU != NULL);

	ipl = interrupts_disable();

	if (atomic_get(&haltstate))
		halt();

	if (THREAD) {
		irq_spinlock_lock(&THREAD->lock, false);

		/* Update thread kernel accounting */
		THREAD->kcycles += get_cycle() - THREAD->last_cycle;

#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
		fpu_context_save(THREAD->saved_fpu_context);
#endif
		if (!context_save(&THREAD->saved_context)) {
			/*
			 * This is the place where threads leave scheduler();
			 */
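			/*
			 * context_save() behaves like setjmp(): it returns a
			 * non-zero value on the direct call above and zero when
			 * the saved context is later resumed via
			 * context_restore(), so this branch runs only when the
			 * thread is being scheduled to run again.
			 */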

			/* Save current CPU cycle */
			THREAD->last_cycle = get_cycle();

			irq_spinlock_unlock(&THREAD->lock, false);
			interrupts_restore(THREAD->saved_context.ipl);

			return;
		}

		/*
		 * Interrupt priority level of preempted thread is recorded
		 * here to facilitate scheduler() invocations from
		 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
		 *
		 */
		THREAD->saved_context.ipl = ipl;
	}

	/*
	 * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, AS
	 * and preemption counter. At this point THE could be coming either
	 * from THREAD's or CPU's stack.
	 *
	 */
	the_copy(THE, (the_t *) CPU->stack);

	/*
	 * We may not keep the old stack.
	 * Reason: If we kept the old stack and got blocked, for instance, in
	 * find_best_thread(), the old thread could get rescheduled by another
	 * CPU and overwrite the part of its own stack that was also used by
	 * the scheduler on this CPU.
	 *
	 * Moreover, we have to bypass the compiler-generated POP sequence
	 * which is fooled by SP being set to the very top of the stack.
	 * Therefore the scheduler() function continues in
	 * scheduler_separated_stack().
	 *
	 */
	context_save(&CPU->saved_context);
	context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
	    (uintptr_t) CPU->stack, STACK_SIZE);
	context_restore(&CPU->saved_context);

	/* Not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function,
 * running on the new stack. It handles the actual
 * context switch to a new thread.
 *
 */
void scheduler_separated_stack(void)
{
	DEADLOCK_PROBE_INIT(p_joinwq);
	task_t *old_task = TASK;
	as_t *old_as = AS;

	ASSERT((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
	ASSERT(CPU != NULL);

	/*
	 * Hold the current task and the address space to prevent their
	 * possible destruction should thread_destroy() be called on this or
	 * any other processor while the scheduler is still using them.
	 */
	if (old_task)
		task_hold(old_task);

	if (old_as)
		as_hold(old_as);

	if (THREAD) {
		/* Must be run after the switch to scheduler stack */
		after_thread_ran();

		switch (THREAD->state) {
		case Running:
			irq_spinlock_unlock(&THREAD->lock, false);
			thread_ready(THREAD);
			break;

		case Exiting:
repeat:
			if (THREAD->detached) {
				thread_destroy(THREAD, false);
			} else {
				/*
				 * The thread structure is kept allocated until
				 * somebody calls thread_detach() on it.
				 */
				if (!irq_spinlock_trylock(&THREAD->join_wq.lock)) {
					/*
					 * Avoid deadlock.
					 */
					irq_spinlock_unlock(&THREAD->lock, false);
					delay(HZ);
					irq_spinlock_lock(&THREAD->lock, false);
					DEADLOCK_PROBE(p_joinwq,
					    DEADLOCK_THRESHOLD);
					goto repeat;
				}
				_waitq_wakeup_unsafe(&THREAD->join_wq,
				    WAKEUP_FIRST);
				irq_spinlock_unlock(&THREAD->join_wq.lock, false);

				THREAD->state = Lingering;
				irq_spinlock_unlock(&THREAD->lock, false);
			}
			break;

		case Sleeping:
			/*
			 * Prefer the thread after it's woken up.
			 */
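			/*
			 * A priority of -1 marks the thread so that, when it
			 * wakes up, thread_ready() is expected to place it in
			 * the highest-priority run queue.
			 */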
			THREAD->priority = -1;

			/*
			 * We need to release wq->lock which we locked in
			 * waitq_sleep(). Address of wq->lock is kept in
			 * THREAD->sleep_queue.
			 */
			irq_spinlock_unlock(&THREAD->sleep_queue->lock, false);

			irq_spinlock_unlock(&THREAD->lock, false);
			break;

		default:
			/*
			 * Entering state is unexpected.
			 */
			panic("tid%" PRIu64 ": unexpected state %s.",
			    THREAD->tid, thread_states[THREAD->state]);
			break;
		}

		THREAD = NULL;
	}

	THREAD = find_best_thread();

	irq_spinlock_lock(&THREAD->lock, false);
	int priority = THREAD->priority;
	irq_spinlock_unlock(&THREAD->lock, false);

	relink_rq(priority);

	/*
	 * If both the old and the new task are the same,
	 * lots of work is avoided.
	 */
	if (TASK != THREAD->task) {
		as_t *new_as = THREAD->task->as;

		/*
		 * Note that it is possible for two tasks
		 * to share one address space.
		 */
		if (old_as != new_as) {
			/*
			 * Both tasks and address spaces are different.
			 * Replace the old one with the new one.
			 */
			as_switch(old_as, new_as);
		}

		TASK = THREAD->task;
		before_task_runs();
	}

	if (old_task)
		task_release(old_task);

	if (old_as)
		as_release(old_as);

	irq_spinlock_lock(&THREAD->lock, false);
	THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
	printf("cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
	    ", nrdy=%d)\n", CPU->id, THREAD->tid, THREAD->priority,
	    THREAD->ticks, atomic_get(&CPU->nrdy));
#endif

	/*
	 * Some architectures provide late kernel PA2KA(identity)
	 * mapping in a page fault handler. However, the page fault
	 * handler uses the kernel stack of the running thread and
	 * therefore cannot be used to map it. The kernel stack, if
	 * necessary, is to be mapped in before_thread_runs(). This
	 * function must be executed before the switch to the new stack.
	 */
	before_thread_runs();

	/*
	 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
	 * thread's stack.
	 */
	the_copy(THE, (the_t *) THREAD->kstack);

	context_restore(&THREAD->saved_context);

	/* Not reached */
}

#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * SMP load balancing thread, supervising the supply of
 * threads for the CPU it is wired to.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
	atomic_count_t average;
	atomic_count_t rdy;

	/*
	 * Detach kcpulb as nobody will call thread_join_timeout() on it.
	 */
	thread_detach(THREAD);

loop:
	/*
	 * Work in 1s intervals.
	 */
	thread_sleep(1);

not_satisfied:
	/*
	 * Calculate the number of threads that will be migrated/stolen from
	 * other CPUs. Note that the situation can change between two
	 * passes. Get the most up-to-date counts each time.
	 *
	 */
	average = atomic_get(&nrdy) / config.cpu_active + 1;
	rdy = atomic_get(&CPU->nrdy);

	if (average <= rdy)
		goto satisfied;

	atomic_count_t count = average - rdy;
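	/*
	 * Worked example: with 4 active CPUs and 13 ready threads system-wide,
	 * average = 13 / 4 + 1 = 4; a CPU that currently has only 2 ready
	 * threads will then try to steal count = 4 - 2 = 2 threads from its
	 * busier peers.
	 */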

	/*
	 * Search the lowest-priority queues on all CPUs first and the
	 * highest-priority queues on all CPUs last.
	 */
	size_t acpu;
	size_t acpu_bias = 0;
	int rq;

	for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
		for (acpu = 0; acpu < config.cpu_active; acpu++) {
			cpu_t *cpu = &cpus[(acpu + acpu_bias) % config.cpu_active];

			/*
			 * Not interested in ourselves.
			 * Interrupt disabling is not required because kcpulb
			 * has THREAD_FLAG_WIRED.
			 *
			 */
			if (CPU == cpu)
				continue;

			if (atomic_get(&cpu->nrdy) <= average)
				continue;

			irq_spinlock_lock(&(cpu->rq[rq].lock), true);
			if (cpu->rq[rq].n == 0) {
				irq_spinlock_unlock(&(cpu->rq[rq].lock), true);
				continue;
			}

			thread_t *thread = NULL;

			/* Search rq from the back */
			link_t *link = cpu->rq[rq].rq.head.prev;

			while (link != &(cpu->rq[rq].rq.head)) {
				thread = (thread_t *) list_get_instance(link,
				    thread_t, rq_link);

				/*
				 * Do not steal CPU-wired threads, threads
				 * already stolen, threads for which migration
				 * was temporarily disabled or threads whose
				 * FPU context is still in the CPU.
				 */
				irq_spinlock_lock(&thread->lock, false);

				if ((!thread->wired) && (!thread->stolen) &&
				    (!thread->nomigrate) &&
				    (!thread->fpu_context_engaged)) {
					/*
					 * Remove thread from ready queue.
					 */
					irq_spinlock_unlock(&thread->lock,
					    false);

					atomic_dec(&cpu->nrdy);
					atomic_dec(&nrdy);

					cpu->rq[rq].n--;
					list_remove(&thread->rq_link);

					break;
				}

				irq_spinlock_unlock(&thread->lock, false);

				link = link->prev;
				thread = NULL;
			}

			if (thread) {
				/*
				 * Ready thread on local CPU
				 */

				irq_spinlock_pass(&(cpu->rq[rq].lock),
				    &thread->lock);

#ifdef KCPULB_VERBOSE
				printf("kcpulb%u: TID %" PRIu64 " -> cpu%u, "
				    "nrdy=%ld, avg=%ld\n", CPU->id, thread->tid,
				    CPU->id, atomic_get(&CPU->nrdy),
				    atomic_get(&nrdy) / config.cpu_active);
#endif

				thread->stolen = true;
				thread->state = Entering;

				irq_spinlock_unlock(&thread->lock, true);
				thread_ready(thread);

				if (--count == 0)
					goto satisfied;

				/*
				 * We are not satisfied yet, focus on another
				 * CPU next time.
				 *
				 */
				acpu_bias++;

				continue;
			} else
				irq_spinlock_unlock(&(cpu->rq[rq].lock), true);

		}
	}

	if (atomic_get(&CPU->nrdy)) {
		/*
		 * Be a little bit light-weight and let migrated threads run.
		 *
		 */
		scheduler();
	} else {
		/*
		 * We failed to migrate a single thread.
		 * Give up this turn.
		 *
		 */
		goto loop;
	}

	goto not_satisfied;

satisfied:
	goto loop;
}
#endif /* CONFIG_SMP */

/** Print information about threads & scheduler queues
 *
 */
void sched_print_list(void)
{
	size_t cpu;
	for (cpu = 0; cpu < config.cpu_count; cpu++) {
		if (!cpus[cpu].active)
			continue;

		irq_spinlock_lock(&cpus[cpu].lock, true);

		printf("cpu%u: address=%p, nrdy=%" PRIua ", needs_relink=%zu\n",
		    cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
		    cpus[cpu].needs_relink);

		unsigned int i;
		for (i = 0; i < RQ_COUNT; i++) {
			irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
			if (cpus[cpu].rq[i].n == 0) {
				irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
				continue;
			}

			printf("\trq[%u]: ", i);
			list_foreach(cpus[cpu].rq[i].rq, cur) {
				thread_t *thread = list_get_instance(cur,
				    thread_t, rq_link);
				printf("%" PRIu64 "(%s) ", thread->tid,
				    thread_states[thread->state]);
			}
			printf("\n");

			irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
		}

		irq_spinlock_unlock(&cpus[cpu].lock, true);
	}
}

/** @}
 */