Context Navigation

scheduler.c@ 481d4751

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 481d4751 was 481d4751, checked in by Jakub Jermar <jakub@…>, 15 years ago

Fix a race condition between the scheduler and as_destroy().

It was possible for the scheduler to use page tables of an address space which
was already destroyed. Prevent this from happening by holding extra references
to the current task and the current address space in
scheduler_separated_stack().

Property mode set to 100644

File size: 16.8 KB

Line
1	/*
2	* Copyright (c) 2010 Jakub Jermar
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/** @addtogroup genericproc
30	* @{
31	*/
32
33	/**
34	* @file
35	* @brief Scheduler and load balancing.
36	*
37	* This file contains the scheduler and kcpulb kernel thread which
38	* performs load-balancing of per-CPU run queues.
39	*/
40
41	#include <proc/scheduler.h>
42	#include <proc/thread.h>
43	#include <proc/task.h>
44	#include <mm/frame.h>
45	#include <mm/page.h>
46	#include <mm/as.h>
47	#include <time/timeout.h>
48	#include <time/delay.h>
49	#include <arch/asm.h>
50	#include <arch/faddr.h>
51	#include <arch/cycle.h>
52	#include <atomic.h>
53	#include <synch/spinlock.h>
54	#include <config.h>
55	#include <context.h>
56	#include <fpu_context.h>
57	#include <func.h>
58	#include <arch.h>
59	#include <adt/list.h>
60	#include <panic.h>
61	#include <cpu.h>
62	#include <print.h>
63	#include <debug.h>
64
65	static void before_task_runs(void);
66	static void before_thread_runs(void);
67	static void after_thread_ran(void);
68	static void scheduler_separated_stack(void);
69
70	atomic_t nrdy; /*< Number of ready threads in the system. /
71
72	/** Carry out actions before new task runs. */
73	void before_task_runs(void)
74	{
75	before_task_runs_arch();
76	}
77
78	/** Take actions before new thread runs.
79	*
80	* Perform actions that need to be
81	* taken before the newly selected
82	* tread is passed control.
83	*
84	* THREAD->lock is locked on entry
85	*
86	*/
87	void before_thread_runs(void)
88	{
89	before_thread_runs_arch();
90	#ifdef CONFIG_FPU_LAZY
91	if(THREAD == CPU->fpu_owner)
92	fpu_enable();
93	else
94	fpu_disable();
95	#else
96	fpu_enable();
97	if (THREAD->fpu_context_exists)
98	fpu_context_restore(THREAD->saved_fpu_context);
99	else {
100	fpu_init();
101	THREAD->fpu_context_exists = 1;
102	}
103	#endif
104	}
105
106	/** Take actions after THREAD had run.
107	*
108	* Perform actions that need to be
109	* taken after the running thread
110	* had been preempted by the scheduler.
111	*
112	* THREAD->lock is locked on entry
113	*
114	*/
115	void after_thread_ran(void)
116	{
117	after_thread_ran_arch();
118	}
119
120	#ifdef CONFIG_FPU_LAZY
121	void scheduler_fpu_lazy_request(void)
122	{
123	restart:
124	fpu_enable();
125	spinlock_lock(&CPU->lock);
126
127	/* Save old context */
128	if (CPU->fpu_owner != NULL) {
129	spinlock_lock(&CPU->fpu_owner->lock);
130	fpu_context_save(CPU->fpu_owner->saved_fpu_context);
131	/* don't prevent migration */
132	CPU->fpu_owner->fpu_context_engaged = 0;
133	spinlock_unlock(&CPU->fpu_owner->lock);
134	CPU->fpu_owner = NULL;
135	}
136
137	spinlock_lock(&THREAD->lock);
138	if (THREAD->fpu_context_exists) {
139	fpu_context_restore(THREAD->saved_fpu_context);
140	} else {
141	/* Allocate FPU context */
142	if (!THREAD->saved_fpu_context) {
143	/* Might sleep */
144	spinlock_unlock(&THREAD->lock);
145	spinlock_unlock(&CPU->lock);
146	THREAD->saved_fpu_context =
147	(fpu_context_t *) slab_alloc(fpu_context_slab, 0);
148	/* We may have switched CPUs during slab_alloc */
149	goto restart;
150	}
151	fpu_init();
152	THREAD->fpu_context_exists = 1;
153	}
154	CPU->fpu_owner = THREAD;
155	THREAD->fpu_context_engaged = 1;
156	spinlock_unlock(&THREAD->lock);
157
158	spinlock_unlock(&CPU->lock);
159	}
160	#endif
161
162	/** Initialize scheduler
163	*
164	* Initialize kernel scheduler.
165	*
166	*/
167	void scheduler_init(void)
168	{
169	}
170
171	/** Get thread to be scheduled
172	*
173	* Get the optimal thread to be scheduled
174	* according to thread accounting and scheduler
175	* policy.
176	*
177	* @return Thread to be scheduled.
178	*
179	*/
180	static thread_t *find_best_thread(void)
181	{
182	thread_t *t;
183	runq_t *r;
184	int i;
185
186	ASSERT(CPU != NULL);
187
188	loop:
189
190	if (atomic_get(&CPU->nrdy) == 0) {
191	/*
192	* For there was nothing to run, the CPU goes to sleep
193	* until a hardware interrupt or an IPI comes.
194	* This improves energy saving and hyperthreading.
195	*/
196
197	/* Mark CPU as it was idle this clock tick */
198	spinlock_lock(&CPU->lock);
199	CPU->idle = true;
200	spinlock_unlock(&CPU->lock);
201
202	interrupts_enable();
203	/*
204	* An interrupt might occur right now and wake up a thread.
205	* In such case, the CPU will continue to go to sleep
206	* even though there is a runnable thread.
207	*/
208	cpu_sleep();
209	interrupts_disable();
210	goto loop;
211	}
212
213	for (i = 0; i < RQ_COUNT; i++) {
214	r = &CPU->rq[i];
215	spinlock_lock(&r->lock);
216	if (r->n == 0) {
217	/*
218	* If this queue is empty, try a lower-priority queue.
219	*/
220	spinlock_unlock(&r->lock);
221	continue;
222	}
223
224	atomic_dec(&CPU->nrdy);
225	atomic_dec(&nrdy);
226	r->n--;
227
228	/*
229	* Take the first thread from the queue.
230	*/
231	t = list_get_instance(r->rq_head.next, thread_t, rq_link);
232	list_remove(&t->rq_link);
233
234	spinlock_unlock(&r->lock);
235
236	spinlock_lock(&t->lock);
237	t->cpu = CPU;
238
239	t->ticks = us2ticks((i + 1) * 10000);
240	t->priority = i; /* correct rq index */
241
242	/*
243	* Clear the THREAD_FLAG_STOLEN flag so that t can be migrated
244	* when load balancing needs emerge.
245	*/
246	t->flags &= ~THREAD_FLAG_STOLEN;
247	spinlock_unlock(&t->lock);
248
249	return t;
250	}
251	goto loop;
252
253	}
254
255	/** Prevent rq starvation
256	*
257	* Prevent low priority threads from starving in rq's.
258	*
259	* When the function decides to relink rq's, it reconnects
260	* respective pointers so that in result threads with 'pri'
261	* greater or equal start are moved to a higher-priority queue.
262	*
263	* @param start Threshold priority.
264	*
265	*/
266	static void relink_rq(int start)
267	{
268	link_t head;
269	runq_t *r;
270	int i, n;
271
272	list_initialize(&head);
273	spinlock_lock(&CPU->lock);
274	if (CPU->needs_relink > NEEDS_RELINK_MAX) {
275	for (i = start; i < RQ_COUNT - 1; i++) {
276	/* remember and empty rq[i + 1] */
277	r = &CPU->rq[i + 1];
278	spinlock_lock(&r->lock);
279	list_concat(&head, &r->rq_head);
280	n = r->n;
281	r->n = 0;
282	spinlock_unlock(&r->lock);
283
284	/* append rq[i + 1] to rq[i] */
285	r = &CPU->rq[i];
286	spinlock_lock(&r->lock);
287	list_concat(&r->rq_head, &head);
288	r->n += n;
289	spinlock_unlock(&r->lock);
290	}
291	CPU->needs_relink = 0;
292	}
293	spinlock_unlock(&CPU->lock);
294
295	}
296
297	/** The scheduler
298	*
299	* The thread scheduling procedure.
300	* Passes control directly to
301	* scheduler_separated_stack().
302	*
303	*/
304	void scheduler(void)
305	{
306	volatile ipl_t ipl;
307
308	ASSERT(CPU != NULL);
309
310	ipl = interrupts_disable();
311
312	if (atomic_get(&haltstate))
313	halt();
314
315	if (THREAD) {
316	spinlock_lock(&THREAD->lock);
317
318	/* Update thread kernel accounting */
319	THREAD->kcycles += get_cycle() - THREAD->last_cycle;
320
321	#ifndef CONFIG_FPU_LAZY
322	fpu_context_save(THREAD->saved_fpu_context);
323	#endif
324	if (!context_save(&THREAD->saved_context)) {
325	/*
326	* This is the place where threads leave scheduler();
327	*/
328
329	/* Save current CPU cycle */
330	THREAD->last_cycle = get_cycle();
331
332	spinlock_unlock(&THREAD->lock);
333	interrupts_restore(THREAD->saved_context.ipl);
334
335	return;
336	}
337
338	/*
339	* Interrupt priority level of preempted thread is recorded
340	* here to facilitate scheduler() invocations from
341	* interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
342	*/
343	THREAD->saved_context.ipl = ipl;
344	}
345
346	/*
347	* Through the 'THE' structure, we keep track of THREAD, TASK, CPU, VM
348	* and preemption counter. At this point THE could be coming either
349	* from THREAD's or CPU's stack.
350	*/
351	the_copy(THE, (the_t *) CPU->stack);
352
353	/*
354	* We may not keep the old stack.
355	* Reason: If we kept the old stack and got blocked, for instance, in
356	* find_best_thread(), the old thread could get rescheduled by another
357	* CPU and overwrite the part of its own stack that was also used by
358	* the scheduler on this CPU.
359	*
360	* Moreover, we have to bypass the compiler-generated POP sequence
361	* which is fooled by SP being set to the very top of the stack.
362	* Therefore the scheduler() function continues in
363	* scheduler_separated_stack().
364	*/
365	context_save(&CPU->saved_context);
366	context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
367	(uintptr_t) CPU->stack, CPU_STACK_SIZE);
368	context_restore(&CPU->saved_context);
369	/* not reached */
370	}
371
372	/** Scheduler stack switch wrapper
373	*
374	* Second part of the scheduler() function
375	* using new stack. Handling the actual context
376	* switch to a new thread.
377	*
378	* Assume THREAD->lock is held.
379	*/
380	void scheduler_separated_stack(void)
381	{
382	int priority;
383	DEADLOCK_PROBE_INIT(p_joinwq);
384	task_t *old_task = TASK;
385	as_t *old_as = AS;
386
387	ASSERT(CPU != NULL);
388
389	/*
390	* Hold the current task and the address space to prevent their
391	* possible destruction should thread_destroy() be called on this or any
392	* other processor while the scheduler is still using them.
393	*/
394	if (old_task)
395	task_hold(old_task);
396	if (old_as)
397	as_hold(old_as);
398
399	if (THREAD) {
400	/* must be run after the switch to scheduler stack */
401	after_thread_ran();
402
403	switch (THREAD->state) {
404	case Running:
405	spinlock_unlock(&THREAD->lock);
406	thread_ready(THREAD);
407	break;
408
409	case Exiting:
410	repeat:
411	if (THREAD->detached) {
412	thread_destroy(THREAD);
413	} else {
414	/*
415	* The thread structure is kept allocated until
416	* somebody calls thread_detach() on it.
417	*/
418	if (!spinlock_trylock(&THREAD->join_wq.lock)) {
419	/*
420	* Avoid deadlock.
421	*/
422	spinlock_unlock(&THREAD->lock);
423	delay(HZ);
424	spinlock_lock(&THREAD->lock);
425	DEADLOCK_PROBE(p_joinwq,
426	DEADLOCK_THRESHOLD);
427	goto repeat;
428	}
429	_waitq_wakeup_unsafe(&THREAD->join_wq,
430	WAKEUP_FIRST);
431	spinlock_unlock(&THREAD->join_wq.lock);
432
433	THREAD->state = Lingering;
434	spinlock_unlock(&THREAD->lock);
435	}
436	break;
437
438	case Sleeping:
439	/*
440	* Prefer the thread after it's woken up.
441	*/
442	THREAD->priority = -1;
443
444	/*
445	* We need to release wq->lock which we locked in
446	* waitq_sleep(). Address of wq->lock is kept in
447	* THREAD->sleep_queue.
448	*/
449	spinlock_unlock(&THREAD->sleep_queue->lock);
450
451	/*
452	* Check for possible requests for out-of-context
453	* invocation.
454	*/
455	if (THREAD->call_me) {
456	THREAD->call_me(THREAD->call_me_with);
457	THREAD->call_me = NULL;
458	THREAD->call_me_with = NULL;
459	}
460
461	spinlock_unlock(&THREAD->lock);
462
463	break;
464
465	default:
466	/*
467	* Entering state is unexpected.
468	*/
469	panic("tid%" PRIu64 ": unexpected state %s.",
470	THREAD->tid, thread_states[THREAD->state]);
471	break;
472	}
473
474	THREAD = NULL;
475	}
476
477	THREAD = find_best_thread();
478
479	spinlock_lock(&THREAD->lock);
480	priority = THREAD->priority;
481	spinlock_unlock(&THREAD->lock);
482
483	relink_rq(priority);
484
485	/*
486	* If both the old and the new task are the same, lots of work is
487	* avoided.
488	*/
489	if (TASK != THREAD->task) {
490	as_t *new_as = THREAD->task->as;
491
492	/*
493	* Note that it is possible for two tasks to share one address
494	* space.
495	*/
496	if (old_as != new_as) {
497	/*
498	* Both tasks and address spaces are different.
499	* Replace the old one with the new one.
500	*/
501	as_switch(old_as, new_as);
502	}
503
504	TASK = THREAD->task;
505	before_task_runs();
506	}
507
508	if (old_task)
509	task_release(old_task);
510	if (old_as)
511	as_release(old_as);
512
513	spinlock_lock(&THREAD->lock);
514	THREAD->state = Running;
515
516	#ifdef SCHEDULER_VERBOSE
517	printf("cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
518	", nrdy=%ld)\n", CPU->id, THREAD->tid, THREAD->priority,
519	THREAD->ticks, atomic_get(&CPU->nrdy));
520	#endif
521
522	/*
523	* Some architectures provide late kernel PA2KA(identity)
524	* mapping in a page fault handler. However, the page fault
525	* handler uses the kernel stack of the running thread and
526	* therefore cannot be used to map it. The kernel stack, if
527	* necessary, is to be mapped in before_thread_runs(). This
528	* function must be executed before the switch to the new stack.
529	*/
530	before_thread_runs();
531
532	/*
533	* Copy the knowledge of CPU, TASK, THREAD and preemption counter to
534	* thread's stack.
535	*/
536	the_copy(THE, (the_t *) THREAD->kstack);
537
538	context_restore(&THREAD->saved_context);
539	/* not reached */
540	}
541
542	#ifdef CONFIG_SMP
543	/** Load balancing thread
544	*
545	* SMP load balancing thread, supervising thread supplies
546	* for the CPU it's wired to.
547	*
548	* @param arg Generic thread argument (unused).
549	*
550	*/
551	void kcpulb(void *arg)
552	{
553	thread_t *t;
554	int count;
555	atomic_count_t average;
556	unsigned int i;
557	int j;
558	int k = 0;
559	ipl_t ipl;
560
561	/*
562	* Detach kcpulb as nobody will call thread_join_timeout() on it.
563	*/
564	thread_detach(THREAD);
565
566	loop:
567	/*
568	* Work in 1s intervals.
569	*/
570	thread_sleep(1);
571
572	not_satisfied:
573	/*
574	* Calculate the number of threads that will be migrated/stolen from
575	* other CPU's. Note that situation can have changed between two
576	* passes. Each time get the most up to date counts.
577	*/
578	average = atomic_get(&nrdy) / config.cpu_active + 1;
579	count = average - atomic_get(&CPU->nrdy);
580
581	if (count <= 0)
582	goto satisfied;
583
584	/*
585	* Searching least priority queues on all CPU's first and most priority
586	* queues on all CPU's last.
587	*/
588	for (j = RQ_COUNT - 1; j >= 0; j--) {
589	for (i = 0; i < config.cpu_active; i++) {
590	link_t *l;
591	runq_t *r;
592	cpu_t *cpu;
593
594	cpu = &cpus[(i + k) % config.cpu_active];
595
596	/*
597	* Not interested in ourselves.
598	* Doesn't require interrupt disabling for kcpulb has
599	* THREAD_FLAG_WIRED.
600	*/
601	if (CPU == cpu)
602	continue;
603	if (atomic_get(&cpu->nrdy) <= average)
604	continue;
605
606	ipl = interrupts_disable();
607	r = &cpu->rq[j];
608	spinlock_lock(&r->lock);
609	if (r->n == 0) {
610	spinlock_unlock(&r->lock);
611	interrupts_restore(ipl);
612	continue;
613	}
614
615	t = NULL;
616	l = r->rq_head.prev; /* search rq from the back */
617	while (l != &r->rq_head) {
618	t = list_get_instance(l, thread_t, rq_link);
619	/*
620	* We don't want to steal CPU-wired threads
621	* neither threads already stolen. The latter
622	* prevents threads from migrating between CPU's
623	* without ever being run. We don't want to
624	* steal threads whose FPU context is still in
625	* CPU.
626	*/
627	spinlock_lock(&t->lock);
628	if ((!(t->flags & (THREAD_FLAG_WIRED \|
629	THREAD_FLAG_STOLEN))) &&
630	(!(t->fpu_context_engaged))) {
631	/*
632	* Remove t from r.
633	*/
634	spinlock_unlock(&t->lock);
635
636	atomic_dec(&cpu->nrdy);
637	atomic_dec(&nrdy);
638
639	r->n--;
640	list_remove(&t->rq_link);
641
642	break;
643	}
644	spinlock_unlock(&t->lock);
645	l = l->prev;
646	t = NULL;
647	}
648	spinlock_unlock(&r->lock);
649
650	if (t) {
651	/*
652	* Ready t on local CPU
653	*/
654	spinlock_lock(&t->lock);
655	#ifdef KCPULB_VERBOSE
656	printf("kcpulb%u: TID %" PRIu64 " -> cpu%u, "
657	"nrdy=%ld, avg=%ld\n", CPU->id, t->tid,
658	CPU->id, atomic_get(&CPU->nrdy),
659	atomic_get(&nrdy) / config.cpu_active);
660	#endif
661	t->flags \|= THREAD_FLAG_STOLEN;
662	t->state = Entering;
663	spinlock_unlock(&t->lock);
664
665	thread_ready(t);
666
667	interrupts_restore(ipl);
668
669	if (--count == 0)
670	goto satisfied;
671
672	/*
673	* We are not satisfied yet, focus on another
674	* CPU next time.
675	*/
676	k++;
677
678	continue;
679	}
680	interrupts_restore(ipl);
681	}
682	}
683
684	if (atomic_get(&CPU->nrdy)) {
685	/*
686	* Be a little bit light-weight and let migrated threads run.
687	*/
688	scheduler();
689	} else {
690	/*
691	* We failed to migrate a single thread.
692	* Give up this turn.
693	*/
694	goto loop;
695	}
696
697	goto not_satisfied;
698
699	satisfied:
700	goto loop;
701	}
702
703	#endif /* CONFIG_SMP */
704
705
706	/** Print information about threads & scheduler queues */
707	void sched_print_list(void)
708	{
709	ipl_t ipl;
710	unsigned int cpu, i;
711	runq_t *r;
712	thread_t *t;
713	link_t *cur;
714
715	/* We are going to mess with scheduler structures,
716	* let's not be interrupted */
717	ipl = interrupts_disable();
718	for (cpu = 0; cpu < config.cpu_count; cpu++) {
719
720	if (!cpus[cpu].active)
721	continue;
722
723	spinlock_lock(&cpus[cpu].lock);
724	printf("cpu%u: address=%p, nrdy=%ld, needs_relink=%" PRIs "\n",
725	cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
726	cpus[cpu].needs_relink);
727
728	for (i = 0; i < RQ_COUNT; i++) {
729	r = &cpus[cpu].rq[i];
730	spinlock_lock(&r->lock);
731	if (!r->n) {
732	spinlock_unlock(&r->lock);
733	continue;
734	}
735	printf("\trq[%u]: ", i);
736	for (cur = r->rq_head.next; cur != &r->rq_head;
737	cur = cur->next) {
738	t = list_get_instance(cur, thread_t, rq_link);
739	printf("%" PRIu64 "(%s) ", t->tid,
740	thread_states[t->state]);
741	}
742	printf("\n");
743	spinlock_unlock(&r->lock);
744	}
745	spinlock_unlock(&cpus[cpu].lock);
746	}
747
748	interrupts_restore(ipl);
749	}
750
751	/** @}
752	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/kernel/generic/src/proc/scheduler.c@ 481d4751

Download in other formats: