source: mainline/kernel/generic/src/proc/scheduler.c@ cb4afd8b

Last change on this file since cb4afd8b was ea7890e7, checked in by Jakub Jermar <jakub@…>, 18 years ago

More efficient and simpler task termination.

Based on the assumption that, after its creation, only the task itself can create more threads for itself,
the last thread with userspace context to execute thread_exit() performs the futex and IPC cleanup. When
the task has no threads left, it is destroyed. Both the cleanup and the destruction are controlled by
reference counting.

As for userspace threads, even though there could be a global garbage collector for joining threads, it is
much simpler if the uinit thread detaches itself before switching to userspace.

task_kill() is now an idempotent operation. It just instructs the threads within a task to exit.

Change in the name of a thread state: Undead → JoinMe.
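
For orientation, the reference-counting scheme can be pictured with a minimal, self-contained sketch (hypothetical names and plain C11 userspace code, not the kernel's actual API): each attached thread holds one reference to its task, thread_exit() drops that reference, and whichever caller drops the last one tears the task down, which is also what makes repeated task_kill() requests harmless.

/* Minimal illustrative sketch -- hypothetical names, not HelenOS kernel code. */
#include <stdatomic.h>
#include <stdlib.h>

typedef struct example_task {
    atomic_int refcount;            /* one reference per attached thread */
} example_task_t;

static void example_task_destroy(example_task_t *task)
{
    free(task);                     /* stand-in for the real task teardown */
}

static void example_task_release(example_task_t *task)
{
    /* atomic_fetch_sub() returns the old value; whoever takes it to 0 wins. */
    if (atomic_fetch_sub(&task->refcount, 1) == 1)
        example_task_destroy(task);
}

In this picture, killing a task merely asks every thread to exit; each exiting thread calls the release function, and asking again changes nothing.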

  • Property mode set to 100644
File size: 16.4 KB
/*
 * Copyright (c) 2001-2007 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup genericproc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and the kcpulb kernel thread, which
 * performs load balancing of the per-CPU run queues.
 */

#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <func.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <print.h>
#include <debug.h>

static void before_task_runs(void);
static void before_thread_runs(void);
static void after_thread_ran(void);
static void scheduler_separated_stack(void);

atomic_t nrdy;    /**< Number of ready threads in the system. */

/** Carry out actions before a new task runs. */
void before_task_runs(void)
{
    before_task_runs_arch();
}

/** Take actions before a new thread runs.
 *
 * Perform actions that need to be taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry.
 *
 */
void before_thread_runs(void)
{
    before_thread_runs_arch();
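    /*
     * With CONFIG_FPU_LAZY the FPU is left disabled unless this CPU already
     * owns THREAD's FPU state; the thread's first FPU instruction is then
     * expected to trap and reach scheduler_fpu_lazy_request(), which performs
     * the actual FPU context switch. Without CONFIG_FPU_LAZY the FPU context
     * is switched eagerly right here.
     */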
#ifdef CONFIG_FPU_LAZY
    if (THREAD == CPU->fpu_owner)
        fpu_enable();
    else
        fpu_disable();
#else
    fpu_enable();
    if (THREAD->fpu_context_exists)
        fpu_context_restore(THREAD->saved_fpu_context);
    else {
        fpu_init();
        THREAD->fpu_context_exists = 1;
    }
#endif
}

/** Take actions after THREAD had run.
 *
 * Perform actions that need to be taken after the running thread
 * has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry.
 *
 */
void after_thread_ran(void)
{
    after_thread_ran_arch();
}

#ifdef CONFIG_FPU_LAZY
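/** Service a lazy FPU context switch request.
 *
 * Presumably invoked from the architecture's "FPU unavailable" fault path
 * when a thread that does not own this CPU's FPU state executes an FPU
 * instruction.
 */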
void scheduler_fpu_lazy_request(void)
{
restart:
    fpu_enable();
    spinlock_lock(&CPU->lock);

    /* Save old context */
    if (CPU->fpu_owner != NULL) {
        spinlock_lock(&CPU->fpu_owner->lock);
        fpu_context_save(CPU->fpu_owner->saved_fpu_context);
        /* don't prevent migration */
        CPU->fpu_owner->fpu_context_engaged = 0;
        spinlock_unlock(&CPU->fpu_owner->lock);
        CPU->fpu_owner = NULL;
    }

    spinlock_lock(&THREAD->lock);
    if (THREAD->fpu_context_exists) {
        fpu_context_restore(THREAD->saved_fpu_context);
    } else {
        /* Allocate FPU context */
        if (!THREAD->saved_fpu_context) {
            /* Might sleep */
            spinlock_unlock(&THREAD->lock);
            spinlock_unlock(&CPU->lock);
            THREAD->saved_fpu_context =
                (fpu_context_t *) slab_alloc(fpu_context_slab, 0);
            /* We may have switched CPUs during slab_alloc */
            goto restart;
        }
        fpu_init();
        THREAD->fpu_context_exists = 1;
    }
    CPU->fpu_owner = THREAD;
    THREAD->fpu_context_engaged = 1;
    spinlock_unlock(&THREAD->lock);

    spinlock_unlock(&CPU->lock);
}
#endif

/** Initialize scheduler
 *
 * Initialize the kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled according
 * to thread accounting and scheduler policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
    thread_t *t;
    runq_t *r;
    int i;

    ASSERT(CPU != NULL);

loop:
    interrupts_enable();

    if (atomic_get(&CPU->nrdy) == 0) {
        /*
         * Since there is nothing to run, the CPU sleeps
         * until a hardware interrupt or an IPI arrives.
         * This saves energy and helps hyperthreading.
         */

        /*
         * An interrupt might occur right before cpu_sleep() and wake up
         * a thread. In that case the CPU goes to sleep anyway, even
         * though a runnable thread exists.
         */

        cpu_sleep();
        goto loop;
    }

    interrupts_disable();

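    /*
     * Run queues are ordered by priority, rq[0] being the highest-priority
     * queue, so the first non-empty queue encountered below wins.
     */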
    for (i = 0; i < RQ_COUNT; i++) {
        r = &CPU->rq[i];
        spinlock_lock(&r->lock);
        if (r->n == 0) {
            /*
             * If this queue is empty, try a lower-priority queue.
             */
            spinlock_unlock(&r->lock);
            continue;
        }

        atomic_dec(&CPU->nrdy);
        atomic_dec(&nrdy);
        r->n--;

        /*
         * Take the first thread from the queue.
         */
        t = list_get_instance(r->rq_head.next, thread_t, rq_link);
        list_remove(&t->rq_link);

        spinlock_unlock(&r->lock);

        spinlock_lock(&t->lock);
        t->cpu = CPU;

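        /*
         * The time quantum is derived from the run queue index:
         * (i + 1) * 10 ms, so threads taken from lower-priority
         * queues receive a longer quantum.
         */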
        t->ticks = us2ticks((i + 1) * 10000);
        t->priority = i;    /* correct rq index */

        /*
         * Clear the THREAD_FLAG_STOLEN flag so that t can be migrated
         * when load balancing needs emerge.
         */
        t->flags &= ~THREAD_FLAG_STOLEN;
        spinlock_unlock(&t->lock);

        return t;
    }
    goto loop;

}

/** Prevent rq starvation
 *
 * Prevent low priority threads from starving in rq's.
 *
 * When the function decides to relink rq's, it reconnects the
 * respective pointers so that, as a result, threads with priority
 * greater than start are moved to the next higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
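/*
 * Example: with start == 0 and needs_relink above NEEDS_RELINK_MAX, the
 * contents of rq[1] are appended to rq[0], rq[2] to rq[1], and so on,
 * so every thread effectively moves up one priority level.
 */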
static void relink_rq(int start)
{
    link_t head;
    runq_t *r;
    int i, n;

    list_initialize(&head);
    spinlock_lock(&CPU->lock);
    if (CPU->needs_relink > NEEDS_RELINK_MAX) {
        for (i = start; i < RQ_COUNT - 1; i++) {
            /* remember and empty rq[i + 1] */
            r = &CPU->rq[i + 1];
            spinlock_lock(&r->lock);
            list_concat(&head, &r->rq_head);
            n = r->n;
            r->n = 0;
            spinlock_unlock(&r->lock);

            /* append rq[i + 1] to rq[i] */
            r = &CPU->rq[i];
            spinlock_lock(&r->lock);
            list_concat(&r->rq_head, &head);
            r->n += n;
            spinlock_unlock(&r->lock);
        }
        CPU->needs_relink = 0;
    }
    spinlock_unlock(&CPU->lock);

}

/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler(void)
{
    volatile ipl_t ipl;

    ASSERT(CPU != NULL);

    ipl = interrupts_disable();

    if (atomic_get(&haltstate))
        halt();

    if (THREAD) {
        spinlock_lock(&THREAD->lock);

        /* Update thread accounting */
        THREAD->cycles += get_cycle() - THREAD->last_cycle;

#ifndef CONFIG_FPU_LAZY
        fpu_context_save(THREAD->saved_fpu_context);
#endif
        if (!context_save(&THREAD->saved_context)) {
            /*
             * This is the place where threads leave scheduler();
             */

            /* Save current CPU cycle */
            THREAD->last_cycle = get_cycle();

            spinlock_unlock(&THREAD->lock);
            interrupts_restore(THREAD->saved_context.ipl);

            return;
        }

        /*
         * The interrupt priority level of the preempted thread is
         * recorded here to facilitate scheduler() invocations from
         * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
         */
        THREAD->saved_context.ipl = ipl;
    }

    /*
     * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, VM
     * and the preemption counter. At this point THE could be coming either
     * from THREAD's or CPU's stack.
     */
    the_copy(THE, (the_t *) CPU->stack);

    /*
     * We may not keep the old stack.
     * Reason: If we kept the old stack and got blocked, for instance, in
     * find_best_thread(), the old thread could get rescheduled by another
     * CPU and overwrite the part of its own stack that was also used by
     * the scheduler on this CPU.
     *
     * Moreover, we have to bypass the compiler-generated POP sequence
     * which is fooled by SP being set to the very top of the stack.
     * Therefore the scheduler() function continues in
     * scheduler_separated_stack().
     */
    context_save(&CPU->saved_context);
    context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
        (uintptr_t) CPU->stack, CPU_STACK_SIZE);
    context_restore(&CPU->saved_context);
    /* not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function, running on a new stack.
 * It handles the actual context switch to a new thread.
 *
 * Assume THREAD->lock is held.
 */
void scheduler_separated_stack(void)
{
    int priority;
    DEADLOCK_PROBE_INIT(p_joinwq);

    ASSERT(CPU != NULL);

    if (THREAD) {
        /* Must be run after the switch to the scheduler stack */
        after_thread_ran();

        switch (THREAD->state) {
        case Running:
            spinlock_unlock(&THREAD->lock);
            thread_ready(THREAD);
            break;

        case Exiting:
repeat:
            if (THREAD->detached) {
                thread_destroy(THREAD);
            } else {
                /*
                 * The thread structure is kept allocated until
                 * somebody calls thread_detach() on it.
                 */
                if (!spinlock_trylock(&THREAD->join_wq.lock)) {
                    /*
                     * Avoid deadlock.
                     */
                    spinlock_unlock(&THREAD->lock);
                    delay(HZ);
                    spinlock_lock(&THREAD->lock);
                    DEADLOCK_PROBE(p_joinwq,
                        DEADLOCK_THRESHOLD);
                    goto repeat;
                }
                _waitq_wakeup_unsafe(&THREAD->join_wq,
                    WAKEUP_FIRST);
                spinlock_unlock(&THREAD->join_wq.lock);

                THREAD->state = JoinMe;
                spinlock_unlock(&THREAD->lock);
            }
            break;

        case Sleeping:
            /*
             * Prefer the thread after it's woken up.
             */
            THREAD->priority = -1;

            /*
             * We need to release wq->lock which we locked in
             * waitq_sleep(). The address of wq->lock is kept in
             * THREAD->sleep_queue.
             */
            spinlock_unlock(&THREAD->sleep_queue->lock);

            /*
             * Check for possible requests for out-of-context
             * invocation.
             */
            if (THREAD->call_me) {
                THREAD->call_me(THREAD->call_me_with);
                THREAD->call_me = NULL;
                THREAD->call_me_with = NULL;
            }

            spinlock_unlock(&THREAD->lock);

            break;

        default:
            /*
             * Entering state is unexpected.
             */
            panic("tid%llu: unexpected state %s\n", THREAD->tid,
                thread_states[THREAD->state]);
            break;
        }

        THREAD = NULL;
    }

    THREAD = find_best_thread();

    spinlock_lock(&THREAD->lock);
    priority = THREAD->priority;
    spinlock_unlock(&THREAD->lock);

    relink_rq(priority);

    /*
     * If the old and the new task are the same, a lot of work is avoided.
     */
    if (TASK != THREAD->task) {
        as_t *as1 = NULL;
        as_t *as2;

        if (TASK) {
            spinlock_lock(&TASK->lock);
            as1 = TASK->as;
            spinlock_unlock(&TASK->lock);
        }

        spinlock_lock(&THREAD->task->lock);
        as2 = THREAD->task->as;
        spinlock_unlock(&THREAD->task->lock);

        /*
         * Note that it is possible for two tasks to share one address
         * space.
         */
        if (as1 != as2) {
            /*
             * Both tasks and address spaces are different.
             * Replace the old one with the new one.
             */
            as_switch(as1, as2);
        }
        TASK = THREAD->task;
        before_task_runs();
    }

    spinlock_lock(&THREAD->lock);
    THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
    printf("cpu%d: tid %llu (priority=%d, ticks=%llu, nrdy=%ld)\n",
        CPU->id, THREAD->tid, THREAD->priority, THREAD->ticks,
        atomic_get(&CPU->nrdy));
#endif

    /*
     * Some architectures provide late kernel PA2KA(identity)
     * mapping in a page fault handler. However, the page fault
     * handler uses the kernel stack of the running thread and
     * therefore cannot be used to map it. The kernel stack, if
     * necessary, is to be mapped in before_thread_runs(). This
     * function must be executed before the switch to the new stack.
     */
    before_thread_runs();

    /*
     * Copy the knowledge of CPU, TASK, THREAD and the preemption counter
     * to the thread's stack.
     */
    the_copy(THE, (the_t *) THREAD->kstack);

    context_restore(&THREAD->saved_context);
    /* not reached */
}

#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * The SMP load balancing thread. It supervises the supply of threads
 * for the CPU it is wired to.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
    thread_t *t;
    int count, average, j, k = 0;
    unsigned int i;
    ipl_t ipl;

    /*
     * Detach kcpulb as nobody will call thread_join_timeout() on it.
     */
    thread_detach(THREAD);

loop:
    /*
     * Work in 1s intervals.
     */
    thread_sleep(1);

not_satisfied:
    /*
     * Calculate the number of threads to migrate (steal) from other
     * CPUs. Note that the situation may have changed between two
     * passes; always use the most up-to-date counts.
     */
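    /*
     * Worked example: with 9 ready threads system-wide and 3 active CPUs,
     * average == 9 / 3 + 1 == 4, so a CPU that currently has only one
     * ready thread will try to steal count == 4 - 1 == 3 threads.
     */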
    average = atomic_get(&nrdy) / config.cpu_active + 1;
    count = average - atomic_get(&CPU->nrdy);

    if (count <= 0)
        goto satisfied;

    /*
     * Search the lowest-priority queues on all CPUs first and the
     * highest-priority queues on all CPUs last.
     */
    for (j = RQ_COUNT - 1; j >= 0; j--) {
        for (i = 0; i < config.cpu_active; i++) {
            link_t *l;
            runq_t *r;
            cpu_t *cpu;

            cpu = &cpus[(i + k) % config.cpu_active];

            /*
             * Not interested in ourselves.
             * No interrupt disabling is needed here because kcpulb
             * has THREAD_FLAG_WIRED and thus cannot migrate.
             */
            if (CPU == cpu)
                continue;
            if (atomic_get(&cpu->nrdy) <= average)
                continue;

            ipl = interrupts_disable();
            r = &cpu->rq[j];
            spinlock_lock(&r->lock);
            if (r->n == 0) {
                spinlock_unlock(&r->lock);
                interrupts_restore(ipl);
                continue;
            }

            t = NULL;
            l = r->rq_head.prev;    /* search rq from the back */
            while (l != &r->rq_head) {
                t = list_get_instance(l, thread_t, rq_link);
                /*
                 * We don't want to steal CPU-wired threads or
                 * threads that have already been stolen. The
                 * latter prevents threads from bouncing between
                 * CPUs without ever being run. We also don't
                 * want to steal threads whose FPU context is
                 * still in the CPU.
                 */
                spinlock_lock(&t->lock);
                if ((!(t->flags & (THREAD_FLAG_WIRED |
                    THREAD_FLAG_STOLEN))) &&
                    (!(t->fpu_context_engaged))) {
                    /*
                     * Remove t from r.
                     */
                    spinlock_unlock(&t->lock);

                    atomic_dec(&cpu->nrdy);
                    atomic_dec(&nrdy);

                    r->n--;
                    list_remove(&t->rq_link);

                    break;
                }
                spinlock_unlock(&t->lock);
                l = l->prev;
                t = NULL;
            }
            spinlock_unlock(&r->lock);

            if (t) {
                /*
                 * Ready t on local CPU
                 */
                spinlock_lock(&t->lock);
#ifdef KCPULB_VERBOSE
                printf("kcpulb%d: TID %llu -> cpu%d, nrdy=%ld, "
                    "avg=%ld\n", CPU->id, t->tid, CPU->id,
                    atomic_get(&CPU->nrdy),
                    atomic_get(&nrdy) / config.cpu_active);
#endif
                t->flags |= THREAD_FLAG_STOLEN;
                t->state = Entering;
                spinlock_unlock(&t->lock);

                thread_ready(t);

                interrupts_restore(ipl);

                if (--count == 0)
                    goto satisfied;

                /*
                 * We are not satisfied yet, focus on another
                 * CPU next time.
                 */
                k++;

                continue;
            }
            interrupts_restore(ipl);
        }
    }

    if (atomic_get(&CPU->nrdy)) {
        /*
         * Be a little bit light-weight and let migrated threads run.
         */
        scheduler();
    } else {
        /*
         * We failed to migrate a single thread.
         * Give up this turn.
         */
        goto loop;
    }

    goto not_satisfied;

satisfied:
    goto loop;
}

#endif /* CONFIG_SMP */

/** Print information about threads & scheduler queues */
void sched_print_list(void)
{
    ipl_t ipl;
    unsigned int cpu, i;
    runq_t *r;
    thread_t *t;
    link_t *cur;

    /*
     * We are going to mess with scheduler structures,
     * so let us not be interrupted.
     */
    ipl = interrupts_disable();
    for (cpu = 0; cpu < config.cpu_count; cpu++) {

        if (!cpus[cpu].active)
            continue;

        spinlock_lock(&cpus[cpu].lock);
        printf("cpu%d: address=%p, nrdy=%ld, needs_relink=%ld\n",
            cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
            cpus[cpu].needs_relink);

        for (i = 0; i < RQ_COUNT; i++) {
            r = &cpus[cpu].rq[i];
            spinlock_lock(&r->lock);
            if (!r->n) {
                spinlock_unlock(&r->lock);
                continue;
            }
            printf("\trq[%d]: ", i);
            for (cur = r->rq_head.next; cur != &r->rq_head;
                cur = cur->next) {
                t = list_get_instance(cur, thread_t, rq_link);
                printf("%llu(%s) ", t->tid,
                    thread_states[t->state]);
            }
            printf("\n");
            spinlock_unlock(&r->lock);
        }
        spinlock_unlock(&cpus[cpu].lock);
    }

    interrupts_restore(ipl);
}

/** @}
 */