Context Navigation

scheduler.c@ 111b9b9

Visit:

ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 111b9b9 was 111b9b9, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 years ago

Reimplement waitq using thread_wait/wakeup

This adds a few functions to the thread API which can be
summarized as "stop running until woken up by others".
The ordering and context-switching concerns are thus yeeted
to this abstraction and waitq only deals with maintaining
the queues. Overall, this makes the control flow in waitq
much easier to navigate.

Property mode set to 100644

File size: 16.7 KB

Line
1	/*
2	* Copyright (c) 2010 Jakub Jermar
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/** @addtogroup kernel_generic_proc
30	* @{
31	*/
32
33	/**
34	* @file
35	* @brief Scheduler and load balancing.
36	*
37	* This file contains the scheduler and kcpulb kernel thread which
38	* performs load-balancing of per-CPU run queues.
39	*/
40
41	#include <assert.h>
42	#include <atomic.h>
43	#include <proc/scheduler.h>
44	#include <proc/thread.h>
45	#include <proc/task.h>
46	#include <mm/frame.h>
47	#include <mm/page.h>
48	#include <mm/as.h>
49	#include <time/timeout.h>
50	#include <time/delay.h>
51	#include <arch/asm.h>
52	#include <arch/faddr.h>
53	#include <arch/cycle.h>
54	#include <atomic.h>
55	#include <synch/spinlock.h>
56	#include <config.h>
57	#include <context.h>
58	#include <fpu_context.h>
59	#include <halt.h>
60	#include <arch.h>
61	#include <adt/list.h>
62	#include <panic.h>
63	#include <cpu.h>
64	#include <stdio.h>
65	#include <log.h>
66	#include <stacktrace.h>
67
/* Second half of the scheduler; entered on the CPU's own stack. */
static void scheduler_separated_stack(void);

atomic_size_t nrdy;  /**< Number of ready threads in the system. */
71
/** Carry out actions before a new task runs.
 *
 * Delegates entirely to the architecture-specific hook.
 *
 */
static void before_task_runs(void)
{
	before_task_runs_arch();
}
77
/** Take actions before the new thread runs.
 *
 * Runs the architecture hook, brings the FPU into the state required
 * by the configured FPU policy for THREAD and, when user-space
 * debugging is compiled in, prints a stack trace previously requested
 * through THREAD->btrace.
 *
 * THREAD->lock is locked on entry
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();

#if defined(CONFIG_FPU_LAZY)
	/* Lazy switching: keep the FPU usable only for its current owner. */
	if (CPU->fpu_owner != THREAD)
		fpu_disable();
	else
		fpu_enable();
#elif defined(CONFIG_FPU)
	/* Eager switching: load the context, or create it on first use. */
	fpu_enable();
	if (!THREAD->fpu_context_exists) {
		fpu_init();
		THREAD->fpu_context_exists = true;
	} else {
		fpu_context_restore(&THREAD->fpu_context);
	}
#endif

#ifdef CONFIG_UDEBUG
	if (THREAD->btrace) {
		istate_t *uspace_istate = THREAD->udebug.uspace_state;

		if (uspace_istate != NULL) {
			printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
			stack_trace_istate(uspace_istate);
		}

		/* The request has been served; clear it. */
		THREAD->btrace = false;
	}
#endif
}
118
/** Take actions after THREAD has run.
 *
 * Called once the running thread has been taken off the CPU by the
 * scheduler. Delegates entirely to the architecture-specific hook.
 *
 * THREAD->lock is locked on entry
 *
 */
static void after_thread_ran(void)
{
	after_thread_ran_arch();
}
132
#ifdef CONFIG_FPU_LAZY
/** Hand the FPU over to the current thread on demand.
 *
 * Enables the FPU, saves the FPU context of the previous owner (if
 * there is one) into that thread, then restores or initializes the FPU
 * context of THREAD and records THREAD as the new owner.
 *
 * CPU->lock is held for the whole operation; each thread's lock is
 * held only while that thread's FPU fields are being accessed.
 *
 */
void scheduler_fpu_lazy_request(void)
{
	fpu_enable();
	irq_spinlock_lock(&CPU->lock, false);

	/* Save old context */
	thread_t *prev_owner = CPU->fpu_owner;
	if (prev_owner != NULL) {
		irq_spinlock_lock(&prev_owner->lock, false);
		fpu_context_save(&prev_owner->fpu_context);

		/* Don't prevent migration */
		prev_owner->fpu_context_engaged = false;
		irq_spinlock_unlock(&prev_owner->lock, false);
		CPU->fpu_owner = NULL;
	}

	irq_spinlock_lock(&THREAD->lock, false);
	if (!THREAD->fpu_context_exists) {
		/* First FPU use by this thread: start from a fresh state. */
		fpu_init();
		THREAD->fpu_context_exists = true;
	} else {
		fpu_context_restore(&THREAD->fpu_context);
	}

	CPU->fpu_owner = THREAD;
	THREAD->fpu_context_engaged = true;
	irq_spinlock_unlock(&THREAD->lock, false);

	irq_spinlock_unlock(&CPU->lock, false);
}
#endif /* CONFIG_FPU_LAZY */
165
/** Initialize the kernel scheduler.
 *
 * Currently a no-op.
 *
 */
void scheduler_init(void)
{
	/* Nothing to set up. */
}
174
175	/** Get thread to be scheduled
176	*
177	* Get the optimal thread to be scheduled
178	* according to thread accounting and scheduler
179	* policy.
180	*
181	* @return Thread to be scheduled.
182	*
183	*/
184	static thread_t *find_best_thread(void)
185	{
186	assert(CPU != NULL);
187
188	loop:
189
190	if (atomic_load(&CPU->nrdy) == 0) {
191	/*
192	* For there was nothing to run, the CPU goes to sleep
193	* until a hardware interrupt or an IPI comes.
194	* This improves energy saving and hyperthreading.
195	*/
196	irq_spinlock_lock(&CPU->lock, false);
197	CPU->idle = true;
198	irq_spinlock_unlock(&CPU->lock, false);
199	interrupts_enable();
200
201	/*
202	* An interrupt might occur right now and wake up a thread.
203	* In such case, the CPU will continue to go to sleep
204	* even though there is a runnable thread.
205	*/
206	cpu_sleep();
207	interrupts_disable();
208	goto loop;
209	}
210
211	assert(!CPU->idle);
212
213	unsigned int i;
214	for (i = 0; i < RQ_COUNT; i++) {
215	irq_spinlock_lock(&(CPU->rq[i].lock), false);
216	if (CPU->rq[i].n == 0) {
217	/*
218	* If this queue is empty, try a lower-priority queue.
219	*/
220	irq_spinlock_unlock(&(CPU->rq[i].lock), false);
221	continue;
222	}
223
224	atomic_dec(&CPU->nrdy);
225	atomic_dec(&nrdy);
226	CPU->rq[i].n--;
227
228	/*
229	* Take the first thread from the queue.
230	*/
231	thread_t *thread = list_get_instance(
232	list_first(&CPU->rq[i].rq), thread_t, rq_link);
233	list_remove(&thread->rq_link);
234
235	irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);
236
237	thread->cpu = CPU;
238	thread->priority = i; /* Correct rq index */
239
240	/* Time allocation in microseconds. */
241	uint64_t time_to_run = (i + 1) * 10000;
242
243	/* This is safe because interrupts are disabled. */
244	CPU->preempt_deadline = CPU->current_clock_tick + us2ticks(time_to_run);
245
246	/*
247	* Clear the stolen flag so that it can be migrated
248	* when load balancing needs emerge.
249	*/
250	thread->stolen = false;
251	irq_spinlock_unlock(&thread->lock, false);
252
253	return thread;
254	}
255
256	goto loop;
257	}
258
259	/** Prevent rq starvation
260	*
261	* Prevent low priority threads from starving in rq's.
262	*
263	* When the function decides to relink rq's, it reconnects
264	* respective pointers so that in result threads with 'pri'
265	* greater or equal start are moved to a higher-priority queue.
266	*
267	* @param start Threshold priority.
268	*
269	*/
270	static void relink_rq(int start)
271	{
272	if (CPU->current_clock_tick < CPU->relink_deadline)
273	return;
274
275	CPU->relink_deadline = CPU->current_clock_tick + NEEDS_RELINK_MAX;
276
277	list_t list;
278	list_initialize(&list);
279
280	irq_spinlock_lock(&CPU->lock, false);
281
282	for (int i = start; i < RQ_COUNT - 1; i++) {
283	/* Remember and empty rq[i + 1] */
284
285	irq_spinlock_lock(&CPU->rq[i + 1].lock, false);
286	list_concat(&list, &CPU->rq[i + 1].rq);
287	size_t n = CPU->rq[i + 1].n;
288	CPU->rq[i + 1].n = 0;
289	irq_spinlock_unlock(&CPU->rq[i + 1].lock, false);
290
291	/* Append rq[i + 1] to rq[i] */
292
293	irq_spinlock_lock(&CPU->rq[i].lock, false);
294	list_concat(&CPU->rq[i].rq, &list);
295	CPU->rq[i].n += n;
296	irq_spinlock_unlock(&CPU->rq[i].lock, false);
297	}
298
299	irq_spinlock_unlock(&CPU->lock, false);
300	}
301
302	void scheduler(void)
303	{
304	ipl_t ipl = interrupts_disable();
305
306	if (atomic_load(&haltstate))
307	halt();
308
309	if (THREAD) {
310	irq_spinlock_lock(&THREAD->lock, false);
311	}
312
313	scheduler_locked(ipl);
314	}
315
316	/** The scheduler
317	*
318	* The thread scheduling procedure.
319	* Passes control directly to
320	* scheduler_separated_stack().
321	*
322	*/
323	void scheduler_locked(ipl_t ipl)
324	{
325	assert(CPU != NULL);
326
327	if (THREAD) {
328	/* Update thread kernel accounting */
329	THREAD->kcycles += get_cycle() - THREAD->last_cycle;
330
331	#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
332	fpu_context_save(&THREAD->fpu_context);
333	#endif
334	if (!context_save(&THREAD->saved_context)) {
335	/*
336	* This is the place where threads leave scheduler();
337	*/
338
339	/* Save current CPU cycle */
340	THREAD->last_cycle = get_cycle();
341
342	irq_spinlock_unlock(&THREAD->lock, false);
343	interrupts_restore(THREAD->saved_ipl);
344
345	return;
346	}
347
348	/*
349	* Interrupt priority level of preempted thread is recorded
350	* here to facilitate scheduler() invocations from
351	* interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
352	*
353	*/
354	THREAD->saved_ipl = ipl;
355	}
356
357	/*
358	* Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS
359	* and preemption counter. At this point CURRENT could be coming either
360	* from THREAD's or CPU's stack.
361	*
362	*/
363	current_copy(CURRENT, (current_t *) CPU->stack);
364
365	/*
366	* We may not keep the old stack.
367	* Reason: If we kept the old stack and got blocked, for instance, in
368	* find_best_thread(), the old thread could get rescheduled by another
369	* CPU and overwrite the part of its own stack that was also used by
370	* the scheduler on this CPU.
371	*
372	* Moreover, we have to bypass the compiler-generated POP sequence
373	* which is fooled by SP being set to the very top of the stack.
374	* Therefore the scheduler() function continues in
375	* scheduler_separated_stack().
376	*
377	*/
378	context_t ctx;
379	context_save(&ctx);
380	context_set(&ctx, FADDR(scheduler_separated_stack),
381	(uintptr_t) CPU->stack, STACK_SIZE);
382	context_restore(&ctx);
383
384	/* Not reached */
385	}
386
387	/** Scheduler stack switch wrapper
388	*
389	* Second part of the scheduler() function
390	* using new stack. Handling the actual context
391	* switch to a new thread.
392	*
393	*/
394	void scheduler_separated_stack(void)
395	{
396	task_t *old_task = TASK;
397	as_t *old_as = AS;
398
399	assert((!THREAD) \|\| (irq_spinlock_locked(&THREAD->lock)));
400	assert(CPU != NULL);
401	assert(interrupts_disabled());
402
403	/*
404	* Hold the current task and the address space to prevent their
405	* possible destruction should thread_destroy() be called on this or any
406	* other processor while the scheduler is still using them.
407	*/
408	if (old_task)
409	task_hold(old_task);
410
411	if (old_as)
412	as_hold(old_as);
413
414	if (THREAD) {
415	/* Must be run after the switch to scheduler stack */
416	after_thread_ran();
417
418	switch (THREAD->state) {
419	case Running:
420	irq_spinlock_unlock(&THREAD->lock, false);
421	thread_ready(THREAD);
422	break;
423
424	case Exiting:
425	irq_spinlock_unlock(&THREAD->lock, false);
426	waitq_close(&THREAD->join_wq);
427
428	/*
429	* Release the reference CPU has for the thread.
430	* If there are no other references (e.g. threads calling join),
431	* the thread structure is deallocated.
432	*/
433	thread_put(THREAD);
434	break;
435
436	case Sleeping:
437	/*
438	* Prefer the thread after it's woken up.
439	*/
440	THREAD->priority = -1;
441	irq_spinlock_unlock(&THREAD->lock, false);
442	break;
443
444	default:
445	/*
446	* Entering state is unexpected.
447	*/
448	panic("tid%" PRIu64 ": unexpected state %s.",
449	THREAD->tid, thread_states[THREAD->state]);
450	break;
451	}
452
453	THREAD = NULL;
454	}
455
456	THREAD = find_best_thread();
457
458	irq_spinlock_lock(&THREAD->lock, false);
459	int priority = THREAD->priority;
460	irq_spinlock_unlock(&THREAD->lock, false);
461
462	relink_rq(priority);
463
464	/*
465	* If both the old and the new task are the same,
466	* lots of work is avoided.
467	*/
468	if (TASK != THREAD->task) {
469	as_t *new_as = THREAD->task->as;
470
471	/*
472	* Note that it is possible for two tasks
473	* to share one address space.
474	*/
475	if (old_as != new_as) {
476	/*
477	* Both tasks and address spaces are different.
478	* Replace the old one with the new one.
479	*/
480	as_switch(old_as, new_as);
481	}
482
483	TASK = THREAD->task;
484	before_task_runs();
485	}
486
487	if (old_task)
488	task_release(old_task);
489
490	if (old_as)
491	as_release(old_as);
492
493	irq_spinlock_lock(&THREAD->lock, false);
494	THREAD->state = Running;
495
496	#ifdef SCHEDULER_VERBOSE
497	log(LF_OTHER, LVL_DEBUG,
498	"cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
499	", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,
500	THREAD->ticks, atomic_load(&CPU->nrdy));
501	#endif
502
503	/*
504	* Some architectures provide late kernel PA2KA(identity)
505	* mapping in a page fault handler. However, the page fault
506	* handler uses the kernel stack of the running thread and
507	* therefore cannot be used to map it. The kernel stack, if
508	* necessary, is to be mapped in before_thread_runs(). This
509	* function must be executed before the switch to the new stack.
510	*/
511	before_thread_runs();
512
513	/*
514	* Copy the knowledge of CPU, TASK, THREAD and preemption counter to
515	* thread's stack.
516	*/
517	current_copy(CURRENT, (current_t *) THREAD->kstack);
518
519	context_restore(&THREAD->saved_context);
520
521	/* Not reached */
522	}
523
#ifdef CONFIG_SMP
/** Load balancing thread
 *
 * SMP load balancing thread, supervising thread supplies
 * for the CPU it's wired to.
 *
 * Once a second the thread compares the number of ready threads on its
 * CPU with the system-wide average (rounded up by one). When the local
 * CPU is below the average, it steals eligible threads from the run
 * queues of CPUs that are above it, going from the lowest-priority
 * queues to the highest-priority ones, and makes them ready locally.
 * The function loops forever.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
	size_t average;
	size_t rdy;

loop:
	/*
	 * Work in 1s intervals.
	 */
	thread_sleep(1);

not_satisfied:
	/*
	 * Calculate the number of threads that will be migrated/stolen from
	 * other CPU's. Note that situation can have changed between two
	 * passes. Each time get the most up to date counts.
	 *
	 */
	average = atomic_load(&nrdy) / config.cpu_active + 1;
	rdy = atomic_load(&CPU->nrdy);

	if (average <= rdy)
		goto satisfied;

	size_t count = average - rdy;

	/*
	 * Searching least priority queues on all CPU's first and most priority
	 * queues on all CPU's last.
	 */
	size_t acpu;
	size_t acpu_bias = 0;
	int rq;

	for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
		for (acpu = 0; acpu < config.cpu_active; acpu++) {
			cpu_t *cpu = &cpus[(acpu + acpu_bias) % config.cpu_active];

			/*
			 * Not interested in ourselves.
			 * Doesn't require interrupt disabling for kcpulb has
			 * THREAD_FLAG_WIRED.
			 *
			 */
			if (CPU == cpu)
				continue;

			/* Only take from CPUs that are above the average. */
			if (atomic_load(&cpu->nrdy) <= average)
				continue;

			irq_spinlock_lock(&(cpu->rq[rq].lock), true);
			if (cpu->rq[rq].n == 0) {
				irq_spinlock_unlock(&(cpu->rq[rq].lock), true);
				continue;
			}

			thread_t *thread = NULL;

			/* Search rq from the back */
			link_t *link = list_last(&cpu->rq[rq].rq);

			while (link != NULL) {
				thread = (thread_t *) list_get_instance(link,
				    thread_t, rq_link);

				/*
				 * Do not steal CPU-wired threads, threads
				 * already stolen, threads for which migration
				 * was temporarily disabled or threads whose
				 * FPU context is still in the CPU.
				 */
				irq_spinlock_lock(&thread->lock, false);

				if ((!thread->wired) && (!thread->stolen) &&
				    (!thread->nomigrate) &&
				    (!thread->fpu_context_engaged)) {
					/*
					 * Remove thread from ready queue.
					 */
					irq_spinlock_unlock(&thread->lock,
					    false);

					atomic_dec(&cpu->nrdy);
					atomic_dec(&nrdy);

					cpu->rq[rq].n--;
					list_remove(&thread->rq_link);

					break;
				}

				irq_spinlock_unlock(&thread->lock, false);

				link = list_prev(link, &cpu->rq[rq].rq);
				thread = NULL;
			}

			if (thread) {
				/*
				 * Ready thread on local CPU
				 */

				irq_spinlock_pass(&(cpu->rq[rq].lock),
				    &thread->lock);

#ifdef KCPULB_VERBOSE
				/* Both counters are size_t, hence %zu. */
				log(LF_OTHER, LVL_DEBUG,
				    "kcpulb%u: TID %" PRIu64 " -> cpu%u, "
				    "nrdy=%zu, avg=%zu", CPU->id, thread->tid,
				    CPU->id, atomic_load(&CPU->nrdy),
				    atomic_load(&nrdy) / config.cpu_active);
#endif

				thread->stolen = true;
				thread->state = Entering;

				irq_spinlock_unlock(&thread->lock, true);
				thread_ready(thread);

				if (--count == 0)
					goto satisfied;

				/*
				 * We are not satisfied yet, focus on another
				 * CPU next time.
				 *
				 */
				acpu_bias++;

				continue;
			} else
				irq_spinlock_unlock(&(cpu->rq[rq].lock), true);

		}
	}

	if (atomic_load(&CPU->nrdy)) {
		/*
		 * Be a little bit light-weight and let migrated threads run.
		 *
		 */
		scheduler();
	} else {
		/*
		 * We failed to migrate a single thread.
		 * Give up this turn.
		 *
		 */
		goto loop;
	}

	goto not_satisfied;

satisfied:
	goto loop;
}
#endif /* CONFIG_SMP */
690
691	/** Print information about threads & scheduler queues
692	*
693	*/
694	void sched_print_list(void)
695	{
696	size_t cpu;
697	for (cpu = 0; cpu < config.cpu_count; cpu++) {
698	if (!cpus[cpu].active)
699	continue;
700
701	irq_spinlock_lock(&cpus[cpu].lock, true);
702
703	/* Technically a data race, but we don't really care in this case. */
704	int needs_relink = cpus[cpu].relink_deadline - cpus[cpu].current_clock_tick;
705
706	printf("cpu%u: address=%p, nrdy=%zu, needs_relink=%d\n",
707	cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy),
708	needs_relink);
709
710	unsigned int i;
711	for (i = 0; i < RQ_COUNT; i++) {
712	irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
713	if (cpus[cpu].rq[i].n == 0) {
714	irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
715	continue;
716	}
717
718	printf("\trq[%u]: ", i);
719	list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
720	thread) {
721	printf("%" PRIu64 "(%s) ", thread->tid,
722	thread_states[thread->state]);
723	}
724	printf("\n");
725
726	irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
727	}
728
729	irq_spinlock_unlock(&cpus[cpu].lock, true);
730	}
731	}
732
733	/** @}
734	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/kernel/generic/src/proc/scheduler.c@ 111b9b9

Download in other formats: