source: mainline/kernel/generic/src/proc/scheduler.c@ 5663872

Last change on this file since 5663872 was 5663872, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 18 months ago

Move stuff around for thread sleep

Only mark the thread as ready for wakeup after we switch to
another context. This way, soundness of the synchronization
does not depend on thread lock being held across the context
switch, which gives us more freedom.

  • Property mode set to 100644
File size: 16.8 KB
/*
 * Copyright (c) 2010 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup kernel_generic_proc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and kcpulb kernel thread which
 * performs load-balancing of per-CPU run queues.
 */
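
/*
 * Overview of the control flow below: scheduler() disables interrupts and
 * grabs THREAD->lock, scheduler_locked() saves the outgoing thread's context
 * and jumps onto the CPU's private stack, and scheduler_separated_stack()
 * finishes the job: it requeues, destroys or puts to sleep the old thread,
 * picks the next one via find_best_thread() and restores its context. On SMP
 * builds, the kcpulb() thread periodically steals ready threads from busier
 * CPUs.
 */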

#include <assert.h>
#include <atomic.h>
#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <halt.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <stdio.h>
#include <log.h>
#include <stacktrace.h>

static void scheduler_separated_stack(void);

atomic_size_t nrdy; /**< Number of ready threads in the system. */

/** Take actions before new thread runs.
 *
 * Perform actions that need to be
 * taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry
 *
 */
static void before_thread_runs(void)
{
    before_thread_runs_arch();

#ifdef CONFIG_FPU_LAZY
    /*
     * The only concurrent modification possible for fpu_owner here is
     * another thread changing it from itself to NULL in its destructor.
     */
    thread_t *owner = atomic_load_explicit(&CPU->fpu_owner,
        memory_order_relaxed);

    if (THREAD == owner)
        fpu_enable();
    else
        fpu_disable();
#elif defined CONFIG_FPU
    fpu_enable();
    if (THREAD->fpu_context_exists)
        fpu_context_restore(&THREAD->fpu_context);
    else {
        fpu_init();
        THREAD->fpu_context_exists = true;
    }
#endif

#ifdef CONFIG_UDEBUG
    if (THREAD->btrace) {
        istate_t *istate = THREAD->udebug.uspace_state;
        if (istate != NULL) {
            printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
            stack_trace_istate(istate);
        }

        THREAD->btrace = false;
    }
#endif
}

/** Take actions after THREAD has run.
 *
 * Perform actions that need to be
 * taken after the running thread
 * has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry
 *
 */
static void after_thread_ran(void)
{
    after_thread_ran_arch();
}

#ifdef CONFIG_FPU_LAZY
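/*
 * Lazy FPU context switching: the FPU stays disabled for threads that do not
 * own the current FPU state. The first FPU instruction such a thread executes
 * traps, and the (architecture-specific) trap handler is expected to call
 * this function, which saves the previous owner's FPU context and restores
 * or initializes the one of the current thread.
 */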
void scheduler_fpu_lazy_request(void)
{
    fpu_enable();

    /* We need this lock to ensure synchronization with thread destructor. */
    irq_spinlock_lock(&CPU->fpu_lock, false);

    /* Save old context */
    thread_t *owner = atomic_load_explicit(&CPU->fpu_owner, memory_order_relaxed);
    if (owner != NULL) {
        fpu_context_save(&owner->fpu_context);
        atomic_store_explicit(&CPU->fpu_owner, NULL, memory_order_relaxed);
    }

    irq_spinlock_unlock(&CPU->fpu_lock, false);

    if (THREAD->fpu_context_exists) {
        fpu_context_restore(&THREAD->fpu_context);
    } else {
        fpu_init();
        THREAD->fpu_context_exists = true;
    }

    atomic_store_explicit(&CPU->fpu_owner, THREAD, memory_order_relaxed);
}
#endif /* CONFIG_FPU_LAZY */

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Try to get a thread from the local run queues
 *
 * Attempt to find a ready thread on this CPU
 * according to thread accounting and scheduler
 * policy, without waiting.
 *
 * @return Thread to be scheduled, or NULL if all run queues are empty.
 *
 */
static thread_t *try_find_thread(int *rq_index)
{
    assert(interrupts_disabled());
    assert(CPU != NULL);

    if (atomic_load(&CPU->nrdy) == 0)
        return NULL;

    for (int i = 0; i < RQ_COUNT; i++) {
        irq_spinlock_lock(&(CPU->rq[i].lock), false);
        if (CPU->rq[i].n == 0) {
            /*
             * If this queue is empty, try a lower-priority queue.
             */
            irq_spinlock_unlock(&(CPU->rq[i].lock), false);
            continue;
        }

        atomic_dec(&CPU->nrdy);
        atomic_dec(&nrdy);
        CPU->rq[i].n--;

        /*
         * Take the first thread from the queue.
         */
        thread_t *thread = list_get_instance(
            list_first(&CPU->rq[i].rq), thread_t, rq_link);
        list_remove(&thread->rq_link);

        irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);

        thread->cpu = CPU;
        thread->priority = i; /* Correct rq index */

        /* Time allocation in microseconds. */
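        /*
         * Lower-priority queues get proportionally longer quanta:
         * rq[0] (the highest priority) gets 10 ms, rq[1] 20 ms, and so on.
         */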
        uint64_t time_to_run = (i + 1) * 10000;

        /* This is safe because interrupts are disabled. */
        CPU_LOCAL->preempt_deadline =
            CPU_LOCAL->current_clock_tick + us2ticks(time_to_run);

        /*
         * Clear the stolen flag so that the thread can be migrated
         * when the need for load balancing emerges.
         */
        thread->stolen = false;
        irq_spinlock_unlock(&thread->lock, false);

        *rq_index = i;
        return thread;
    }

    return NULL;
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy. If no thread is ready, idle the CPU
 * and wait until one becomes available.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(int *rq_index)
{
    assert(interrupts_disabled());
    assert(CPU != NULL);

    while (true) {
        thread_t *thread = try_find_thread(rq_index);

        if (thread != NULL)
            return thread;

        /*
         * Since there was nothing to run, the CPU goes to sleep
         * until a hardware interrupt or an IPI arrives.
         * This saves energy and improves the utilization of
         * hyperthreaded siblings.
         */
        CPU_LOCAL->idle = true;

        /*
         * Go to sleep with interrupts enabled.
         * Ideally, this should be atomic, but this is not guaranteed on
         * all platforms yet, so it is possible we will go to sleep when
         * a thread has just become available.
         */
        cpu_interruptible_sleep();
    }
}
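/** Switch to the task of the next thread
 *
 * Switch the address space if the new task uses a different one, and move
 * the TASK reference from the old task to the new one.
 */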
static void switch_task(task_t *task)
{
    /* If the task stays the same, a lot of work is avoided. */
    if (TASK == task)
        return;

    as_t *old_as = AS;
    as_t *new_as = task->as;

    /* It is possible for two tasks to share one address space. */
    if (old_as != new_as)
        as_switch(old_as, new_as);

    if (TASK)
        task_release(TASK);

    TASK = task;

    task_hold(TASK);

    before_task_runs_arch();
}

/** Prevent rq starvation
 *
 * Prevent low-priority threads from starving in the run queues.
 *
 * When the function decides to relink the run queues, it moves each
 * queue below the threshold one level up, so that threads with a
 * priority value greater than 'start' end up in a higher-priority
 * queue.
 *
 * @param start Threshold priority.
 *
 */
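/*
 * For example, with start == 0 a relink appends the old contents of rq[1]
 * to rq[0], moves the old rq[2] into rq[1], and so on, leaving the
 * lowest-priority queue rq[RQ_COUNT - 1] empty.
 */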
static void relink_rq(int start)
{
    if (CPU_LOCAL->current_clock_tick < CPU_LOCAL->relink_deadline)
        return;

    CPU_LOCAL->relink_deadline = CPU_LOCAL->current_clock_tick + NEEDS_RELINK_MAX;

    /* Temporary cache for lists we are moving. */
    list_t list;
    list_initialize(&list);

    size_t n = 0;

    /* Move every list below the 'start' queue (index greater than start) one level up. */
    for (int i = RQ_COUNT - 1; i > start; i--) {
        irq_spinlock_lock(&CPU->rq[i].lock, false);

        /* Swap lists. */
        list_swap(&CPU->rq[i].rq, &list);

        /* Swap number of items. */
        size_t tmpn = CPU->rq[i].n;
        CPU->rq[i].n = n;
        n = tmpn;

        irq_spinlock_unlock(&CPU->rq[i].lock, false);
    }

    /* Append the contents of rq[start + 1] to rq[start]. */
    if (n != 0) {
        irq_spinlock_lock(&CPU->rq[start].lock, false);
        list_concat(&CPU->rq[start].rq, &list);
        CPU->rq[start].n += n;
        irq_spinlock_unlock(&CPU->rq[start].lock, false);
    }
}
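/** Enter the scheduler
 *
 * Disable interrupts, lock THREAD->lock (if a thread is running) and hand
 * over to scheduler_locked(), which performs the actual switch. Halts the
 * CPU instead if the kernel is shutting down (haltstate).
 */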
void scheduler(void)
{
    ipl_t ipl = interrupts_disable();

    if (atomic_load(&haltstate))
        halt();

    if (THREAD) {
        irq_spinlock_lock(&THREAD->lock, false);
    }

    scheduler_locked(ipl);
}

/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler_locked(ipl_t ipl)
{
    assert(CPU != NULL);

    if (THREAD) {
        /* Update thread kernel accounting */
        THREAD->kcycles += get_cycle() - THREAD->last_cycle;

#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
        fpu_context_save(&THREAD->fpu_context);
#endif
        if (!context_save(&THREAD->saved_context)) {
            /*
             * This is the place where threads leave scheduler();
             */

            /* Save current CPU cycle */
            THREAD->last_cycle = get_cycle();

            irq_spinlock_unlock(&THREAD->lock, false);
            interrupts_restore(THREAD->saved_ipl);

            return;
        }

        /*
         * Interrupt priority level of preempted thread is recorded
         * here to facilitate scheduler() invocations from
         * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
         *
         */
        THREAD->saved_ipl = ipl;
    }

    /*
     * Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS
     * and preemption counter. At this point CURRENT could be coming either
     * from THREAD's or CPU's stack.
     *
     */
    current_copy(CURRENT, (current_t *) CPU_LOCAL->stack);

    /*
     * We may not keep the old stack.
     * Reason: If we kept the old stack and got blocked, for instance, in
     * find_best_thread(), the old thread could get rescheduled by another
     * CPU and overwrite the part of its own stack that was also used by
     * the scheduler on this CPU.
     *
     * Moreover, we have to bypass the compiler-generated POP sequence
     * which is fooled by SP being set to the very top of the stack.
     * Therefore the scheduler() function continues in
     * scheduler_separated_stack().
     *
     */
    context_t ctx;
    context_save(&ctx);
    context_set(&ctx, FADDR(scheduler_separated_stack),
        (uintptr_t) CPU_LOCAL->stack, STACK_SIZE);
    context_restore(&ctx);

    /* Not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function
 * using the new stack. Handles the actual context
 * switch to a new thread.
 *
 */
void scheduler_separated_stack(void)
{
    assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
    assert(CPU != NULL);
    assert(interrupts_disabled());

    if (THREAD) {
        /* Must be run after the switch to scheduler stack */
        after_thread_ran();

        int expected;

        switch (THREAD->state) {
        case Running:
            irq_spinlock_unlock(&THREAD->lock, false);
            thread_ready(THREAD);
            break;

        case Exiting:
            irq_spinlock_unlock(&THREAD->lock, false);
            waitq_close(&THREAD->join_wq);

            /*
             * Release the reference CPU has for the thread.
             * If there are no other references (e.g. threads calling join),
             * the thread structure is deallocated.
             */
            thread_put(THREAD);
            break;

        case Sleeping:
            expected = SLEEP_INITIAL;

            /* Only set SLEEP_ASLEEP if the sleep state is still in its initial state */
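            /*
             * A failed exchange below means a concurrent wakeup already
             * moved the state to SLEEP_WOKE, in which case the thread is
             * requeued right away instead of being left asleep.
             */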
            if (atomic_compare_exchange_strong_explicit(&THREAD->sleep_state,
                    &expected, SLEEP_ASLEEP,
                    memory_order_acq_rel, memory_order_acquire)) {

                /* Prefer the thread after it's woken up. */
                THREAD->priority = -1;
                irq_spinlock_unlock(&THREAD->lock, false);
            } else {
                assert(expected == SLEEP_WOKE);
                /* The thread has already been woken up, requeue immediately. */
                irq_spinlock_unlock(&THREAD->lock, false);
                thread_ready(THREAD);
            }

            break;

        default:
            /*
             * Entering state is unexpected.
             */
            panic("tid%" PRIu64 ": unexpected state %s.",
                THREAD->tid, thread_states[THREAD->state]);
            break;
        }

        THREAD = NULL;
    }

    int rq_index;
    THREAD = find_best_thread(&rq_index);

    relink_rq(rq_index);

    switch_task(THREAD->task);

    irq_spinlock_lock(&THREAD->lock, false);
    THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
    log(LF_OTHER, LVL_DEBUG,
        "cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
        ", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,
        THREAD->ticks, atomic_load(&CPU->nrdy));
#endif

    /*
     * Some architectures provide late kernel PA2KA(identity)
     * mapping in a page fault handler. However, the page fault
     * handler uses the kernel stack of the running thread and
     * therefore cannot be used to map it. The kernel stack, if
     * necessary, is to be mapped in before_thread_runs(). This
     * function must be executed before the switch to the new stack.
     */
    before_thread_runs();

    /*
     * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
     * thread's stack.
     */
    current_copy(CURRENT, (current_t *) THREAD->kstack);

    context_restore(&THREAD->saved_context);

    /* Not reached */
}

#ifdef CONFIG_SMP

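/** Try to steal a thread from another CPU's run queue
 *
 * Scan the i-th run queue of @a old_cpu from the back and move the first
 * thread that may be migrated onto the corresponding run queue of the
 * local CPU.
 *
 * @return Stolen thread, or NULL if no suitable thread was found.
 */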
static thread_t *steal_thread_from(cpu_t *old_cpu, int i)
{
    runq_t *old_rq = &old_cpu->rq[i];
    runq_t *new_rq = &CPU->rq[i];

    ipl_t ipl = interrupts_disable();

    irq_spinlock_lock(&old_rq->lock, false);

    /*
     * If fpu_owner is any thread in the list, its store is seen here thanks to
     * the runqueue lock.
     */
    thread_t *fpu_owner = atomic_load_explicit(&old_cpu->fpu_owner,
        memory_order_relaxed);

    /* Search rq from the back */
    list_foreach_rev(old_rq->rq, rq_link, thread_t, thread) {

        irq_spinlock_lock(&thread->lock, false);

        /*
         * Do not steal CPU-wired threads, threads
         * already stolen, threads for which migration
         * was temporarily disabled or threads whose
         * FPU context is still in the CPU.
         */
        if (thread->stolen || thread->nomigrate ||
            thread == fpu_owner) {
            irq_spinlock_unlock(&thread->lock, false);
            continue;
        }

        thread->stolen = true;
        thread->cpu = CPU;

        irq_spinlock_unlock(&thread->lock, false);

        /*
         * Ready thread on local CPU
         */

#ifdef KCPULB_VERBOSE
        log(LF_OTHER, LVL_DEBUG,
            "kcpulb%u: TID %" PRIu64 " -> cpu%u, "
            "nrdy=%ld, avg=%ld", CPU->id, thread->tid,
            CPU->id, atomic_load(&CPU->nrdy),
            atomic_load(&nrdy) / config.cpu_active);
#endif

        /* Remove thread from ready queue. */
        old_rq->n--;
        list_remove(&thread->rq_link);
        irq_spinlock_unlock(&old_rq->lock, false);

        /* Append thread to local queue. */
        irq_spinlock_lock(&new_rq->lock, false);
        list_append(&thread->rq_link, &new_rq->rq);
        new_rq->n++;
        irq_spinlock_unlock(&new_rq->lock, false);

        atomic_dec(&old_cpu->nrdy);
        atomic_inc(&CPU->nrdy);
        interrupts_restore(ipl);
        return thread;
    }

    irq_spinlock_unlock(&old_rq->lock, false);
    interrupts_restore(ipl);
    return NULL;
}

/** Load balancing thread
 *
 * SMP load-balancing thread. It keeps the CPU it is wired to
 * supplied with ready threads by stealing them from busier CPUs.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
    size_t average;
    size_t rdy;

loop:
    /*
     * Work in 1s intervals.
     */
    thread_sleep(1);

not_satisfied:
    /*
     * Calculate the number of threads that will be migrated/stolen from
     * other CPUs. Note that the situation may have changed between two
     * passes, so the most up-to-date counts are taken each time.
     *
     */
    average = atomic_load(&nrdy) / config.cpu_active + 1;
    rdy = atomic_load(&CPU->nrdy);

    if (average <= rdy)
        goto satisfied;

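    /*
     * count is how many threads are needed to bring this CPU's ready count
     * up to the target (the per-CPU average plus one).
     */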
    size_t count = average - rdy;

    /*
     * Search the lowest-priority queues on all CPUs first and the
     * highest-priority queues on all CPUs last.
     */
    size_t acpu;
    int rq;

    for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
        for (acpu = 0; acpu < config.cpu_active; acpu++) {
            cpu_t *cpu = &cpus[acpu];

            /*
             * Not interested in ourselves.
             * Disabling interrupts is not required here, because
             * kcpulb has THREAD_FLAG_WIRED and cannot migrate.
             *
             */
            if (CPU == cpu)
                continue;

            if (atomic_load(&cpu->nrdy) <= average)
                continue;

            if (steal_thread_from(cpu, rq) && --count == 0)
                goto satisfied;
        }
    }

    if (atomic_load(&CPU->nrdy)) {
        /*
         * Be a little bit light-weight and let migrated threads run.
         *
         */
        scheduler();
    } else {
        /*
         * We failed to migrate a single thread.
         * Give up this turn.
         *
         */
        goto loop;
    }

    goto not_satisfied;

satisfied:
    goto loop;
}
#endif /* CONFIG_SMP */

/** Print information about threads & scheduler queues
 *
 */
void sched_print_list(void)
{
    size_t cpu;
    for (cpu = 0; cpu < config.cpu_count; cpu++) {
        if (!cpus[cpu].active)
            continue;

        printf("cpu%u: address=%p, nrdy=%zu\n",
            cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy));

        unsigned int i;
        for (i = 0; i < RQ_COUNT; i++) {
            irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
            if (cpus[cpu].rq[i].n == 0) {
                irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
                continue;
            }

            printf("\trq[%u]: ", i);
            list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
                thread) {
                printf("%" PRIu64 "(%s) ", thread->tid,
                    thread_states[thread->state]);
            }
            printf("\n");

            irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
        }
    }
}

/** @}
 */