source: mainline/kernel/generic/src/proc/scheduler.c@ f3dbe27

Last change on this file since f3dbe27 was f3dbe27, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 years ago

Reduce locking further with lazy FPU

It turns out we only need a lock to synchronize between the trap
handler and thread destructor. The atomic operations introduced
are just plain reads and writes, written in an ugly fashion to
appease C11 undefined behavior gods.

In principle we could get rid of that if we made cpu_t::fpu_owner
a strong reference, but that would mean a thread structure could
be held in limbo indefinitely if a new thread is not being
scheduled or doesn't use FPU.

/*
 * Copyright (c) 2010 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup kernel_generic_proc
 * @{
 */

/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and kcpulb kernel thread which
 * performs load-balancing of per-CPU run queues.
 */

#include <assert.h>
#include <atomic.h>
#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/timeout.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <arch/cycle.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <fpu_context.h>
#include <halt.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <cpu.h>
#include <stdio.h>
#include <log.h>
#include <stacktrace.h>

static void scheduler_separated_stack(void);

atomic_size_t nrdy; /**< Number of ready threads in the system. */

/** Take actions before new thread runs.
 *
 * Perform actions that need to be
 * taken before the newly selected
 * thread is passed control.
 *
 * THREAD->lock is locked on entry
 *
 */
static void before_thread_runs(void)
{
	before_thread_runs_arch();

#ifdef CONFIG_FPU_LAZY
	/*
	 * The only concurrent modification possible for fpu_owner here is
	 * another thread changing it from itself to NULL in its destructor.
	 */
	thread_t *owner = atomic_load_explicit(&CPU->fpu_owner,
	    memory_order_relaxed);

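	/*
	 * With lazy FPU switching, keep the unit enabled only for the owner
	 * of the lazily saved context. For any other thread it stays
	 * disabled, so its first FPU instruction traps and
	 * scheduler_fpu_lazy_request() can hand the unit over.
	 */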
	if (THREAD == owner)
		fpu_enable();
	else
		fpu_disable();
#elif defined CONFIG_FPU
	fpu_enable();
	if (THREAD->fpu_context_exists)
		fpu_context_restore(&THREAD->fpu_context);
	else {
		fpu_init();
		THREAD->fpu_context_exists = true;
	}
#endif

#ifdef CONFIG_UDEBUG
	if (THREAD->btrace) {
		istate_t *istate = THREAD->udebug.uspace_state;
		if (istate != NULL) {
			printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid);
			stack_trace_istate(istate);
		}

		THREAD->btrace = false;
	}
#endif
}

/** Take actions after THREAD has run.
 *
 * Perform actions that need to be
 * taken after the running thread
 * has been preempted by the scheduler.
 *
 * THREAD->lock is locked on entry
 *
 */
static void after_thread_ran(void)
{
	after_thread_ran_arch();
}

#ifdef CONFIG_FPU_LAZY
void scheduler_fpu_lazy_request(void)
{
	fpu_enable();

	/* We need this lock to ensure synchronization with thread destructor. */
	irq_spinlock_lock(&CPU->fpu_lock, false);

	/* Save old context */
	thread_t *owner = atomic_load_explicit(&CPU->fpu_owner, memory_order_relaxed);
	if (owner != NULL) {
		fpu_context_save(&owner->fpu_context);
		atomic_store_explicit(&CPU->fpu_owner, NULL, memory_order_relaxed);
	}

	irq_spinlock_unlock(&CPU->fpu_lock, false);

	if (THREAD->fpu_context_exists) {
		fpu_context_restore(&THREAD->fpu_context);
	} else {
		fpu_init();
		THREAD->fpu_context_exists = true;
	}

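	/*
	 * Publish THREAD as the new owner of this CPU's FPU state.
	 * before_thread_runs() keeps the unit enabled only for the owner,
	 * so any other thread ends up back in this function.
	 */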
	atomic_store_explicit(&CPU->fpu_owner, THREAD, memory_order_relaxed);
}
#endif /* CONFIG_FPU_LAZY */

/** Initialize scheduler
 *
 * Initialize kernel scheduler.
 *
 */
void scheduler_init(void)
{
}

/** Get thread to be scheduled
 *
 * Get the optimal thread to be scheduled
 * according to thread accounting and scheduler
 * policy.
 *
 * @return Thread to be scheduled.
 *
 */
static thread_t *find_best_thread(void)
{
	assert(CPU != NULL);

loop:
	if (atomic_load(&CPU->nrdy) == 0) {
		/*
		 * Since there is nothing to run, the CPU goes to sleep
		 * until a hardware interrupt or an IPI arrives.
		 * This saves energy and helps hyperthreading.
		 */
		CPU->idle = true;

		/*
		 * Go to sleep with interrupts enabled.
		 * Ideally, this should be atomic, but that is not guaranteed on
		 * all platforms yet, so it is possible that we go to sleep just
		 * as a thread becomes available.
		 */
		cpu_interruptible_sleep();

		/* Interrupts are disabled again. */
		goto loop;
	}

	assert(!CPU->idle);

	unsigned int i;
	for (i = 0; i < RQ_COUNT; i++) {
		irq_spinlock_lock(&(CPU->rq[i].lock), false);
		if (CPU->rq[i].n == 0) {
			/*
			 * If this queue is empty, try a lower-priority queue.
			 */
			irq_spinlock_unlock(&(CPU->rq[i].lock), false);
			continue;
		}

		atomic_dec(&CPU->nrdy);
		atomic_dec(&nrdy);
		CPU->rq[i].n--;

		/*
		 * Take the first thread from the queue.
		 */
		thread_t *thread = list_get_instance(
		    list_first(&CPU->rq[i].rq), thread_t, rq_link);
		list_remove(&thread->rq_link);

		irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock);

		thread->cpu = CPU;
		thread->priority = i; /* Correct rq index */

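		/*
		 * The time slice scales with the queue index: the
		 * highest-priority queue (i == 0) gets 10 ms, the next one
		 * 20 ms, and so on.
		 */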
		/* Time allocation in microseconds. */
		uint64_t time_to_run = (i + 1) * 10000;

		/* This is safe because interrupts are disabled. */
		CPU->preempt_deadline = CPU->current_clock_tick + us2ticks(time_to_run);

		/*
		 * Clear the stolen flag so that the thread can be migrated
		 * again when the need for load balancing arises.
		 */
		thread->stolen = false;
		irq_spinlock_unlock(&thread->lock, false);

		return thread;
	}

	goto loop;
}

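/** Switch to the context of a different task.
 *
 * If the new task shares the address space with the current one,
 * the address space switch is skipped. The CPU's reference to the
 * old TASK is released and a reference to @a task is taken instead.
 *
 * @param task Task whose thread is about to run.
 *
 */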
static void switch_task(task_t *task)
{
	/* If the task stays the same, a lot of work is avoided. */
	if (TASK == task)
		return;

	as_t *old_as = AS;
	as_t *new_as = task->as;

	/* It is possible for two tasks to share one address space. */
	if (old_as != new_as)
		as_switch(old_as, new_as);

	if (TASK)
		task_release(TASK);

	TASK = task;

	task_hold(TASK);

	before_task_runs_arch();
}

/** Prevent rq starvation
 *
 * Prevent low priority threads from starving in rq's.
 *
 * When the function decides to relink rq's, it reconnects the
 * respective lists so that threads whose priority is greater than
 * @a start are each moved one level up, into a higher-priority queue.
 *
 * @param start Threshold priority.
 *
 */
static void relink_rq(int start)
{
	if (CPU->current_clock_tick < CPU->relink_deadline)
		return;

	CPU->relink_deadline = CPU->current_clock_tick + NEEDS_RELINK_MAX;

	/* Temporary cache for lists we are moving. */
	list_t list;
	list_initialize(&list);

	size_t n = 0;

	/* Move every list (except the one with highest priority) one level up. */
	for (int i = RQ_COUNT - 1; i > start; i--) {
		irq_spinlock_lock(&CPU->rq[i].lock, false);

		/* Swap lists. */
		list_swap(&CPU->rq[i].rq, &list);

		/* Swap number of items. */
		size_t tmpn = CPU->rq[i].n;
		CPU->rq[i].n = n;
		n = tmpn;

		irq_spinlock_unlock(&CPU->rq[i].lock, false);
	}

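	/*
	 * Each queue with index greater than start now holds the former
	 * contents of the queue one index higher (the last queue ends up
	 * empty), and the local list carries what used to be rq[start + 1].
	 */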
	/* Append the contents of rq[start + 1] to rq[start]. */
	if (n != 0) {
		irq_spinlock_lock(&CPU->rq[start].lock, false);
		list_concat(&CPU->rq[start].rq, &list);
		CPU->rq[start].n += n;
		irq_spinlock_unlock(&CPU->rq[start].lock, false);
	}
}

void scheduler(void)
{
	ipl_t ipl = interrupts_disable();

	if (atomic_load(&haltstate))
		halt();

	if (THREAD) {
		irq_spinlock_lock(&THREAD->lock, false);
	}

	scheduler_locked(ipl);
}

/** The scheduler
 *
 * The thread scheduling procedure.
 * Passes control directly to
 * scheduler_separated_stack().
 *
 */
void scheduler_locked(ipl_t ipl)
{
	assert(CPU != NULL);

	if (THREAD) {
		/* Update thread kernel accounting */
		THREAD->kcycles += get_cycle() - THREAD->last_cycle;

#if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY)
		fpu_context_save(&THREAD->fpu_context);
#endif
		if (!context_save(&THREAD->saved_context)) {
			/*
			 * This is the place where threads leave scheduler();
			 */

			/* Save current CPU cycle */
			THREAD->last_cycle = get_cycle();

			irq_spinlock_unlock(&THREAD->lock, false);
			interrupts_restore(THREAD->saved_ipl);

			return;
		}

		/*
		 * Interrupt priority level of preempted thread is recorded
		 * here to facilitate scheduler() invocations from
		 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
		 *
		 */
		THREAD->saved_ipl = ipl;
	}

	/*
	 * Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS
	 * and preemption counter. At this point CURRENT could be coming either
	 * from THREAD's or CPU's stack.
	 *
	 */
	current_copy(CURRENT, (current_t *) CPU->stack);

	/*
	 * We may not keep the old stack.
	 * Reason: If we kept the old stack and got blocked, for instance, in
	 * find_best_thread(), the old thread could get rescheduled by another
	 * CPU and overwrite the part of its own stack that was also used by
	 * the scheduler on this CPU.
	 *
	 * Moreover, we have to bypass the compiler-generated POP sequence
	 * which is fooled by SP being set to the very top of the stack.
	 * Therefore the scheduler() function continues in
	 * scheduler_separated_stack().
	 *
	 */
	context_t ctx;
	context_save(&ctx);
	context_set(&ctx, FADDR(scheduler_separated_stack),
	    (uintptr_t) CPU->stack, STACK_SIZE);
	context_restore(&ctx);

	/* Not reached */
}

/** Scheduler stack switch wrapper
 *
 * Second part of the scheduler() function,
 * using the new stack. Handles the actual
 * context switch to a new thread.
 *
 */
void scheduler_separated_stack(void)
{
	assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock)));
	assert(CPU != NULL);
	assert(interrupts_disabled());

	if (THREAD) {
		/* Must be run after the switch to scheduler stack */
		after_thread_ran();

		switch (THREAD->state) {
		case Running:
			irq_spinlock_unlock(&THREAD->lock, false);
			thread_ready(THREAD);
			break;

		case Exiting:
			irq_spinlock_unlock(&THREAD->lock, false);
			waitq_close(&THREAD->join_wq);

			/*
			 * Release the reference CPU has for the thread.
			 * If there are no other references (e.g. threads calling join),
			 * the thread structure is deallocated.
			 */
			thread_put(THREAD);
			break;

		case Sleeping:
			/*
			 * Prefer the thread after it's woken up.
			 */
			THREAD->priority = -1;
			irq_spinlock_unlock(&THREAD->lock, false);
			break;

		default:
			/*
			 * Entering state is unexpected.
			 */
			panic("tid%" PRIu64 ": unexpected state %s.",
			    THREAD->tid, thread_states[THREAD->state]);
			break;
		}

		THREAD = NULL;
	}

	THREAD = find_best_thread();

	irq_spinlock_lock(&THREAD->lock, false);
	int priority = THREAD->priority;
	irq_spinlock_unlock(&THREAD->lock, false);

	relink_rq(priority);

	switch_task(THREAD->task);

	irq_spinlock_lock(&THREAD->lock, false);
	THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
	log(LF_OTHER, LVL_DEBUG,
	    "cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64
	    ", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,
	    THREAD->ticks, atomic_load(&CPU->nrdy));
#endif

	/*
	 * Some architectures provide late kernel PA2KA(identity)
	 * mapping in a page fault handler. However, the page fault
	 * handler uses the kernel stack of the running thread and
	 * therefore cannot be used to map it. The kernel stack, if
	 * necessary, is to be mapped in before_thread_runs(). This
	 * function must be executed before the switch to the new stack.
	 */
	before_thread_runs();

	/*
	 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to
	 * thread's stack.
	 */
	current_copy(CURRENT, (current_t *) THREAD->kstack);

	context_restore(&THREAD->saved_context);

	/* Not reached */
}

#ifdef CONFIG_SMP

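/** Try to steal a thread from another CPU's run queue.
 *
 * Searches run queue @a i of @a old_cpu from the back and moves the
 * first suitable thread to the corresponding run queue of the local CPU.
 *
 * @param old_cpu CPU whose run queue is to be searched.
 * @param i       Run queue index (priority).
 *
 * @return Stolen thread, or NULL if no suitable thread was found.
 *
 */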
static thread_t *steal_thread_from(cpu_t *old_cpu, int i)
{
	runq_t *old_rq = &old_cpu->rq[i];
	runq_t *new_rq = &CPU->rq[i];

	ipl_t ipl = interrupts_disable();

	irq_spinlock_lock(&old_rq->lock, false);

	/*
	 * If fpu_owner is any thread in the list, its store is seen here thanks to
	 * the runqueue lock.
	 */
	thread_t *fpu_owner = atomic_load_explicit(&old_cpu->fpu_owner,
	    memory_order_relaxed);

	/* Search rq from the back */
	list_foreach_rev(old_rq->rq, rq_link, thread_t, thread) {

		irq_spinlock_lock(&thread->lock, false);

		/*
		 * Do not steal CPU-wired threads, threads
		 * already stolen, threads for which migration
		 * was temporarily disabled or threads whose
		 * FPU context is still in the CPU.
		 */
		if (thread->stolen || thread->nomigrate ||
		    thread == fpu_owner) {
			irq_spinlock_unlock(&thread->lock, false);
			continue;
		}

		thread->stolen = true;
		thread->cpu = CPU;

		irq_spinlock_unlock(&thread->lock, false);

		/*
		 * Ready thread on local CPU
		 */

#ifdef KCPULB_VERBOSE
		log(LF_OTHER, LVL_DEBUG,
		    "kcpulb%u: TID %" PRIu64 " -> cpu%u, "
		    "nrdy=%ld, avg=%ld", CPU->id, thread->tid,
		    CPU->id, atomic_load(&CPU->nrdy),
		    atomic_load(&nrdy) / config.cpu_active);
#endif

		/* Remove thread from ready queue. */
		old_rq->n--;
		list_remove(&thread->rq_link);
		irq_spinlock_unlock(&old_rq->lock, false);

		/* Append thread to local queue. */
		irq_spinlock_lock(&new_rq->lock, false);
		list_append(&thread->rq_link, &new_rq->rq);
		new_rq->n++;
		irq_spinlock_unlock(&new_rq->lock, false);

		atomic_dec(&old_cpu->nrdy);
		atomic_inc(&CPU->nrdy);
		interrupts_restore(ipl);
		return thread;
	}

	irq_spinlock_unlock(&old_rq->lock, false);
	interrupts_restore(ipl);
	return NULL;
}

/** Load balancing thread
 *
 * SMP load balancing thread that keeps the CPU it is
 * wired to supplied with threads.
 *
 * @param arg Generic thread argument (unused).
 *
 */
void kcpulb(void *arg)
{
	size_t average;
	size_t rdy;

loop:
	/*
	 * Work in 1s intervals.
	 */
	thread_sleep(1);

not_satisfied:
	/*
	 * Calculate the number of threads that will be migrated/stolen from
	 * other CPUs. Note that the situation may have changed between two
	 * passes; get the most up-to-date counts each time.
	 *
	 */
	average = atomic_load(&nrdy) / config.cpu_active + 1;
	rdy = atomic_load(&CPU->nrdy);

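	/*
	 * The computed average is roughly this CPU's fair share of all
	 * ready threads; threads are stolen only while this CPU holds
	 * fewer than that.
	 */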
	if (average <= rdy)
		goto satisfied;

	size_t count = average - rdy;

	/*
	 * Search the lowest-priority queues on all CPUs first and the
	 * highest-priority queues last.
	 */
	size_t acpu;
	int rq;

	for (rq = RQ_COUNT - 1; rq >= 0; rq--) {
		for (acpu = 0; acpu < config.cpu_active; acpu++) {
			cpu_t *cpu = &cpus[acpu];

			/*
			 * Not interested in ourselves.
			 * No interrupt disabling is needed because kcpulb
			 * has THREAD_FLAG_WIRED.
			 *
			 */
			if (CPU == cpu)
				continue;

			if (atomic_load(&cpu->nrdy) <= average)
				continue;

			if (steal_thread_from(cpu, rq) && --count == 0)
				goto satisfied;
		}
	}

	if (atomic_load(&CPU->nrdy)) {
		/*
		 * Be a little bit light-weight and let migrated threads run.
		 *
		 */
		scheduler();
	} else {
		/*
		 * We failed to migrate a single thread.
		 * Give up this turn.
		 *
		 */
		goto loop;
	}

	goto not_satisfied;

satisfied:
	goto loop;
}
#endif /* CONFIG_SMP */

/** Print information about threads & scheduler queues
 *
 */
void sched_print_list(void)
{
	size_t cpu;
	for (cpu = 0; cpu < config.cpu_count; cpu++) {
		if (!cpus[cpu].active)
			continue;

		/* Technically a data race, but we don't really care in this case. */
		int needs_relink = cpus[cpu].relink_deadline - cpus[cpu].current_clock_tick;

		printf("cpu%u: address=%p, nrdy=%zu, needs_relink=%d\n",
		    cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy),
		    needs_relink);

		unsigned int i;
		for (i = 0; i < RQ_COUNT; i++) {
			irq_spinlock_lock(&(cpus[cpu].rq[i].lock), false);
			if (cpus[cpu].rq[i].n == 0) {
				irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
				continue;
			}

			printf("\trq[%u]: ", i);
			list_foreach(cpus[cpu].rq[i].rq, rq_link, thread_t,
			    thread) {
				printf("%" PRIu64 "(%s) ", thread->tid,
				    thread_states[thread->state]);
			}
			printf("\n");

			irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false);
		}
	}
}

/** @}
 */