scheduler.c

/*
 * Copyright (C) 2001-2004 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

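/**
 * @file
 * @brief Scheduler and load balancing.
 *
 * This file contains the scheduler and the kcpulb kernel thread which
 * performs load balancing of per-CPU run queues.
 */
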
#include <proc/scheduler.h>
#include <proc/thread.h>
#include <proc/task.h>
#include <mm/frame.h>
#include <mm/page.h>
#include <mm/as.h>
#include <time/delay.h>
#include <arch/asm.h>
#include <arch/faddr.h>
#include <atomic.h>
#include <synch/spinlock.h>
#include <config.h>
#include <context.h>
#include <func.h>
#include <arch.h>
#include <adt/list.h>
#include <panic.h>
#include <typedefs.h>
#include <cpu.h>
#include <print.h>
#include <debug.h>

static void before_task_runs(void);
static void before_thread_runs(void);
static void after_thread_ran(void);
static void scheduler_separated_stack(void);

atomic_t nrdy;  /**< Number of ready threads in the system. */
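
/** Take actions before new task runs.
 *
 * Delegate to before_task_runs_arch() so that architecture-specific
 * actions can be taken before the newly scheduled task starts running.
 */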
void before_task_runs(void)
{
        before_task_runs_arch();
}

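/** Take actions before new thread runs.
 *
 * Perform actions that need to be taken before the newly selected
 * thread is passed control. In particular, prepare the FPU state:
 * with CONFIG_FPU_LAZY the FPU is enabled only if THREAD already owns
 * it; otherwise THREAD's saved FPU context is restored (or freshly
 * initialized) right away.
 */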
void before_thread_runs(void)
{
        before_thread_runs_arch();
#ifdef CONFIG_FPU_LAZY
        if (THREAD == CPU->fpu_owner)
                fpu_enable();
        else
                fpu_disable();
#else
        fpu_enable();
        if (THREAD->fpu_context_exists)
                fpu_context_restore(THREAD->saved_fpu_context);
        else {
                fpu_init();
                THREAD->fpu_context_exists = 1;
        }
#endif
}

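/** Take actions after THREAD had run.
 *
 * Perform architecture-specific cleanup after the previously running
 * thread was preempted; the work is delegated to after_thread_ran_arch().
 * This must be run on the scheduler stack, after the old thread's stack
 * has been abandoned.
 */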
void after_thread_ran(void)
{
        after_thread_ran_arch();
}

#ifdef CONFIG_FPU_LAZY
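/** Claim the FPU for the current thread (lazy FPU context switching).
 *
 * Save the FPU context of the previous owner, if any, and then restore
 * THREAD's saved FPU context or allocate and initialize a fresh one.
 * Because the allocation may sleep and the thread may thus migrate to
 * another CPU, the whole procedure restarts after the allocation.
 */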
void scheduler_fpu_lazy_request(void)
{
restart:
        fpu_enable();
        spinlock_lock(&CPU->lock);

        /* Save old context */
        if (CPU->fpu_owner != NULL) {
                spinlock_lock(&CPU->fpu_owner->lock);
                fpu_context_save(CPU->fpu_owner->saved_fpu_context);
                /* Don't prevent migration */
                CPU->fpu_owner->fpu_context_engaged = 0;
                spinlock_unlock(&CPU->fpu_owner->lock);
                CPU->fpu_owner = NULL;
        }

        spinlock_lock(&THREAD->lock);
        if (THREAD->fpu_context_exists) {
                fpu_context_restore(THREAD->saved_fpu_context);
        } else {
                /* Allocate FPU context */
                if (!THREAD->saved_fpu_context) {
                        /* Might sleep */
                        spinlock_unlock(&THREAD->lock);
                        spinlock_unlock(&CPU->lock);
                        THREAD->saved_fpu_context = slab_alloc(fpu_context_slab,
                                                               0);
                        /* We may have switched CPUs during slab_alloc */
                        goto restart;
                }
                fpu_init();
                THREAD->fpu_context_exists = 1;
        }
        CPU->fpu_owner = THREAD;
        THREAD->fpu_context_engaged = 1;
        spinlock_unlock(&THREAD->lock);

        spinlock_unlock(&CPU->lock);
}
#endif

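/** Initialize the scheduler.
 *
 * Currently a no-op; there is nothing to initialize.
 */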
void scheduler_init(void)
{
}

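/** Get thread to be scheduled.
 *
 * Take the first thread from the highest-priority non-empty run queue
 * of the current CPU. If no thread is ready, put the CPU to sleep and
 * try again after wakeup.
 *
 * @return Thread to be scheduled.
 */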
static thread_t *find_best_thread(void)
{
        thread_t *t;
        runq_t *r;
        int i;

        ASSERT(CPU != NULL);

loop:
        interrupts_enable();

        if (atomic_get(&CPU->nrdy) == 0) {
                /*
                 * Since there is nothing to run, the CPU goes to sleep
                 * until a hardware interrupt or an IPI arrives.
                 * This improves energy saving and benefits hyperthreading.
                 */

                /*
                 * An interrupt might occur right now and wake up a thread.
                 * In such a case, the CPU will go to sleep anyway,
                 * even though there is a runnable thread.
                 */

                cpu_sleep();
                goto loop;
        }

        interrupts_disable();

        for (i = 0; i < RQ_COUNT; i++) {
                r = &CPU->rq[i];
                spinlock_lock(&r->lock);
                if (r->n == 0) {
                        /*
                         * If this queue is empty, try a lower-priority queue.
                         */
                        spinlock_unlock(&r->lock);
                        continue;
                }

                atomic_dec(&CPU->nrdy);
                atomic_dec(&nrdy);
                r->n--;

                /*
                 * Take the first thread from the queue.
                 */
                t = list_get_instance(r->rq_head.next, thread_t, rq_link);
                list_remove(&t->rq_link);

                spinlock_unlock(&r->lock);

                spinlock_lock(&t->lock);
                t->cpu = CPU;

                t->ticks = us2ticks((i + 1) * 10000);
                t->priority = i;        /* correct rq index */

                /*
                 * Clear the X_STOLEN flag so that t can be migrated
                 * when the need for load balancing arises again.
                 */
                t->flags &= ~X_STOLEN;
                spinlock_unlock(&t->lock);

                return t;
        }
        goto loop;

}

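/** Prevent starvation of lower-priority run queues.
 *
 * Once CPU->needs_relink exceeds NEEDS_RELINK_MAX, append the contents
 * of run queue rq[i + 1] to rq[i] for each i starting at @a start, so
 * that threads waiting in lower-priority queues are eventually promoted.
 *
 * @param start Index of the first run queue to be relinked.
 */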
static void relink_rq(int start)
{
        link_t head;
        runq_t *r;
        int i, n;

        list_initialize(&head);
        spinlock_lock(&CPU->lock);
        if (CPU->needs_relink > NEEDS_RELINK_MAX) {
                for (i = start; i < RQ_COUNT - 1; i++) {
                        /* Remember and empty rq[i + 1] */
                        r = &CPU->rq[i + 1];
                        spinlock_lock(&r->lock);
                        list_concat(&head, &r->rq_head);
                        n = r->n;
                        r->n = 0;
                        spinlock_unlock(&r->lock);

                        /* Append rq[i + 1] to rq[i] */
                        r = &CPU->rq[i];
                        spinlock_lock(&r->lock);
                        list_concat(&r->rq_head, &head);
                        r->n += n;
                        spinlock_unlock(&r->lock);
                }
                CPU->needs_relink = 0;
        }
        spinlock_unlock(&CPU->lock);

}

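/** The scheduler.
 *
 * Save the context of the current thread, if there is one, and switch
 * to the CPU's private scheduler stack. Scheduling then continues in
 * scheduler_separated_stack(); a preempted thread returns from
 * scheduler() through its saved context once it is scheduled again.
 */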
void scheduler(void)
{
        volatile ipl_t ipl;

        ASSERT(CPU != NULL);

        ipl = interrupts_disable();

        if (atomic_get(&haltstate))
                halt();

        if (THREAD) {
                spinlock_lock(&THREAD->lock);
#ifndef CONFIG_FPU_LAZY
                fpu_context_save(THREAD->saved_fpu_context);
#endif
                if (!context_save(&THREAD->saved_context)) {
                        /*
                         * This is the place where threads leave scheduler().
                         */
                        spinlock_unlock(&THREAD->lock);
                        interrupts_restore(THREAD->saved_context.ipl);

                        return;
                }

                /*
                 * The interrupt priority level of the preempted thread is
                 * recorded here so that scheduler() can also be invoked from
                 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()).
                 */
                THREAD->saved_context.ipl = ipl;
        }

        /*
         * Through the 'THE' structure, we keep track of THREAD, TASK, CPU, VM
         * and the preemption counter. At this point THE could be coming either
         * from THREAD's or CPU's stack.
         */
        the_copy(THE, (the_t *) CPU->stack);

        /*
         * We may not keep the old stack.
         * Reason: If we kept the old stack and got blocked, for instance, in
         * find_best_thread(), the old thread could get rescheduled by another
         * CPU and overwrite the part of its own stack that was also used by
         * the scheduler on this CPU.
         *
         * Moreover, we have to bypass the compiler-generated POP sequence
         * which is fooled by SP being set to the very top of the stack.
         * Therefore the scheduler() function continues in
         * scheduler_separated_stack().
         */
        context_save(&CPU->saved_context);
        context_set(&CPU->saved_context, FADDR(scheduler_separated_stack),
                    (__address) CPU->stack, CPU_STACK_SIZE);
        context_restore(&CPU->saved_context);
        /* not reached */
}

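/** Scheduler stack switch wrapper.
 *
 * Second part of the scheduler, executed on the CPU's private stack
 * after scheduler() has abandoned the old thread's stack. Finish
 * handling of the previous thread according to its state, pick the
 * next thread via find_best_thread(), switch address spaces if needed
 * and restore the new thread's context.
 */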
void scheduler_separated_stack(void)
{
        int priority;

        ASSERT(CPU != NULL);

        if (THREAD) {
                /* Must be run after the switch to scheduler stack */
                after_thread_ran();

                switch (THREAD->state) {
                    case Running:
                        spinlock_unlock(&THREAD->lock);
                        thread_ready(THREAD);
                        break;

                    case Exiting:
repeat:
                        if (THREAD->detached) {
                                thread_destroy(THREAD);
                        } else {
                                /*
                                 * The thread structure is kept allocated
                                 * until somebody calls thread_detach() on it.
                                 */
                                if (!spinlock_trylock(&THREAD->join_wq.lock)) {
                                        /*
                                         * Avoid deadlock.
                                         */
                                        spinlock_unlock(&THREAD->lock);
                                        delay(10);
                                        spinlock_lock(&THREAD->lock);
                                        goto repeat;
                                }
                                _waitq_wakeup_unsafe(&THREAD->join_wq, false);
                                spinlock_unlock(&THREAD->join_wq.lock);

                                THREAD->state = Undead;
                                spinlock_unlock(&THREAD->lock);
                        }
                        break;

                    case Sleeping:
                        /*
                         * Prefer the thread after it's woken up.
                         */
                        THREAD->priority = -1;

                        /*
                         * We need to release wq->lock which we locked in
                         * waitq_sleep(). Address of wq->lock is kept in
                         * THREAD->sleep_queue.
                         */
                        spinlock_unlock(&THREAD->sleep_queue->lock);

                        /*
                         * Check for possible requests for out-of-context
                         * invocation.
                         */
                        if (THREAD->call_me) {
                                THREAD->call_me(THREAD->call_me_with);
                                THREAD->call_me = NULL;
                                THREAD->call_me_with = NULL;
                        }

                        spinlock_unlock(&THREAD->lock);

                        break;

                    default:
                        /*
                         * Entering state is unexpected.
                         */
                        panic("tid%d: unexpected state %s\n", THREAD->tid,
                              thread_states[THREAD->state]);
                        break;
                }

                THREAD = NULL;
        }

        THREAD = find_best_thread();

        spinlock_lock(&THREAD->lock);
        priority = THREAD->priority;
        spinlock_unlock(&THREAD->lock);

        relink_rq(priority);

        /*
         * If the old and the new task are the same, a lot of work is avoided.
         */
        if (TASK != THREAD->task) {
                as_t *as1 = NULL;
                as_t *as2;

                if (TASK) {
                        spinlock_lock(&TASK->lock);
                        as1 = TASK->as;
                        spinlock_unlock(&TASK->lock);
                }

                spinlock_lock(&THREAD->task->lock);
                as2 = THREAD->task->as;
                spinlock_unlock(&THREAD->task->lock);

                /*
                 * Note that it is possible for two tasks to share one
                 * address space.
                 */
                if (as1 != as2) {
                        /*
                         * Both tasks and address spaces are different.
                         * Replace the old one with the new one.
                         */
                        as_switch(as1, as2);
                }
                TASK = THREAD->task;
                before_task_runs();
        }

        spinlock_lock(&THREAD->lock);
        THREAD->state = Running;

#ifdef SCHEDULER_VERBOSE
        printf("cpu%d: tid %d (priority=%d,ticks=%lld,nrdy=%ld)\n",
               CPU->id, THREAD->tid, THREAD->priority, THREAD->ticks,
               atomic_get(&CPU->nrdy));
#endif

        /*
         * Some architectures provide late kernel PA2KA(identity)
         * mapping in a page fault handler. However, the page fault
         * handler uses the kernel stack of the running thread and
         * therefore cannot be used to map it. The kernel stack, if
         * necessary, is to be mapped in before_thread_runs(). This
         * function must be executed before the switch to the new stack.
         */
        before_thread_runs();

        /*
         * Copy the knowledge of CPU, TASK, THREAD and the preemption
         * counter to the new thread's stack.
         */
        the_copy(THE, (the_t *) THREAD->kstack);

        context_restore(&THREAD->saved_context);
        /* not reached */
}

#ifdef CONFIG_SMP

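/** Load balancing thread.
 *
 * SMP load balancer. Once a second, check whether this CPU has fewer
 * ready threads than the system-wide average and, if so, steal threads
 * from the run queues of busier CPUs.
 *
 * @param arg Generic thread argument (unused).
 */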
void kcpulb(void *arg)
{
        thread_t *t;
        int count, average, i, j, k = 0;
        ipl_t ipl;

        /*
         * Detach kcpulb as nobody will call thread_join_timeout() on it.
         */
        thread_detach(THREAD);

loop:
        /*
         * Work in 1s intervals.
         */
        thread_sleep(1);

not_satisfied:
        /*
         * Calculate the number of threads that will be migrated/stolen from
         * other CPUs. Note that the situation may have changed between two
         * passes, so get the most up-to-date counts each time.
         */
        average = atomic_get(&nrdy) / config.cpu_active + 1;
        count = average - atomic_get(&CPU->nrdy);

        if (count <= 0)
                goto satisfied;

        /*
         * Search the lowest-priority queues on all CPUs first and the
         * highest-priority queues on all CPUs last.
         */
        for (j = RQ_COUNT - 1; j >= 0; j--) {
                for (i = 0; i < config.cpu_active; i++) {
                        link_t *l;
                        runq_t *r;
                        cpu_t *cpu;

                        cpu = &cpus[(i + k) % config.cpu_active];

                        /*
                         * We are not interested in this CPU's own queues.
                         * Interrupts need not be disabled because kcpulb
                         * is X_WIRED to this CPU.
                         */
                        if (CPU == cpu)
                                continue;
                        if (atomic_get(&cpu->nrdy) <= average)
                                continue;

                        ipl = interrupts_disable();
                        r = &cpu->rq[j];
                        spinlock_lock(&r->lock);
                        if (r->n == 0) {
                                spinlock_unlock(&r->lock);
                                interrupts_restore(ipl);
                                continue;
                        }

                        t = NULL;
                        l = r->rq_head.prev;    /* search rq from the back */
                        while (l != &r->rq_head) {
                                t = list_get_instance(l, thread_t, rq_link);
                                /*
                                 * We don't want to steal CPU-wired threads or
                                 * threads that have already been stolen. The
                                 * latter prevents threads from migrating
                                 * between CPUs without ever being run. We also
                                 * don't want to steal threads whose FPU context
                                 * is still in the CPU.
                                 */
                                spinlock_lock(&t->lock);
                                if ((!(t->flags & (X_WIRED | X_STOLEN))) &&
                                    (!(t->fpu_context_engaged))) {
                                        /*
                                         * Remove t from r.
                                         */
                                        spinlock_unlock(&t->lock);

                                        atomic_dec(&cpu->nrdy);
                                        atomic_dec(&nrdy);

                                        r->n--;
                                        list_remove(&t->rq_link);

                                        break;
                                }
                                spinlock_unlock(&t->lock);
                                l = l->prev;
                                t = NULL;
                        }
                        spinlock_unlock(&r->lock);

                        if (t) {
                                /*
                                 * Ready t on local CPU
                                 */
                                spinlock_lock(&t->lock);
#ifdef KCPULB_VERBOSE
                                printf("kcpulb%d: TID %d -> cpu%d, nrdy=%ld, avg=%ld\n",
                                       CPU->id, t->tid, CPU->id,
                                       atomic_get(&CPU->nrdy),
                                       atomic_get(&nrdy) / config.cpu_active);
#endif
                                t->flags |= X_STOLEN;
                                t->state = Entering;
                                spinlock_unlock(&t->lock);

                                thread_ready(t);

                                interrupts_restore(ipl);

                                if (--count == 0)
                                        goto satisfied;

                                /*
                                 * We are not satisfied yet, focus on another
                                 * CPU next time.
                                 */
                                k++;

                                continue;
                        }
                        interrupts_restore(ipl);
                }
        }

        if (atomic_get(&CPU->nrdy)) {
                /*
                 * Be a little bit light-weight and let migrated threads run.
                 */
                scheduler();
        } else {
                /*
                 * We failed to migrate a single thread.
                 * Give up this turn.
                 */
                goto loop;
        }

        goto not_satisfied;

satisfied:
        goto loop;
}

#endif /* CONFIG_SMP */

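/** Print information about the scheduler run queues of all active CPUs. */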
void sched_print_list(void)
{
        ipl_t ipl;
        int cpu, i;
        runq_t *r;
        thread_t *t;
        link_t *cur;

        /*
         * We are going to mess with scheduler structures,
         * let's not be interrupted.
         */
        ipl = interrupts_disable();
        for (cpu = 0; cpu < config.cpu_count; cpu++) {

                if (!cpus[cpu].active)
                        continue;

                spinlock_lock(&cpus[cpu].lock);
                printf("cpu%d: address=%p, nrdy=%ld, needs_relink=%ld\n",
                       cpus[cpu].id, &cpus[cpu], atomic_get(&cpus[cpu].nrdy),
                       cpus[cpu].needs_relink);

                for (i = 0; i < RQ_COUNT; i++) {
                        r = &cpus[cpu].rq[i];
                        spinlock_lock(&r->lock);
                        if (!r->n) {
                                spinlock_unlock(&r->lock);
                                continue;
                        }
                        printf("\trq[%d]: ", i);
                        for (cur = r->rq_head.next; cur != &r->rq_head;
                             cur = cur->next) {
                                t = list_get_instance(cur, thread_t, rq_link);
                                printf("%d(%s) ", t->tid,
                                       thread_states[t->state]);
                        }
                        printf("\n");
                        spinlock_unlock(&r->lock);
                }
                spinlock_unlock(&cpus[cpu].lock);
        }

        interrupts_restore(ipl);
}
