Changeset f3dbe27 in mainline


Ignore:
Timestamp:
2023-04-18T17:33:02Z (15 months ago)
Author:
Jiří Zárevúcky <zarevucky.jiri@…>
Branches:
master, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
Children:
117ad5a2
Parents:
06f81c4
git-author:
Jiří Zárevúcky <zarevucky.jiri@…> (2023-04-18 17:27:32)
git-committer:
Jiří Zárevúcky <zarevucky.jiri@…> (2023-04-18 17:33:02)
Message:

Reduce locking further with lazy FPU

It turns out we only need a lock to synchronize between the trap
handler and thread destructor. The atomic operations introduced
are just plain reads and writes, written in an ugly fashion to
appease C11 undefined behavior gods.

In principle we could get rid of that if we made cpu_t::fpu_owner
a strong reference, but that would mean a thread structure could
be held in limbo indefinitely if a new thread is not being
scheduled or doesn't use FPU.

Location:
kernel
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • kernel/arch/amd64/src/cpu/cpu.c

    r06f81c4 rf3dbe27  
    9999        CPU->arch.tss->iomap_base = &CPU->arch.tss->iomap[0] -
    100100            ((uint8_t *) CPU->arch.tss);
    101         CPU->fpu_owner = NULL;
    102101}
    103102
  • kernel/arch/ia32/src/cpu/cpu.c

    r06f81c4 rf3dbe27  
    8787        CPU->arch.tss->iomap_base = &CPU->arch.tss->iomap[0] - ((uint8_t *) CPU->arch.tss);
    8888
    89         CPU->fpu_owner = NULL;
    90 
    9189        cpuid(INTEL_CPUID_STANDARD, &info);
    9290
  • kernel/generic/include/cpu.h

    r06f81c4 rf3dbe27  
    9898
    9999#ifdef CONFIG_FPU_LAZY
     100        /* For synchronization between FPU trap and thread destructor. */
    100101        IRQ_SPINLOCK_DECLARE(fpu_lock);
    101         struct thread *fpu_owner;
    102102#endif
     103        _Atomic(struct thread *) fpu_owner;
    103104
    104105        /**
  • kernel/generic/src/proc/scheduler.c

    r06f81c4 rf3dbe27  
    8484
    8585#ifdef CONFIG_FPU_LAZY
    86         irq_spinlock_lock(&CPU->fpu_lock, true);
    87 
    88         if (THREAD == CPU->fpu_owner)
     86        /*
     87         * The only concurrent modification possible for fpu_owner here is
     88         * another thread changing it from itself to NULL in its destructor.
     89         */
     90        thread_t *owner = atomic_load_explicit(&CPU->fpu_owner,
     91            memory_order_relaxed);
     92
     93        if (THREAD == owner)
    8994                fpu_enable();
    9095        else
    9196                fpu_disable();
    92 
    93         irq_spinlock_unlock(&CPU->fpu_lock, true);
    9497#elif defined CONFIG_FPU
    9598        fpu_enable();
     
    133136{
    134137        fpu_enable();
     138
     139        /* We need this lock to ensure synchronization with thread destructor. */
    135140        irq_spinlock_lock(&CPU->fpu_lock, false);
    136141
    137142        /* Save old context */
    138         if (CPU->fpu_owner != NULL) {
    139                 fpu_context_save(&CPU->fpu_owner->fpu_context);
    140                 CPU->fpu_owner = NULL;
    141         }
     143        thread_t *owner = atomic_load_explicit(&CPU->fpu_owner, memory_order_relaxed);
     144        if (owner != NULL) {
     145                fpu_context_save(&owner->fpu_context);
     146                atomic_store_explicit(&CPU->fpu_owner, NULL, memory_order_relaxed);
     147        }
     148
     149        irq_spinlock_unlock(&CPU->fpu_lock, false);
    142150
    143151        if (THREAD->fpu_context_exists) {
     
    148156        }
    149157
    150         CPU->fpu_owner = THREAD;
    151 
    152         irq_spinlock_unlock(&CPU->fpu_lock, false);
     158        atomic_store_explicit(&CPU->fpu_owner, THREAD, memory_order_relaxed);
    153159}
    154160#endif /* CONFIG_FPU_LAZY */
     
    499505#ifdef CONFIG_SMP
    500506
    501 static inline void fpu_owner_lock(cpu_t *cpu)
    502 {
    503 #ifdef CONFIG_FPU_LAZY
    504         irq_spinlock_lock(&cpu->fpu_lock, false);
    505 #endif
    506 }
    507 
    508 static inline void fpu_owner_unlock(cpu_t *cpu)
    509 {
    510 #ifdef CONFIG_FPU_LAZY
    511         irq_spinlock_unlock(&cpu->fpu_lock, false);
    512 #endif
    513 }
    514 
    515 static inline thread_t *fpu_owner(cpu_t *cpu)
    516 {
    517 #ifdef CONFIG_FPU_LAZY
    518         assert(irq_spinlock_locked(&cpu->fpu_lock));
    519         return cpu->fpu_owner;
    520 #else
    521         return NULL;
    522 #endif
    523 }
    524 
    525507static thread_t *steal_thread_from(cpu_t *old_cpu, int i)
    526508{
     
    530512        ipl_t ipl = interrupts_disable();
    531513
    532         fpu_owner_lock(old_cpu);
    533514        irq_spinlock_lock(&old_rq->lock, false);
     515
     516        /*
     517         * If fpu_owner is any thread in the list, its store is seen here thanks to
     518         * the runqueue lock.
     519         */
     520        thread_t *fpu_owner = atomic_load_explicit(&old_cpu->fpu_owner,
     521            memory_order_relaxed);
    534522
    535523        /* Search rq from the back */
     
    545533                 */
    546534                if (thread->stolen || thread->nomigrate ||
    547                     thread == fpu_owner(old_cpu)) {
     535                    thread == fpu_owner) {
    548536                        irq_spinlock_unlock(&thread->lock, false);
    549537                        continue;
    550538                }
    551 
    552                 fpu_owner_unlock(old_cpu);
    553539
    554540                thread->stolen = true;
     
    587573
    588574        irq_spinlock_unlock(&old_rq->lock, false);
    589         fpu_owner_unlock(old_cpu);
    590575        interrupts_restore(ipl);
    591576        return NULL;
  • kernel/generic/src/proc/thread.c

    r06f81c4 rf3dbe27  
    415415#ifdef CONFIG_FPU_LAZY
    416416        if (thread->cpu) {
     417                /*
     418                 * We need to lock for this because the old CPU can concurrently try
     419                 * to dump this thread's FPU state, in which case we need to wait for
     420                 * it to finish. An atomic compare-and-swap wouldn't be enough.
     421                 */
    417422                irq_spinlock_lock(&thread->cpu->fpu_lock, false);
    418                 if (thread->cpu->fpu_owner == thread)
    419                         thread->cpu->fpu_owner = NULL;
     423
     424                thread_t *owner = atomic_load_explicit(&thread->cpu->fpu_owner,
     425                    memory_order_relaxed);
     426
     427                if (owner == thread) {
     428                        atomic_store_explicit(&thread->cpu->fpu_owner, NULL,
     429                            memory_order_relaxed);
     430                }
     431
    420432                irq_spinlock_unlock(&thread->cpu->fpu_lock, false);
    421433        }
Note: See TracChangeset for help on using the changeset viewer.