source: mainline/kernel/generic/src/mm/as.c@ 6785b88b

Last change on this file since 6785b88b was 6785b88b, checked in by Jiri Svoboda <jiri@…>, 7 years ago

Fix indentation.

1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * Copyright (c) 2018 Jiri Svoboda
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup kernel_generic_mm
31 * @{
32 */
33
34/**
35 * @file
36 * @brief Address space related functions.
37 *
38 * This file contains address space manipulation functions.
39 * Roughly speaking, this is a higher-level client of
40 * the Virtual Address Translation (VAT) subsystem.
41 *
42 * Functionality provided by this file allows one to
43 * create address spaces and create, resize and share
44 * address space areas.
45 *
46 * @see page.c
47 *
48 */
49
50#include <mm/as.h>
51#include <arch/mm/as.h>
52#include <mm/page.h>
53#include <mm/frame.h>
54#include <mm/slab.h>
55#include <mm/tlb.h>
56#include <arch/mm/page.h>
57#include <genarch/mm/page_pt.h>
58#include <genarch/mm/page_ht.h>
59#include <mm/asid.h>
60#include <arch/mm/asid.h>
61#include <preemption.h>
62#include <synch/spinlock.h>
63#include <synch/mutex.h>
64#include <adt/list.h>
65#include <adt/btree.h>
66#include <proc/task.h>
67#include <proc/thread.h>
68#include <arch/asm.h>
69#include <panic.h>
70#include <assert.h>
71#include <stdio.h>
72#include <mem.h>
73#include <macros.h>
74#include <bitops.h>
75#include <arch.h>
76#include <errno.h>
77#include <config.h>
78#include <align.h>
79#include <typedefs.h>
80#include <syscall/copy.h>
81#include <arch/interrupt.h>
82#include <interrupt.h>
83
84/**
85 * Each architecture decides what functions will be used to carry out
86 * address space operations such as creating or locking page tables.
87 */
88as_operations_t *as_operations = NULL;
89
90/** Slab for as_t objects.
91 *
92 */
93static slab_cache_t *as_cache;
94
95/** ASID subsystem lock.
96 *
97 * This lock protects:
98 * - inactive_as_with_asid_list
99 * - as->asid for each as of the as_t type
100 * - asids_allocated counter
101 *
102 */
103SPINLOCK_INITIALIZE(asidlock);
104
105/**
106 * Inactive address spaces (on all processors)
107 * that have valid ASID.
108 */
109LIST_INITIALIZE(inactive_as_with_asid_list);
110
111/** Kernel address space. */
112as_t *AS_KERNEL = NULL;
113
114static void *as_areas_getkey(odlink_t *);
115static int as_areas_cmp(void *, void *);
116
117NO_TRACE static errno_t as_constructor(void *obj, unsigned int flags)
118{
119 as_t *as = (as_t *) obj;
120
121 link_initialize(&as->inactive_as_with_asid_link);
122 mutex_initialize(&as->lock, MUTEX_PASSIVE);
123
124 return as_constructor_arch(as, flags);
125}
126
127NO_TRACE static size_t as_destructor(void *obj)
128{
129 return as_destructor_arch((as_t *) obj);
130}
131
132/** Initialize address space subsystem. */
133void as_init(void)
134{
135 as_arch_init();
136
137 as_cache = slab_cache_create("as_t", sizeof(as_t), 0,
138 as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);
139
140 AS_KERNEL = as_create(FLAG_AS_KERNEL);
141 if (!AS_KERNEL)
142 panic("Cannot create kernel address space.");
143
144 /*
145 * Make sure the kernel address space
146 * reference count never drops to zero.
147 */
148 as_hold(AS_KERNEL);
149}
150
151/** Create address space.
152 *
153 * @param flags Flags that influence the way in which the address
154 * space is created.
155 *
156 */
157as_t *as_create(unsigned int flags)
158{
159 as_t *as = (as_t *) slab_alloc(as_cache, 0);
160 (void) as_create_arch(as, 0);
161
162 odict_initialize(&as->as_areas, as_areas_getkey, as_areas_cmp);
163
164 if (flags & FLAG_AS_KERNEL)
165 as->asid = ASID_KERNEL;
166 else
167 as->asid = ASID_INVALID;
168
169 refcount_init(&as->refcount);
170 as->cpu_refcount = 0;
171
172#ifdef AS_PAGE_TABLE
173 as->genarch.page_table = page_table_create(flags);
174#else
175 page_table_create(flags);
176#endif
177
178 return as;
179}
180
181/** Destroy address space.
182 *
183 * When there are no tasks referencing this address space (i.e. its refcount is
184 * zero), the address space can be destroyed.
185 *
186 * We know that we don't hold any spinlock.
187 *
188 * @param as Address space to be destroyed.
189 *
190 */
191void as_destroy(as_t *as)
192{
193 DEADLOCK_PROBE_INIT(p_asidlock);
194
195 assert(as != AS);
196 assert(refcount_unique(&as->refcount));
197
198 /*
199 * Since there is no reference to this address space, it is safe not to
200 * lock its mutex.
201 */
202
203 /*
204 * We need to avoid deadlock between TLB shootdown and asidlock.
205 * We therefore try to take asidlock conditionally and if we don't succeed,
206 * we enable interrupts and try again. This is done while preemption is
207 * disabled to prevent nested context switches. We also depend on the
208 * fact that so far no spinlocks are held.
209 */
210 preemption_disable();
211 ipl_t ipl = interrupts_read();
212
213retry:
214 interrupts_disable();
215 if (!spinlock_trylock(&asidlock)) {
216 interrupts_enable();
217 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
218 goto retry;
219 }
220
221 /* Interrupts disabled, enable preemption */
222 preemption_enable();
223
224 if ((as->asid != ASID_INVALID) && (as != AS_KERNEL)) {
225 if (as->cpu_refcount == 0)
226 list_remove(&as->inactive_as_with_asid_link);
227
228 asid_put(as->asid);
229 }
230
231 spinlock_unlock(&asidlock);
232 interrupts_restore(ipl);
233
234 /*
235 * Destroy address space areas of the address space.
236 * Need to start from the beginning each time since we are destroying
237 * the areas.
238 */
239 as_area_t *area = as_area_first(as);
240 while (area != NULL) {
241 /*
242 * XXX We already have as_area_t, but as_area_destroy will
243 * have to search for it. This could be made faster.
244 */
245 as_area_destroy(as, area->base);
246 area = as_area_first(as);
247 }
248
249 odict_finalize(&as->as_areas);
250
251#ifdef AS_PAGE_TABLE
252 page_table_destroy(as->genarch.page_table);
253#else
254 page_table_destroy(NULL);
255#endif
256
257 slab_free(as_cache, as);
258}
259
260/** Hold a reference to an address space.
261 *
262 * Holding a reference to an address space prevents destruction
263 * of that address space.
264 *
265 * @param as Address space to be held.
266 *
267 */
268NO_TRACE void as_hold(as_t *as)
269{
270 refcount_up(&as->refcount);
271}
272
273/** Release a reference to an address space.
274 *
275 * The last one to release a reference to an address space
276 * destroys the address space.
277 *
278 * @param as Address space to be released.
279 *
280 */
281NO_TRACE void as_release(as_t *as)
282{
283 if (refcount_down(&as->refcount))
284 as_destroy(as);
285}
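
/*
 * Usage sketch (illustrative): a caller that keeps a borrowed address space
 * pointer alive beyond the scope that owns the reference brackets the use
 * with as_hold()/as_release():
 *
 *	as_hold(as);
 *	// ... use 'as' without it being destroyed underneath us ...
 *	as_release(as);	// the last release ends up calling as_destroy()
 */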
286
287/** Return first address space area.
288 *
289 * @param as Address space
290 * @return First area in @a as (i.e. area with the lowest base address)
291 * or @c NULL if there is none
292 */
293as_area_t *as_area_first(as_t *as)
294{
295 odlink_t *odlink = odict_first(&as->as_areas);
296 if (odlink == NULL)
297 return NULL;
298
299 return odict_get_instance(odlink, as_area_t, las_areas);
300}
301
302/** Return next address space area.
303 *
304 * @param cur Current area
305 * @return Next area in the same address space or @c NULL if @a cur is the
306 * last area.
307 */
308as_area_t *as_area_next(as_area_t *cur)
309{
310 odlink_t *odlink = odict_next(&cur->las_areas, &cur->as->as_areas);
311 if (odlink == NULL)
312 return NULL;
313
314 return odict_get_instance(odlink, as_area_t, las_areas);
315}
316
317/** Determine if area with specified parameters would conflict with
318 * a specific existing address space area.
319 *
320 * @param addr Starting virtual address of the area being tested.
321 * @param count Number of pages in the area being tested.
322 * @param guarded True if the area being tested is protected by guard pages.
323 * @param area Area against which we are testing.
324 *
325 * @return True if the two areas conflict, false otherwise.
326 */
327NO_TRACE static bool area_is_conflicting(uintptr_t addr,
328 size_t count, bool guarded, as_area_t *area)
329{
330 assert((addr % PAGE_SIZE) == 0);
331
332 size_t gsize = P2SZ(count);
333 size_t agsize = P2SZ(area->pages);
334
335 /*
336 * A guarded area has one guard page before, one page after.
337 * What we do here is: if either area is guarded, we add
338 * PAGE_SIZE to the size of both areas. That guarantees
339 * they will be spaced at least one page apart.
340 */
341 if (guarded || (area->flags & AS_AREA_GUARD) != 0) {
342 /* Add guard page size unless area is at the end of VA domain */
343 if (!overflows(addr, P2SZ(count)))
344 gsize += PAGE_SIZE;
345
346 /* Add guard page size unless area is at the end of VA domain */
347 if (!overflows(area->base, P2SZ(area->pages)))
348 agsize += PAGE_SIZE;
349 }
350
351 return overlaps(addr, gsize, area->base, agsize);
352
353}
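
/*
 * Worked example (illustrative, assuming 4 KiB pages): testing a 2-page area
 * at 0x11000 against an existing guarded 3-page area at 0x13000 yields
 * gsize = 0x2000 + PAGE_SIZE and agsize = 0x3000 + PAGE_SIZE, so
 * overlaps(0x11000, 0x3000, 0x13000, 0x4000) is true and the areas conflict:
 * they would touch without the required guard page in between.
 */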
354
355/** Check area conflicts with other areas.
356 *
357 * @param as Address space.
358 * @param addr Starting virtual address of the area being tested.
359 * @param count Number of pages in the area being tested.
360 * @param guarded True if the area being tested is protected by guard pages.
361 * @param avoid Do not touch this area. I.e. this area is not considered
362 * as presenting a conflict.
363 *
364 * @return True if there is no conflict, false otherwise.
365 *
366 */
367NO_TRACE static bool check_area_conflicts(as_t *as, uintptr_t addr,
368 size_t count, bool guarded, as_area_t *avoid)
369{
370 assert((addr % PAGE_SIZE) == 0);
371 assert(mutex_locked(&as->lock));
372
373 /*
374 * If the addition of the supposed area address and size overflows,
375 * report conflict.
376 */
377 if (overflows_into_positive(addr, P2SZ(count)))
378 return false;
379
380 /*
381 * We don't want any area to have conflicts with NULL page.
382 */
383 if (overlaps(addr, P2SZ(count), (uintptr_t) NULL, PAGE_SIZE))
384 return false;
385
386 /*
387 * To determine if we overlap with another area, we just need
388 * to check for overlap with the last area whose base address is <=
389 * ours and with the first area whose base address is > ours.
390 *
391 * First find last area with <= base address.
392 */
393 odlink_t *odlink = odict_find_leq(&as->as_areas, &addr, NULL);
394 if (odlink != NULL) {
395 as_area_t *area = odict_get_instance(odlink, as_area_t,
396 las_areas);
397
398 if (area != avoid) {
399 mutex_lock(&area->lock);
400 if (area_is_conflicting(addr, count, guarded, area)) {
401 mutex_unlock(&area->lock);
402 return false;
403 }
404
405 mutex_unlock(&area->lock);
406 }
407
408 /* Next area */
409 odlink = odict_next(odlink, &as->as_areas);
410 }
411
412 /*
413 * Next area, if any, is the first with base > our base address.
414 * If there was no area with <= base, we need to look at the first area.
415 */
416 if (odlink == NULL)
417 odlink = odict_first(&as->as_areas);
418
419 if (odlink != NULL) {
420 as_area_t *area = odict_get_instance(odlink, as_area_t,
421 las_areas);
422
423 if (area != avoid) {
424 mutex_lock(&area->lock);
425 if (area_is_conflicting(addr, count, guarded, area)) {
426 mutex_unlock(&area->lock);
427 return false;
428 }
429
430 mutex_unlock(&area->lock);
431 }
432 }
433
434 /*
435 * So far, the area does not conflict with other areas.
436 * Check if it is contained in the user address space.
437 */
438 if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
439 return iswithin(USER_ADDRESS_SPACE_START,
440 (USER_ADDRESS_SPACE_END - USER_ADDRESS_SPACE_START) + 1,
441 addr, P2SZ(count));
442 }
443
444 return true;
445}
446
447/** Return pointer to unmapped address space area
448 *
449 * The address space must be already locked when calling
450 * this function.
451 *
452 * @param as Address space.
453 * @param bound Lowest address bound.
454 * @param size Requested size of the allocation.
455 * @param guarded True if the allocation must be protected by guard pages.
456 *
457 * @return Address of the beginning of unmapped address space area.
458 * @return -1 if no suitable address space area was found.
459 *
460 */
461NO_TRACE static uintptr_t as_get_unmapped_area(as_t *as, uintptr_t bound,
462 size_t size, bool guarded)
463{
464 assert(mutex_locked(&as->lock));
465
466 if (size == 0)
467 return (uintptr_t) -1;
468
469 /*
470 * Make sure we allocate from page-aligned
471 * address. Check for possible overflow in
472 * each step.
473 */
474
475 size_t pages = SIZE2FRAMES(size);
476
477 /*
478 * Find the lowest unmapped address aligned on the size
479 * boundary, not smaller than bound and of the required size.
480 */
481
482 /* First check the bound address itself */
483 uintptr_t addr = ALIGN_UP(bound, PAGE_SIZE);
484 if (addr >= bound) {
485 if (guarded) {
486 /*
487 * Leave an unmapped page between the lower
488 * bound and the area's start address.
489 */
490 addr += P2SZ(1);
491 }
492
493 if (check_area_conflicts(as, addr, pages, guarded, NULL))
494 return addr;
495 }
496
497 /* Next, check the addresses behind each area */
498 as_area_t *area = as_area_first(as);
499 while (area != NULL) {
500 mutex_lock(&area->lock);
501
502 addr = area->base + P2SZ(area->pages);
503
504 if (guarded || area->flags & AS_AREA_GUARD) {
505 /*
506 * We must leave an unmapped page
507 * between the two areas.
508 */
509 addr += P2SZ(1);
510 }
511
512 bool avail =
513 ((addr >= bound) && (addr >= area->base) &&
514 (check_area_conflicts(as, addr, pages, guarded, area)));
515
516 mutex_unlock(&area->lock);
517
518 if (avail)
519 return addr;
520
521 area = as_area_next(area);
522 }
523
524 /* No suitable address space area found */
525 return (uintptr_t) -1;
526}
527
528/** Remove reference to address space area share info.
529 *
530 * If the reference count drops to 0, the sh_info is deallocated.
531 *
532 * @param sh_info Pointer to address space area share info.
533 *
534 */
535NO_TRACE static void sh_info_remove_reference(share_info_t *sh_info)
536{
537 bool dealloc = false;
538
539 mutex_lock(&sh_info->lock);
540 assert(sh_info->refcount);
541
542 if (--sh_info->refcount == 0) {
543 dealloc = true;
544
545 /*
546 * Now walk carefully the pagemap B+tree and free/remove
547 * reference from all frames found there.
548 */
549 list_foreach(sh_info->pagemap.leaf_list, leaf_link,
550 btree_node_t, node) {
551 btree_key_t i;
552
553 for (i = 0; i < node->keys; i++)
554 frame_free((uintptr_t) node->value[i], 1);
555 }
556
557 }
558 mutex_unlock(&sh_info->lock);
559
560 if (dealloc) {
561 if (sh_info->backend && sh_info->backend->destroy_shared_data) {
562 sh_info->backend->destroy_shared_data(
563 sh_info->backend_shared_data);
564 }
565 btree_destroy(&sh_info->pagemap);
566 free(sh_info);
567 }
568}
569
570/** Create address space area of common attributes.
571 *
572 * The created address space area is added to the target address space.
573 *
574 * @param as Target address space.
575 * @param flags Flags of the area memory.
576 * @param size Size of area.
577 * @param attrs Attributes of the area.
578 * @param backend Address space area backend. NULL if no backend is used.
579 * @param backend_data NULL or a pointer to custom backend data.
580 * @param base Starting virtual address of the area.
581 * If set to AS_AREA_ANY, a suitable mappable area is
582 * found.
583 * @param bound Lowest address bound if base is set to AS_AREA_ANY.
584 * Otherwise ignored.
585 *
586 * @return Address space area on success or NULL on failure.
587 *
588 */
589as_area_t *as_area_create(as_t *as, unsigned int flags, size_t size,
590 unsigned int attrs, mem_backend_t *backend,
591 mem_backend_data_t *backend_data, uintptr_t *base, uintptr_t bound)
592{
593 if ((*base != (uintptr_t) AS_AREA_ANY) && !IS_ALIGNED(*base, PAGE_SIZE))
594 return NULL;
595
596 if (size == 0)
597 return NULL;
598
599 size_t pages = SIZE2FRAMES(size);
600
601 /* Writeable executable areas are not supported. */
602 if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
603 return NULL;
604
605 bool const guarded = flags & AS_AREA_GUARD;
606
607 mutex_lock(&as->lock);
608
609 if (*base == (uintptr_t) AS_AREA_ANY) {
610 *base = as_get_unmapped_area(as, bound, size, guarded);
611 if (*base == (uintptr_t) -1) {
612 mutex_unlock(&as->lock);
613 return NULL;
614 }
615 }
616
617 if (overflows_into_positive(*base, size)) {
618 mutex_unlock(&as->lock);
619 return NULL;
620 }
621
622 if (!check_area_conflicts(as, *base, pages, guarded, NULL)) {
623 mutex_unlock(&as->lock);
624 return NULL;
625 }
626
627 as_area_t *area = (as_area_t *) malloc(sizeof(as_area_t));
628 if (!area) {
629 mutex_unlock(&as->lock);
630 return NULL;
631 }
632
633 mutex_initialize(&area->lock, MUTEX_PASSIVE);
634
635 area->as = as;
636 odlink_initialize(&area->las_areas);
637 area->flags = flags;
638 area->attributes = attrs;
639 area->pages = pages;
640 area->resident = 0;
641 area->base = *base;
642 area->backend = backend;
643 area->sh_info = NULL;
644
645 if (backend_data)
646 area->backend_data = *backend_data;
647 else
648 memsetb(&area->backend_data, sizeof(area->backend_data), 0);
649
650 share_info_t *si = NULL;
651
652 /*
653 * Create the sharing info structure.
654 * We do this in advance for every new area, even if it is not going
655 * to be shared.
656 */
657 if (!(attrs & AS_AREA_ATTR_PARTIAL)) {
658 si = (share_info_t *) malloc(sizeof(share_info_t));
659 if (!si) {
660 free(area);
661 mutex_unlock(&as->lock);
662 return NULL;
663 }
664 mutex_initialize(&si->lock, MUTEX_PASSIVE);
665 si->refcount = 1;
666 si->shared = false;
667 si->backend_shared_data = NULL;
668 si->backend = backend;
669 btree_create(&si->pagemap);
670
671 area->sh_info = si;
672
673 if (area->backend && area->backend->create_shared_data) {
674 if (!area->backend->create_shared_data(area)) {
675 free(area);
676 mutex_unlock(&as->lock);
677 sh_info_remove_reference(si);
678 return NULL;
679 }
680 }
681 }
682
683 if (area->backend && area->backend->create) {
684 if (!area->backend->create(area)) {
685 free(area);
686 mutex_unlock(&as->lock);
687 if (!(attrs & AS_AREA_ATTR_PARTIAL))
688 sh_info_remove_reference(si);
689 return NULL;
690 }
691 }
692
693 btree_create(&area->used_space);
694 odict_insert(&area->las_areas, &as->as_areas, NULL);
695
696 mutex_unlock(&as->lock);
697
698 return area;
699}
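
/*
 * Usage sketch (illustrative; anon_backend is the anonymous memory backend
 * also referenced further below in this file, attrs = 0 means no special
 * attributes): creating a one-page read/write area at any suitable address
 * could look like this:
 *
 *	uintptr_t base = (uintptr_t) AS_AREA_ANY;
 *	as_area_t *area = as_area_create(AS, AS_AREA_READ | AS_AREA_WRITE |
 *	    AS_AREA_CACHEABLE, PAGE_SIZE, 0, &anon_backend,
 *	    NULL, &base, 0);
 *	// on success, 'base' holds the chosen starting virtual address
 */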
700
701/** Find address space area and lock it.
702 *
703 * @param as Address space.
704 * @param va Virtual address.
705 *
706 * @return Locked address space area containing va on success or
707 * NULL on failure.
708 *
709 */
710NO_TRACE static as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
711{
712 assert(mutex_locked(&as->lock));
713
714 odlink_t *odlink = odict_find_leq(&as->as_areas, &va, NULL);
715 if (odlink == NULL)
716 return NULL;
717
718 as_area_t *area = odict_get_instance(odlink, as_area_t, las_areas);
719 mutex_lock(&area->lock);
720
721 assert(area->base <= va);
722
723 if (va <= area->base + (P2SZ(area->pages) - 1))
724 return area;
725
726 mutex_unlock(&area->lock);
727 return NULL;
728}
729
730/** Find address space area and change it.
731 *
732 * @param as Address space.
733 * @param address Virtual address belonging to the area to be changed.
734 * Must be page-aligned.
735 * @param size New size of the virtual memory block starting at
736 * address.
737 * @param flags Flags influencing the remap operation. Currently unused.
738 *
739 * @return Zero on success or a value from @ref errno.h otherwise.
740 *
741 */
742errno_t as_area_resize(as_t *as, uintptr_t address, size_t size, unsigned int flags)
743{
744 if (!IS_ALIGNED(address, PAGE_SIZE))
745 return EINVAL;
746
747 mutex_lock(&as->lock);
748
749 /*
750 * Locate the area.
751 */
752 as_area_t *area = find_area_and_lock(as, address);
753 if (!area) {
754 mutex_unlock(&as->lock);
755 return ENOENT;
756 }
757
758 if (!area->backend->is_resizable(area)) {
759 /*
760 * The backend does not support resizing for this area.
761 */
762 mutex_unlock(&area->lock);
763 mutex_unlock(&as->lock);
764 return ENOTSUP;
765 }
766
767 mutex_lock(&area->sh_info->lock);
768 if (area->sh_info->shared) {
769 /*
770 * Remapping of shared address space areas
771 * is not supported.
772 */
773 mutex_unlock(&area->sh_info->lock);
774 mutex_unlock(&area->lock);
775 mutex_unlock(&as->lock);
776 return ENOTSUP;
777 }
778 mutex_unlock(&area->sh_info->lock);
779
780 size_t pages = SIZE2FRAMES((address - area->base) + size);
781 if (!pages) {
782 /*
783 * Zero size address space areas are not allowed.
784 */
785 mutex_unlock(&area->lock);
786 mutex_unlock(&as->lock);
787 return EPERM;
788 }
789
790 if (pages < area->pages) {
791 uintptr_t start_free = area->base + P2SZ(pages);
792
793 /*
794 * Shrinking the area.
795 * No need to check for overlaps.
796 */
797
798 page_table_lock(as, false);
799
800 /*
801 * Remove frames belonging to used space starting from
802 * the highest addresses downwards until an overlap with
803 * the resized address space area is found. Note that this
804 * is also the right way to remove part of the used_space
805 * B+tree leaf list.
806 */
807 bool cond = true;
808 while (cond) {
809 assert(!list_empty(&area->used_space.leaf_list));
810
811 btree_node_t *node =
812 list_get_instance(list_last(&area->used_space.leaf_list),
813 btree_node_t, leaf_link);
814
815 if ((cond = (node->keys != 0))) {
816 uintptr_t ptr = node->key[node->keys - 1];
817 size_t node_size =
818 (size_t) node->value[node->keys - 1];
819 size_t i = 0;
820
821 if (overlaps(ptr, P2SZ(node_size), area->base,
822 P2SZ(pages))) {
823
824 if (ptr + P2SZ(node_size) <= start_free) {
825 /*
826 * The whole interval fits
827 * completely in the resized
828 * address space area.
829 */
830 break;
831 }
832
833 /*
834 * Part of the interval overlaps
835 * with the resized address space
836 * area.
837 */
838
839 /* We are almost done */
840 cond = false;
841 i = (start_free - ptr) >> PAGE_WIDTH;
842 if (!used_space_remove(area, start_free,
843 node_size - i))
844 panic("Cannot remove used space.");
845 } else {
846 /*
847 * The interval of used space can be
848 * completely removed.
849 */
850 if (!used_space_remove(area, ptr, node_size))
851 panic("Cannot remove used space.");
852 }
853
854 /*
855 * Start TLB shootdown sequence.
856 *
857 * The sequence is rather short and can be
858 * repeated multiple times. The reason is that
859 * we don't want to have used_space_remove()
860 * inside the sequence as it may use a blocking
861 * memory allocation for its B+tree. Blocking
862 * while holding the tlblock spinlock is
863 * forbidden and would hit a kernel assertion.
864 */
865
866 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES,
867 as->asid, area->base + P2SZ(pages),
868 area->pages - pages);
869
870 for (; i < node_size; i++) {
871 pte_t pte;
872 bool found = page_mapping_find(as,
873 ptr + P2SZ(i), false, &pte);
874
875 assert(found);
876 assert(PTE_VALID(&pte));
877 assert(PTE_PRESENT(&pte));
878
879 if ((area->backend) &&
880 (area->backend->frame_free)) {
881 area->backend->frame_free(area,
882 ptr + P2SZ(i),
883 PTE_GET_FRAME(&pte));
884 }
885
886 page_mapping_remove(as, ptr + P2SZ(i));
887 }
888
889 /*
890 * Finish TLB shootdown sequence.
891 */
892
893 tlb_invalidate_pages(as->asid,
894 area->base + P2SZ(pages),
895 area->pages - pages);
896
897 /*
898 * Invalidate software translation caches
899 * (e.g. TSB on sparc64, PHT on ppc32).
900 */
901 as_invalidate_translation_cache(as,
902 area->base + P2SZ(pages),
903 area->pages - pages);
904 tlb_shootdown_finalize(ipl);
905 }
906 }
907 page_table_unlock(as, false);
908 } else {
909 /*
910 * Growing the area.
911 */
912
913 if (overflows_into_positive(address, P2SZ(pages)))
914 return EINVAL;
915
916 /*
917 * Check for overlaps with other address space areas.
918 */
919 bool const guarded = area->flags & AS_AREA_GUARD;
920 if (!check_area_conflicts(as, address, pages, guarded, area)) {
921 mutex_unlock(&area->lock);
922 mutex_unlock(&as->lock);
923 return EADDRNOTAVAIL;
924 }
925 }
926
927 if (area->backend && area->backend->resize) {
928 if (!area->backend->resize(area, pages)) {
929 mutex_unlock(&area->lock);
930 mutex_unlock(&as->lock);
931 return ENOMEM;
932 }
933 }
934
935 area->pages = pages;
936
937 mutex_unlock(&area->lock);
938 mutex_unlock(&as->lock);
939
940 return 0;
941}
942
943/** Destroy address space area.
944 *
945 * @param as Address space.
946 * @param address Address within the area to be deleted.
947 *
948 * @return Zero on success or a value from @ref errno.h on failure.
949 *
950 */
951errno_t as_area_destroy(as_t *as, uintptr_t address)
952{
953 mutex_lock(&as->lock);
954
955 as_area_t *area = find_area_and_lock(as, address);
956 if (!area) {
957 mutex_unlock(&as->lock);
958 return ENOENT;
959 }
960
961 if (area->backend && area->backend->destroy)
962 area->backend->destroy(area);
963
964 page_table_lock(as, false);
965
966 /*
967 * Start TLB shootdown sequence.
968 */
969 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
970 area->pages);
971
972 /*
973 * Visit only the pages mapped by used_space B+tree.
974 */
975 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
976 node) {
977 btree_key_t i;
978
979 for (i = 0; i < node->keys; i++) {
980 uintptr_t ptr = node->key[i];
981 size_t size;
982
983 for (size = 0; size < (size_t) node->value[i]; size++) {
984 pte_t pte;
985 bool found = page_mapping_find(as,
986 ptr + P2SZ(size), false, &pte);
987
988 assert(found);
989 assert(PTE_VALID(&pte));
990 assert(PTE_PRESENT(&pte));
991
992 if ((area->backend) &&
993 (area->backend->frame_free)) {
994 area->backend->frame_free(area,
995 ptr + P2SZ(size),
996 PTE_GET_FRAME(&pte));
997 }
998
999 page_mapping_remove(as, ptr + P2SZ(size));
1000 }
1001 }
1002 }
1003
1004 /*
1005 * Finish TLB shootdown sequence.
1006 */
1007
1008 tlb_invalidate_pages(as->asid, area->base, area->pages);
1009
1010 /*
1011 * Invalidate potential software translation caches
1012 * (e.g. TSB on sparc64, PHT on ppc32).
1013 */
1014 as_invalidate_translation_cache(as, area->base, area->pages);
1015 tlb_shootdown_finalize(ipl);
1016
1017 page_table_unlock(as, false);
1018
1019 btree_destroy(&area->used_space);
1020
1021 area->attributes |= AS_AREA_ATTR_PARTIAL;
1022
1023 sh_info_remove_reference(area->sh_info);
1024
1025 mutex_unlock(&area->lock);
1026
1027 /*
1028 * Remove the empty area from address space.
1029 */
1030 odict_remove(&area->las_areas);
1031
1032 free(area);
1033
1034 mutex_unlock(&as->lock);
1035 return 0;
1036}
1037
1038/** Share address space area with another or the same address space.
1039 *
1040 * Address space area mapping is shared with a new address space area.
1041 * If the source address space area has not been shared so far,
1042 * a new sh_info is created. The new address space area simply gets the
1043 * sh_info of the source area. The process of duplicating the
1044 * mapping is done through the backend share function.
1045 *
1046 * @param src_as Pointer to source address space.
1047 * @param src_base Base address of the source address space area.
1048 * @param acc_size Expected size of the source area.
1049 * @param dst_as Pointer to destination address space.
1050 * @param dst_flags_mask Destination address space area flags mask.
1051 * @param dst_base Target base address. If set to -1,
1052 * a suitable mappable area is found.
1053 * @param bound Lowest address bound if dst_base is set to -1.
1054 * Otherwise ignored.
1055 *
1056 * @return Zero on success.
1057 * @return ENOENT if there is no such task or such address space.
1058 * @return EPERM if there was a problem in accepting the area.
1059 * @return ENOMEM if there was a problem in allocating destination
1060 * address space area.
1061 * @return ENOTSUP if the address space area backend does not support
1062 * sharing.
1063 *
1064 */
1065errno_t as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
1066 as_t *dst_as, unsigned int dst_flags_mask, uintptr_t *dst_base,
1067 uintptr_t bound)
1068{
1069 mutex_lock(&src_as->lock);
1070 as_area_t *src_area = find_area_and_lock(src_as, src_base);
1071 if (!src_area) {
1072 /*
1073 * Could not find the source address space area.
1074 */
1075 mutex_unlock(&src_as->lock);
1076 return ENOENT;
1077 }
1078
1079 if (!src_area->backend->is_shareable(src_area)) {
1080 /*
1081 * The backend does not permit sharing of this area.
1082 */
1083 mutex_unlock(&src_area->lock);
1084 mutex_unlock(&src_as->lock);
1085 return ENOTSUP;
1086 }
1087
1088 size_t src_size = P2SZ(src_area->pages);
1089 unsigned int src_flags = src_area->flags;
1090 mem_backend_t *src_backend = src_area->backend;
1091 mem_backend_data_t src_backend_data = src_area->backend_data;
1092
1093 /* Share the cacheable flag from the original mapping */
1094 if (src_flags & AS_AREA_CACHEABLE)
1095 dst_flags_mask |= AS_AREA_CACHEABLE;
1096
1097 if ((src_size != acc_size) ||
1098 ((src_flags & dst_flags_mask) != dst_flags_mask)) {
1099 mutex_unlock(&src_area->lock);
1100 mutex_unlock(&src_as->lock);
1101 return EPERM;
1102 }
1103
1104 /*
1105 * Now we are committed to sharing the area.
1106 * First, prepare the area for sharing.
1107 * Then it will be safe to unlock it.
1108 */
1109 share_info_t *sh_info = src_area->sh_info;
1110
1111 mutex_lock(&sh_info->lock);
1112 sh_info->refcount++;
1113 bool shared = sh_info->shared;
1114 sh_info->shared = true;
1115 mutex_unlock(&sh_info->lock);
1116
1117 if (!shared) {
1118 /*
1119 * Call the backend to setup sharing.
1120 * This only happens once for each sh_info.
1121 */
1122 src_area->backend->share(src_area);
1123 }
1124
1125 mutex_unlock(&src_area->lock);
1126 mutex_unlock(&src_as->lock);
1127
1128 /*
1129 * Create copy of the source address space area.
1130 * The destination area is created with AS_AREA_ATTR_PARTIAL
1131 * attribute set which prevents race condition with
1132 * preliminary as_page_fault() calls.
1133 * The flags of the source area are masked against dst_flags_mask
1134 * to support sharing in less privileged mode.
1135 */
1136 as_area_t *dst_area = as_area_create(dst_as, dst_flags_mask,
1137 src_size, AS_AREA_ATTR_PARTIAL, src_backend,
1138 &src_backend_data, dst_base, bound);
1139 if (!dst_area) {
1140 /*
1141 * Destination address space area could not be created.
1142 */
1143 sh_info_remove_reference(sh_info);
1144
1145 return ENOMEM;
1146 }
1147
1148 /*
1149 * Now the destination address space area has been
1150 * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
1151 * attribute and set the sh_info.
1152 */
1153 mutex_lock(&dst_as->lock);
1154 mutex_lock(&dst_area->lock);
1155 dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
1156 dst_area->sh_info = sh_info;
1157 mutex_unlock(&dst_area->lock);
1158 mutex_unlock(&dst_as->lock);
1159
1160 return 0;
1161}
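
/*
 * Usage sketch (illustrative; src_as, src_base, src_size and dst_as are
 * placeholders): mapping a source area read-only into another address space
 * at any suitable address:
 *
 *	uintptr_t dst_base = (uintptr_t) -1;
 *	errno_t rc = as_area_share(src_as, src_base, src_size, dst_as,
 *	    AS_AREA_READ | AS_AREA_CACHEABLE, &dst_base, 0);
 *	// rc == 0: 'dst_base' now holds the base of the new shared area
 */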
1162
1163/** Check access mode for address space area.
1164 *
1165 * @param area Address space area.
1166 * @param access Access mode.
1167 *
1168 * @return False if access violates area's permissions, true
1169 * otherwise.
1170 *
1171 */
1172NO_TRACE bool as_area_check_access(as_area_t *area, pf_access_t access)
1173{
1174 assert(mutex_locked(&area->lock));
1175
1176 int flagmap[] = {
1177 [PF_ACCESS_READ] = AS_AREA_READ,
1178 [PF_ACCESS_WRITE] = AS_AREA_WRITE,
1179 [PF_ACCESS_EXEC] = AS_AREA_EXEC
1180 };
1181
1182 if (!(area->flags & flagmap[access]))
1183 return false;
1184
1185 return true;
1186}
1187
1188/** Convert address space area flags to page flags.
1189 *
1190 * @param aflags Flags of some address space area.
1191 *
1192 * @return Flags to be passed to page_mapping_insert().
1193 *
1194 */
1195NO_TRACE static unsigned int area_flags_to_page_flags(unsigned int aflags)
1196{
1197 unsigned int flags = PAGE_USER | PAGE_PRESENT;
1198
1199 if (aflags & AS_AREA_READ)
1200 flags |= PAGE_READ;
1201
1202 if (aflags & AS_AREA_WRITE)
1203 flags |= PAGE_WRITE;
1204
1205 if (aflags & AS_AREA_EXEC)
1206 flags |= PAGE_EXEC;
1207
1208 if (aflags & AS_AREA_CACHEABLE)
1209 flags |= PAGE_CACHEABLE;
1210
1211 return flags;
1212}
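
/*
 * Example (follows directly from the code above): an area created with
 * AS_AREA_READ | AS_AREA_EXEC | AS_AREA_CACHEABLE is mapped with
 * PAGE_USER | PAGE_PRESENT | PAGE_READ | PAGE_EXEC | PAGE_CACHEABLE.
 */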
1213
1214/** Change address space area flags.
1215 *
1216 * The idea is to have the same data, but with a different access mode.
1217 * This is needed e.g. for writing code into memory and then executing it.
1218 * In order for this to work properly, this may copy the data
1219 * into private anonymous memory (unless it's already there).
1220 *
1221 * @param as Address space.
1222 * @param flags Flags of the area memory.
1223 * @param address Address within the area to be changed.
1224 *
1225 * @return Zero on success or a value from @ref errno.h on failure.
1226 *
1227 */
1228errno_t as_area_change_flags(as_t *as, unsigned int flags, uintptr_t address)
1229{
1230 /* Flags for the new memory mapping */
1231 unsigned int page_flags = area_flags_to_page_flags(flags);
1232
1233 mutex_lock(&as->lock);
1234
1235 as_area_t *area = find_area_and_lock(as, address);
1236 if (!area) {
1237 mutex_unlock(&as->lock);
1238 return ENOENT;
1239 }
1240
1241 if (area->backend != &anon_backend) {
1242 /* Copying non-anonymous memory not supported yet */
1243 mutex_unlock(&area->lock);
1244 mutex_unlock(&as->lock);
1245 return ENOTSUP;
1246 }
1247
1248 mutex_lock(&area->sh_info->lock);
1249 if (area->sh_info->shared) {
1250 /* Copying shared areas not supported yet */
1251 mutex_unlock(&area->sh_info->lock);
1252 mutex_unlock(&area->lock);
1253 mutex_unlock(&as->lock);
1254 return ENOTSUP;
1255 }
1256 mutex_unlock(&area->sh_info->lock);
1257
1258 /*
1259 * Compute total number of used pages in the used_space B+tree
1260 */
1261 size_t used_pages = 0;
1262
1263 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1264 node) {
1265 btree_key_t i;
1266
1267 for (i = 0; i < node->keys; i++)
1268 used_pages += (size_t) node->value[i];
1269 }
1270
1271 /* An array for storing frame numbers */
1272 uintptr_t *old_frame = malloc(used_pages * sizeof(uintptr_t));
1273 if (!old_frame) {
1274 mutex_unlock(&area->lock);
1275 mutex_unlock(&as->lock);
1276 return ENOMEM;
1277 }
1278
1279 page_table_lock(as, false);
1280
1281 /*
1282 * Start TLB shootdown sequence.
1283 */
1284 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
1285 area->pages);
1286
1287 /*
1288 * Remove used pages from page tables and remember their frame
1289 * numbers.
1290 */
1291 size_t frame_idx = 0;
1292
1293 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1294 node) {
1295 btree_key_t i;
1296
1297 for (i = 0; i < node->keys; i++) {
1298 uintptr_t ptr = node->key[i];
1299 size_t size;
1300
1301 for (size = 0; size < (size_t) node->value[i]; size++) {
1302 pte_t pte;
1303 bool found = page_mapping_find(as,
1304 ptr + P2SZ(size), false, &pte);
1305
1306 assert(found);
1307 assert(PTE_VALID(&pte));
1308 assert(PTE_PRESENT(&pte));
1309
1310 old_frame[frame_idx++] = PTE_GET_FRAME(&pte);
1311
1312 /* Remove old mapping */
1313 page_mapping_remove(as, ptr + P2SZ(size));
1314 }
1315 }
1316 }
1317
1318 /*
1319 * Finish TLB shootdown sequence.
1320 */
1321
1322 tlb_invalidate_pages(as->asid, area->base, area->pages);
1323
1324 /*
1325 * Invalidate potential software translation caches
1326 * (e.g. TSB on sparc64, PHT on ppc32).
1327 */
1328 as_invalidate_translation_cache(as, area->base, area->pages);
1329 tlb_shootdown_finalize(ipl);
1330
1331 page_table_unlock(as, false);
1332
1333 /*
1334 * Set the new flags.
1335 */
1336 area->flags = flags;
1337
1338 /*
1339 * Map pages back in with new flags. This step is kept separate
1340 * so that the memory area cannot be accessed with both the old and
1341 * the new flags at once.
1342 */
1343 frame_idx = 0;
1344
1345 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1346 node) {
1347 btree_key_t i;
1348
1349 for (i = 0; i < node->keys; i++) {
1350 uintptr_t ptr = node->key[i];
1351 size_t size;
1352
1353 for (size = 0; size < (size_t) node->value[i]; size++) {
1354 page_table_lock(as, false);
1355
1356 /* Insert the new mapping */
1357 page_mapping_insert(as, ptr + P2SZ(size),
1358 old_frame[frame_idx++], page_flags);
1359
1360 page_table_unlock(as, false);
1361 }
1362 }
1363 }
1364
1365 free(old_frame);
1366
1367 mutex_unlock(&area->lock);
1368 mutex_unlock(&as->lock);
1369
1370 return 0;
1371}
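
/*
 * Usage sketch (illustrative): the typical sequence this function enables is
 * to create an anonymous area writable, fill it with code, and then trade
 * the write permission for execute:
 *
 *	errno_t rc = as_area_change_flags(AS, AS_AREA_READ | AS_AREA_EXEC |
 *	    AS_AREA_CACHEABLE, base);
 *	// 'base' is any address inside the previously created area
 */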
1372
1373/** Handle page fault within the current address space.
1374 *
1375 * This is the high-level page fault handler. It decides whether the page fault
1376 * can be resolved by any backend and if so, it invokes the backend to resolve
1377 * the page fault.
1378 *
1379 * Interrupts are assumed disabled.
1380 *
1381 * @param address Faulting address.
1382 * @param access Access mode that caused the page fault (i.e.
1383 * read/write/exec).
1384 * @param istate Pointer to the interrupted state.
1385 *
1386 * @return AS_PF_FAULT on page fault.
1387 * @return AS_PF_OK on success.
1388 * @return AS_PF_DEFER if the fault was caused by copy_to_uspace()
1389 * or copy_from_uspace().
1390 *
1391 */
1392int as_page_fault(uintptr_t address, pf_access_t access, istate_t *istate)
1393{
1394 uintptr_t page = ALIGN_DOWN(address, PAGE_SIZE);
1395 int rc = AS_PF_FAULT;
1396
1397 if (!THREAD)
1398 goto page_fault;
1399
1400 if (!AS)
1401 goto page_fault;
1402
1403 mutex_lock(&AS->lock);
1404 as_area_t *area = find_area_and_lock(AS, page);
1405 if (!area) {
1406 /*
1407 * No area contained mapping for 'page'.
1408 * Signal page fault to low-level handler.
1409 */
1410 mutex_unlock(&AS->lock);
1411 goto page_fault;
1412 }
1413
1414 if (area->attributes & AS_AREA_ATTR_PARTIAL) {
1415 /*
1416 * The address space area is not fully initialized.
1417 * Avoid possible race by returning error.
1418 */
1419 mutex_unlock(&area->lock);
1420 mutex_unlock(&AS->lock);
1421 goto page_fault;
1422 }
1423
1424 if ((!area->backend) || (!area->backend->page_fault)) {
1425 /*
1426 * The address space area is not backed by any backend
1427 * or the backend cannot handle page faults.
1428 */
1429 mutex_unlock(&area->lock);
1430 mutex_unlock(&AS->lock);
1431 goto page_fault;
1432 }
1433
1434 page_table_lock(AS, false);
1435
1436 /*
1437 * To avoid race condition between two page faults on the same address,
1438 * we need to make sure the mapping has not been already inserted.
1439 */
1440 pte_t pte;
1441 bool found = page_mapping_find(AS, page, false, &pte);
1442 if (found && PTE_PRESENT(&pte)) {
1443 if (((access == PF_ACCESS_READ) && PTE_READABLE(&pte)) ||
1444 (access == PF_ACCESS_WRITE && PTE_WRITABLE(&pte)) ||
1445 (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(&pte))) {
1446 page_table_unlock(AS, false);
1447 mutex_unlock(&area->lock);
1448 mutex_unlock(&AS->lock);
1449 return AS_PF_OK;
1450 }
1451 }
1452
1453 /*
1454 * Resort to the backend page fault handler.
1455 */
1456 rc = area->backend->page_fault(area, page, access);
1457 if (rc != AS_PF_OK) {
1458 page_table_unlock(AS, false);
1459 mutex_unlock(&area->lock);
1460 mutex_unlock(&AS->lock);
1461 goto page_fault;
1462 }
1463
1464 page_table_unlock(AS, false);
1465 mutex_unlock(&area->lock);
1466 mutex_unlock(&AS->lock);
1467 return AS_PF_OK;
1468
1469page_fault:
1470 if (THREAD->in_copy_from_uspace) {
1471 THREAD->in_copy_from_uspace = false;
1472 istate_set_retaddr(istate,
1473 (uintptr_t) &memcpy_from_uspace_failover_address);
1474 } else if (THREAD->in_copy_to_uspace) {
1475 THREAD->in_copy_to_uspace = false;
1476 istate_set_retaddr(istate,
1477 (uintptr_t) &memcpy_to_uspace_failover_address);
1478 } else if (rc == AS_PF_SILENT) {
1479 printf("Killing task %" PRIu64 " due to a "
1480 "failed late reservation request.\n", TASK->taskid);
1481 task_kill_self(true);
1482 } else {
1483 fault_if_from_uspace(istate, "Page fault: %p.", (void *) address);
1484 panic_memtrap(istate, access, address, NULL);
1485 }
1486
1487 return AS_PF_DEFER;
1488}
1489
1490/** Switch address spaces.
1491 *
1492 * Note that this function cannot sleep as it is essentially a part of
1493 * scheduling. Sleeping here would lead to deadlock on wakeup. Another
1494 * thing which is forbidden in this context is locking the address space.
1495 *
1496 * When this function is entered, no spinlocks may be held.
1497 *
1498 * @param old_as Old address space or NULL.
1499 * @param new_as New address space.
1500 *
1501 */
1502void as_switch(as_t *old_as, as_t *new_as)
1503{
1504 DEADLOCK_PROBE_INIT(p_asidlock);
1505 preemption_disable();
1506
1507retry:
1508 (void) interrupts_disable();
1509 if (!spinlock_trylock(&asidlock)) {
1510 /*
1511 * Avoid deadlock with TLB shootdown.
1512 * We can enable interrupts here because
1513 * preemption is disabled. We should not be
1514 * holding any other lock.
1515 */
1516 (void) interrupts_enable();
1517 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
1518 goto retry;
1519 }
1520 preemption_enable();
1521
1522 /*
1523 * First, take care of the old address space.
1524 */
1525 if (old_as) {
1526 assert(old_as->cpu_refcount);
1527
1528 if ((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) {
1529 /*
1530 * The old address space is no longer active on
1531 * any processor. It can be appended to the
1532 * list of inactive address spaces with assigned
1533 * ASID.
1534 */
1535 assert(old_as->asid != ASID_INVALID);
1536
1537 list_append(&old_as->inactive_as_with_asid_link,
1538 &inactive_as_with_asid_list);
1539 }
1540
1541 /*
1542 * Perform architecture-specific tasks when the address space
1543 * is being removed from the CPU.
1544 */
1545 as_deinstall_arch(old_as);
1546 }
1547
1548 /*
1549 * Second, prepare the new address space.
1550 */
1551 if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) {
1552 if (new_as->asid != ASID_INVALID)
1553 list_remove(&new_as->inactive_as_with_asid_link);
1554 else
1555 new_as->asid = asid_get();
1556 }
1557
1558#ifdef AS_PAGE_TABLE
1559 SET_PTL0_ADDRESS(new_as->genarch.page_table);
1560#endif
1561
1562 /*
1563 * Perform architecture-specific steps.
1564 * (e.g. write ASID to hardware register etc.)
1565 */
1566 as_install_arch(new_as);
1567
1568 spinlock_unlock(&asidlock);
1569
1570 AS = new_as;
1571}
1572
1573/** Compute flags for virtual address translation subsystem.
1574 *
1575 * @param area Address space area.
1576 *
1577 * @return Flags to be used in page_mapping_insert().
1578 *
1579 */
1580NO_TRACE unsigned int as_area_get_flags(as_area_t *area)
1581{
1582 assert(mutex_locked(&area->lock));
1583
1584 return area_flags_to_page_flags(area->flags);
1585}
1586
1587/** Get key function for the @c as_t.as_areas ordered dictionary.
1588 *
1589 * @param odlink Link
1590 * @return Pointer to area base address cast as 'void *'
1591 */
1592static void *as_areas_getkey(odlink_t *odlink)
1593{
1594 as_area_t *area = odict_get_instance(odlink, as_area_t, las_areas);
1595 return (void *) &area->base;
1596}
1597
1598/** Key comparison function for the @c as_t.as_areas ordered dictionary.
1599 *
1600 * @param a Pointer to area A base
1601 * @param b Pointer to area B base
1602 * @return -1, 0, 1 if base of A is lower than, equal to, or greater than base of B
1603 */
1604static int as_areas_cmp(void *a, void *b)
1605{
1606 uintptr_t base_a = *(uintptr_t *)a;
1607 uintptr_t base_b = *(uintptr_t *)b;
1608
1609 if (base_a < base_b)
1610 return -1;
1611 else if (base_a == base_b)
1612 return 0;
1613 else
1614 return +1;
1615}
1616
1617/** Create page table.
1618 *
1619 * Depending on architecture, create either address space private or global page
1620 * table.
1621 *
1622 * @param flags Flags saying whether the page table is for the kernel
1623 * address space.
1624 *
1625 * @return First entry of the page table.
1626 *
1627 */
1628NO_TRACE pte_t *page_table_create(unsigned int flags)
1629{
1630 assert(as_operations);
1631 assert(as_operations->page_table_create);
1632
1633 return as_operations->page_table_create(flags);
1634}
1635
1636/** Destroy page table.
1637 *
1638 * Destroy page table in architecture specific way.
1639 *
1640 * @param page_table Physical address of PTL0.
1641 *
1642 */
1643NO_TRACE void page_table_destroy(pte_t *page_table)
1644{
1645 assert(as_operations);
1646 assert(as_operations->page_table_destroy);
1647
1648 as_operations->page_table_destroy(page_table);
1649}
1650
1651/** Lock page table.
1652 *
1653 * This function should be called before any page_mapping_insert(),
1654 * page_mapping_remove() and page_mapping_find().
1655 *
1656 * Locking order is such that address space areas must be locked
1657 * prior to this call. Address space can be locked prior to this
1658 * call in which case the lock argument is false.
1659 *
1660 * @param as Address space.
1661 * @param lock If false, do not attempt to lock as->lock.
1662 *
1663 */
1664NO_TRACE void page_table_lock(as_t *as, bool lock)
1665{
1666 assert(as_operations);
1667 assert(as_operations->page_table_lock);
1668
1669 as_operations->page_table_lock(as, lock);
1670}
1671
1672/** Unlock page table.
1673 *
1674 * @param as Address space.
1675 * @param unlock If false, do not attempt to unlock as->lock.
1676 *
1677 */
1678NO_TRACE void page_table_unlock(as_t *as, bool unlock)
1679{
1680 assert(as_operations);
1681 assert(as_operations->page_table_unlock);
1682
1683 as_operations->page_table_unlock(as, unlock);
1684}
1685
1686/** Test whether page tables are locked.
1687 *
1688 * @param as Address space where the page tables belong.
1689 *
1690 * @return True if the page tables belonging to the address space
1691 * are locked, otherwise false.
1692 */
1693NO_TRACE bool page_table_locked(as_t *as)
1694{
1695 assert(as_operations);
1696 assert(as_operations->page_table_locked);
1697
1698 return as_operations->page_table_locked(as);
1699}
1700
1701/** Return size of the address space area with given base.
1702 *
1703 * @param base Arbitrary address inside the address space area.
1704 *
1705 * @return Size of the address space area in bytes or zero if it
1706 * does not exist.
1707 *
1708 */
1709size_t as_area_get_size(uintptr_t base)
1710{
1711 size_t size;
1712
1713 page_table_lock(AS, true);
1714 as_area_t *src_area = find_area_and_lock(AS, base);
1715
1716 if (src_area) {
1717 size = P2SZ(src_area->pages);
1718 mutex_unlock(&src_area->lock);
1719 } else
1720 size = 0;
1721
1722 page_table_unlock(AS, true);
1723 return size;
1724}
1725
1726/** Mark portion of address space area as used.
1727 *
1728 * The address space area must be already locked.
1729 *
1730 * @param area Address space area.
1731 * @param page First page to be marked.
1732 * @param count Number of pages to be marked.
1733 *
1734 * @return False on failure or true on success.
1735 *
1736 */
1737bool used_space_insert(as_area_t *area, uintptr_t page, size_t count)
1738{
1739 assert(mutex_locked(&area->lock));
1740 assert(IS_ALIGNED(page, PAGE_SIZE));
1741 assert(count);
1742
1743 btree_node_t *leaf = NULL;
1744 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1745 if (pages) {
1746 /*
1747 * We hit the beginning of some used space.
1748 */
1749 return false;
1750 }
1751
1752 assert(leaf != NULL);
1753
1754 if (!leaf->keys) {
1755 btree_insert(&area->used_space, page, (void *) count, leaf);
1756 goto success;
1757 }
1758
1759 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space, leaf);
1760 if (node) {
1761 uintptr_t left_pg = node->key[node->keys - 1];
1762 uintptr_t right_pg = leaf->key[0];
1763 size_t left_cnt = (size_t) node->value[node->keys - 1];
1764 size_t right_cnt = (size_t) leaf->value[0];
1765
1766 /*
1767 * Examine the possibility that the interval fits
1768 * somewhere between the rightmost interval of
1769 * the left neighbour and the first interval of the leaf.
1770 */
1771
1772 if (page >= right_pg) {
1773 /* Do nothing. */
1774 } else if (overlaps(page, P2SZ(count), left_pg,
1775 P2SZ(left_cnt))) {
1776 /* The interval intersects with the left interval. */
1777 return false;
1778 } else if (overlaps(page, P2SZ(count), right_pg,
1779 P2SZ(right_cnt))) {
1780 /* The interval intersects with the right interval. */
1781 return false;
1782 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1783 (page + P2SZ(count) == right_pg)) {
1784 /*
1785 * The interval can be added by merging the two already
1786 * present intervals.
1787 */
1788 node->value[node->keys - 1] += count + right_cnt;
1789 btree_remove(&area->used_space, right_pg, leaf);
1790 goto success;
1791 } else if (page == left_pg + P2SZ(left_cnt)) {
1792 /*
1793 * The interval can be added by simply growing the left
1794 * interval.
1795 */
1796 node->value[node->keys - 1] += count;
1797 goto success;
1798 } else if (page + P2SZ(count) == right_pg) {
1799 /*
1800 * The interval can be added by simply moving the base of
1801 * the right interval down and increasing its size
1802 * accordingly.
1803 */
1804 leaf->value[0] += count;
1805 leaf->key[0] = page;
1806 goto success;
1807 } else {
1808 /*
1809 * The interval is between both neighbouring intervals,
1810 * but cannot be merged with any of them.
1811 */
1812 btree_insert(&area->used_space, page, (void *) count,
1813 leaf);
1814 goto success;
1815 }
1816 } else if (page < leaf->key[0]) {
1817 uintptr_t right_pg = leaf->key[0];
1818 size_t right_cnt = (size_t) leaf->value[0];
1819
1820 /*
1821 * Investigate the border case in which the left neighbour does
1822 * not exist but the interval fits from the left.
1823 */
1824
1825 if (overlaps(page, P2SZ(count), right_pg, P2SZ(right_cnt))) {
1826 /* The interval intersects with the right interval. */
1827 return false;
1828 } else if (page + P2SZ(count) == right_pg) {
1829 /*
1830 * The interval can be added by moving the base of the
1831 * right interval down and increasing its size
1832 * accordingly.
1833 */
1834 leaf->key[0] = page;
1835 leaf->value[0] += count;
1836 goto success;
1837 } else {
1838 /*
1839 * The interval doesn't adjoin with the right interval.
1840 * It must be added individually.
1841 */
1842 btree_insert(&area->used_space, page, (void *) count,
1843 leaf);
1844 goto success;
1845 }
1846 }
1847
1848 node = btree_leaf_node_right_neighbour(&area->used_space, leaf);
1849 if (node) {
1850 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1851 uintptr_t right_pg = node->key[0];
1852 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1853 size_t right_cnt = (size_t) node->value[0];
1854
1855 /*
1856 * Examine the possibility that the interval fits
1857 * somewhere between the leftmost interval of
1858 * the right neighbour and the last interval of the leaf.
1859 */
1860
1861 if (page < left_pg) {
1862 /* Do nothing. */
1863 } else if (overlaps(page, P2SZ(count), left_pg,
1864 P2SZ(left_cnt))) {
1865 /* The interval intersects with the left interval. */
1866 return false;
1867 } else if (overlaps(page, P2SZ(count), right_pg,
1868 P2SZ(right_cnt))) {
1869 /* The interval intersects with the right interval. */
1870 return false;
1871 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1872 (page + P2SZ(count) == right_pg)) {
1873 /*
1874 * The interval can be added by merging the two already
1875 * present intervals.
1876 */
1877 leaf->value[leaf->keys - 1] += count + right_cnt;
1878 btree_remove(&area->used_space, right_pg, node);
1879 goto success;
1880 } else if (page == left_pg + P2SZ(left_cnt)) {
1881 /*
1882 * The interval can be added by simply growing the left
1883 * interval.
1884 */
1885 leaf->value[leaf->keys - 1] += count;
1886 goto success;
1887 } else if (page + P2SZ(count) == right_pg) {
1888 /*
1889 * The interval can be added by simply moving the base of
1890 * the right interval down and increasing its size
1891 * accordingly.
1892 */
1893 node->value[0] += count;
1894 node->key[0] = page;
1895 goto success;
1896 } else {
1897 /*
1898 * The interval is between both neighbouring intervals,
1899 * but cannot be merged with any of them.
1900 */
1901 btree_insert(&area->used_space, page, (void *) count,
1902 leaf);
1903 goto success;
1904 }
1905 } else if (page >= leaf->key[leaf->keys - 1]) {
1906 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1907 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1908
1909 /*
1910 * Investigate the border case in which the right neighbour
1911 * does not exist but the interval fits from the right.
1912 */
1913
1914 if (overlaps(page, P2SZ(count), left_pg, P2SZ(left_cnt))) {
1915 /* The interval intersects with the left interval. */
1916 return false;
1917 } else if (left_pg + P2SZ(left_cnt) == page) {
1918 /*
1919 * The interval can be added by growing the left
1920 * interval.
1921 */
1922 leaf->value[leaf->keys - 1] += count;
1923 goto success;
1924 } else {
1925 /*
1926 * The interval doesn't adjoin with the left interval.
1927 * It must be added individually.
1928 */
1929 btree_insert(&area->used_space, page, (void *) count,
1930 leaf);
1931 goto success;
1932 }
1933 }
1934
1935 /*
1936 * Note that if the algorithm made it thus far, the interval can fit
1937 * only between two other intervals of the leaf. The two border cases
1938 * were already resolved.
1939 */
1940 btree_key_t i;
1941 for (i = 1; i < leaf->keys; i++) {
1942 if (page < leaf->key[i]) {
1943 uintptr_t left_pg = leaf->key[i - 1];
1944 uintptr_t right_pg = leaf->key[i];
1945 size_t left_cnt = (size_t) leaf->value[i - 1];
1946 size_t right_cnt = (size_t) leaf->value[i];
1947
1948 /*
1949 * The interval fits between left_pg and right_pg.
1950 */
1951
1952 if (overlaps(page, P2SZ(count), left_pg,
1953 P2SZ(left_cnt))) {
1954 /*
1955 * The interval intersects with the left
1956 * interval.
1957 */
1958 return false;
1959 } else if (overlaps(page, P2SZ(count), right_pg,
1960 P2SZ(right_cnt))) {
1961 /*
1962 * The interval intersects with the right
1963 * interval.
1964 */
1965 return false;
1966 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1967 (page + P2SZ(count) == right_pg)) {
1968 /*
1969 * The interval can be added by merging the two
1970 * already present intervals.
1971 */
1972 leaf->value[i - 1] += count + right_cnt;
1973 btree_remove(&area->used_space, right_pg, leaf);
1974 goto success;
1975 } else if (page == left_pg + P2SZ(left_cnt)) {
1976 /*
1977 * The interval can be added by simply growing
1978 * the left interval.
1979 */
1980 leaf->value[i - 1] += count;
1981 goto success;
1982 } else if (page + P2SZ(count) == right_pg) {
1983 /*
1984 * The interval can be added by simply moving
1985 * the base of the right interval down and
1986 * increasing its size accordingly.
1987 */
1988 leaf->value[i] += count;
1989 leaf->key[i] = page;
1990 goto success;
1991 } else {
1992 /*
1993 * The interval is between both neighbouring
1994 * intervals, but cannot be merged with any of
1995 * them.
1996 */
1997 btree_insert(&area->used_space, page,
1998 (void *) count, leaf);
1999 goto success;
2000 }
2001 }
2002 }
2003
2004 panic("Inconsistency detected while adding %zu pages of used "
2005 "space at %p.", count, (void *) page);
2006
2007success:
2008 area->resident += count;
2009 return true;
2010}
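
/*
 * Example (illustrative): with used intervals [P, P + 2 pages) and
 * [P + 3 pages, P + 5 pages) already recorded, calling
 * used_space_insert(area, P + P2SZ(2), 1) takes the "merge the two already
 * present intervals" branch above and leaves a single 5-page interval
 * starting at P.
 */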
2011
2012/** Mark portion of address space area as unused.
2013 *
2014 * The address space area must be already locked.
2015 *
2016 * @param area Address space area.
2017 * @param page First page to be marked.
2018 * @param count Number of pages to be marked.
2019 *
2020 * @return False on failure or true on success.
2021 *
2022 */
2023bool used_space_remove(as_area_t *area, uintptr_t page, size_t count)
2024{
2025 assert(mutex_locked(&area->lock));
2026 assert(IS_ALIGNED(page, PAGE_SIZE));
2027 assert(count);
2028
2029 btree_node_t *leaf;
2030 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
2031 if (pages) {
2032 /*
2033 * We are lucky, page is the beginning of some interval.
2034 */
2035 if (count > pages) {
2036 return false;
2037 } else if (count == pages) {
2038 btree_remove(&area->used_space, page, leaf);
2039 goto success;
2040 } else {
2041 /*
2042 * Find the respective interval.
2043 * Decrease its size and relocate its start address.
2044 */
2045 btree_key_t i;
2046 for (i = 0; i < leaf->keys; i++) {
2047 if (leaf->key[i] == page) {
2048 leaf->key[i] += P2SZ(count);
2049 leaf->value[i] -= count;
2050 goto success;
2051 }
2052 }
2053
2054 goto error;
2055 }
2056 }
2057
2058 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space,
2059 leaf);
2060 if ((node) && (page < leaf->key[0])) {
2061 uintptr_t left_pg = node->key[node->keys - 1];
2062 size_t left_cnt = (size_t) node->value[node->keys - 1];
2063
2064 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2065 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2066 /*
2067 * The interval is contained in the rightmost
2068 * interval of the left neighbour and can be
2069 * removed by updating the size of the bigger
2070 * interval.
2071 */
2072 node->value[node->keys - 1] -= count;
2073 goto success;
2074 } else if (page + P2SZ(count) <
2075 left_pg + P2SZ(left_cnt)) {
2076 size_t new_cnt;
2077
2078 /*
2079 * The interval is contained in the rightmost
2080 * interval of the left neighbour but its
2081 * removal requires both updating the size of
2082 * the original interval and also inserting a
2083 * new interval.
2084 */
2085 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2086 (page + P2SZ(count))) >> PAGE_WIDTH;
2087 node->value[node->keys - 1] -= count + new_cnt;
2088 btree_insert(&area->used_space, page +
2089 P2SZ(count), (void *) new_cnt, leaf);
2090 goto success;
2091 }
2092 }
2093
2094 return false;
2095 } else if (page < leaf->key[0])
2096 return false;
2097
2098 if (page > leaf->key[leaf->keys - 1]) {
2099 uintptr_t left_pg = leaf->key[leaf->keys - 1];
2100 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
2101
2102 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2103 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2104 /*
2105 * The interval is contained in the rightmost
2106 * interval of the leaf and can be removed by
2107 * updating the size of the bigger interval.
2108 */
2109 leaf->value[leaf->keys - 1] -= count;
2110 goto success;
2111 } else if (page + P2SZ(count) < left_pg +
2112 P2SZ(left_cnt)) {
2113 size_t new_cnt;
2114
2115 /*
2116 * The interval is contained in the rightmost
2117 * interval of the leaf but its removal
2118 * requires both updating the size of the
2119 * original interval and also inserting a new
2120 * interval.
2121 */
2122 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2123 (page + P2SZ(count))) >> PAGE_WIDTH;
2124 leaf->value[leaf->keys - 1] -= count + new_cnt;
2125 btree_insert(&area->used_space, page +
2126 P2SZ(count), (void *) new_cnt, leaf);
2127 goto success;
2128 }
2129 }
2130
2131 return false;
2132 }
2133
2134 /*
2135	 * The border cases have already been resolved.
2136	 * Now the interval can only lie between intervals of the leaf.
2137 */
2138 btree_key_t i;
2139 for (i = 1; i < leaf->keys - 1; i++) {
2140 if (page < leaf->key[i]) {
2141 uintptr_t left_pg = leaf->key[i - 1];
2142 size_t left_cnt = (size_t) leaf->value[i - 1];
2143
2144 /*
2145 * Now the interval is between intervals corresponding
2146 * to (i - 1) and i.
2147 */
2148 if (overlaps(left_pg, P2SZ(left_cnt), page,
2149 P2SZ(count))) {
2150 if (page + P2SZ(count) ==
2151 left_pg + P2SZ(left_cnt)) {
2152 /*
2153 * The interval is contained in the
2154 * interval (i - 1) of the leaf and can
2155 * be removed by updating the size of
2156 * the bigger interval.
2157 */
2158 leaf->value[i - 1] -= count;
2159 goto success;
2160 } else if (page + P2SZ(count) <
2161 left_pg + P2SZ(left_cnt)) {
2162 size_t new_cnt;
2163
2164 /*
2165 * The interval is contained in the
2166 * interval (i - 1) of the leaf but its
2167 * removal requires both updating the
2168 * size of the original interval and
2169 * also inserting a new interval.
2170 */
2171 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2172 (page + P2SZ(count))) >>
2173 PAGE_WIDTH;
2174 leaf->value[i - 1] -= count + new_cnt;
2175 btree_insert(&area->used_space, page +
2176 P2SZ(count), (void *) new_cnt,
2177 leaf);
2178 goto success;
2179 }
2180 }
2181
2182 return false;
2183 }
2184 }
2185
2186error:
2187 panic("Inconsistency detected while removing %zu pages of used "
2188 "space from %p.", count, (void *) page);
2189
2190success:
2191 area->resident -= count;
2192 return true;
2193}
2194
2195/*
2196 * Address space related syscalls.
2197 */
2198
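/** Create an address space area (syscall).
 *
 * Thin wrapper around as_area_create() operating on the address space
 * of the calling task. If @a pager_info is AS_AREA_UNPAGED, the
 * anonymous memory backend is used; otherwise the pager information is
 * copied in from userspace and user_backend is used.
 *
 * @return Base virtual address of the new area on success,
 *         AS_MAP_FAILED otherwise.
 */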
2199sysarg_t sys_as_area_create(uintptr_t base, size_t size, unsigned int flags,
2200 uintptr_t bound, as_area_pager_info_t *pager_info)
2201{
2202 uintptr_t virt = base;
2203 mem_backend_t *backend;
2204 mem_backend_data_t backend_data;
2205
2206 if (pager_info == AS_AREA_UNPAGED)
2207 backend = &anon_backend;
2208 else {
2209 backend = &user_backend;
2210 if (copy_from_uspace(&backend_data.pager_info, pager_info,
2211 sizeof(as_area_pager_info_t)) != EOK) {
2212 return (sysarg_t) AS_MAP_FAILED;
2213 }
2214 }
2215 as_area_t *area = as_area_create(AS, flags, size,
2216 AS_AREA_ATTR_NONE, backend, &backend_data, &virt, bound);
2217 if (area == NULL)
2218 return (sysarg_t) AS_MAP_FAILED;
2219
2220 return (sysarg_t) virt;
2221}
2222
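/** Resize an address space area (syscall).
 *
 * Thin wrapper around as_area_resize(). Note that the @a flags argument
 * is not forwarded; as_area_resize() is always called with zero flags
 * here.
 */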
2223sys_errno_t sys_as_area_resize(uintptr_t address, size_t size, unsigned int flags)
2224{
2225 return (sys_errno_t) as_area_resize(AS, address, size, 0);
2226}
2227
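/** Change flags of an address space area (syscall).
 *
 * Thin wrapper around as_area_change_flags().
 */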
2228sys_errno_t sys_as_area_change_flags(uintptr_t address, unsigned int flags)
2229{
2230 return (sys_errno_t) as_area_change_flags(AS, flags, address);
2231}
2232
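/** Destroy an address space area (syscall).
 *
 * Thin wrapper around as_area_destroy().
 */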
2233sys_errno_t sys_as_area_destroy(uintptr_t address)
2234{
2235 return (sys_errno_t) as_area_destroy(AS, address);
2236}
2237
2238/** Get list of address space areas.
2239 *
2240 * @param as Address space.
2241 * @param obuf Place to save pointer to returned buffer.
2242 * @param osize Place to save size of returned buffer.
2243 *
2244 */
2245void as_get_area_info(as_t *as, as_area_info_t **obuf, size_t *osize)
2246{
2247 mutex_lock(&as->lock);
2248
2249 /* Count number of areas. */
2250 size_t area_cnt = odict_count(&as->as_areas);
2251
2252 size_t isize = area_cnt * sizeof(as_area_info_t);
2253 as_area_info_t *info = nfmalloc(isize);
2254
2255 /* Record area data. */
2256
2257 size_t area_idx = 0;
2258
2259 as_area_t *area = as_area_first(as);
2260 while (area != NULL) {
2261 assert(area_idx < area_cnt);
2262 mutex_lock(&area->lock);
2263
2264 info[area_idx].start_addr = area->base;
2265 info[area_idx].size = P2SZ(area->pages);
2266 info[area_idx].flags = area->flags;
2267 ++area_idx;
2268
2269 mutex_unlock(&area->lock);
2270 area = as_area_next(area);
2271 }
2272
2273 mutex_unlock(&as->lock);
2274
2275 *obuf = info;
2276 *osize = isize;
2277}
2278
2279/** Print out information about address space.
2280 *
2281 * @param as Address space.
2282 *
2283 */
2284void as_print(as_t *as)
2285{
2286 mutex_lock(&as->lock);
2287
2288 /* Print out info about address space areas */
2289 as_area_t *area = as_area_first(as);
2290 while (area != NULL) {
2291 mutex_lock(&area->lock);
2292 printf("as_area: %p, base=%p, pages=%zu"
2293 " (%p - %p)\n", area, (void *) area->base,
2294 area->pages, (void *) area->base,
2295 (void *) (area->base + P2SZ(area->pages)));
2296 mutex_unlock(&area->lock);
2297
2298 area = as_area_next(area);
2299 }
2300
2301 mutex_unlock(&as->lock);
2302}
2303
2304/** @}
2305 */