source: mainline/kernel/generic/src/mm/as.c@ aafed15

Last change on this file was aafed15, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago:

Declare malloc() etc in standard <stdlib.h> rather than <mm/slab.h>

1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * Copyright (c) 2018 Jiri Svoboda
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup kernel_generic_mm
31 * @{
32 */
33
34/**
35 * @file
36 * @brief Address space related functions.
37 *
38 * This file contains address space manipulation functions.
39 * Roughly speaking, this is a higher-level client of
40 * the Virtual Address Translation (VAT) subsystem.
41 *
42 * Functionality provided by this file allows one to
43 * create address spaces and create, resize and share
44 * address space areas.
45 *
46 * @see page.c
47 *
48 */
49
50#include <mm/as.h>
51#include <arch/mm/as.h>
52#include <mm/page.h>
53#include <mm/frame.h>
54#include <mm/slab.h>
55#include <mm/tlb.h>
56#include <arch/mm/page.h>
57#include <genarch/mm/page_pt.h>
58#include <genarch/mm/page_ht.h>
59#include <mm/asid.h>
60#include <arch/mm/asid.h>
61#include <preemption.h>
62#include <synch/spinlock.h>
63#include <synch/mutex.h>
64#include <adt/list.h>
65#include <adt/btree.h>
66#include <proc/task.h>
67#include <proc/thread.h>
68#include <arch/asm.h>
69#include <panic.h>
70#include <assert.h>
71#include <stdio.h>
72#include <mem.h>
73#include <macros.h>
74#include <bitops.h>
75#include <arch.h>
76#include <errno.h>
77#include <config.h>
78#include <align.h>
79#include <typedefs.h>
80#include <syscall/copy.h>
81#include <arch/interrupt.h>
82#include <interrupt.h>
83#include <stdlib.h>
84
85/**
86 * Each architecture decides what functions will be used to carry out
87 * address space operations such as creating or locking page tables.
88 */
89as_operations_t *as_operations = NULL;
90
91/** Slab for as_t objects.
92 *
93 */
94static slab_cache_t *as_cache;
95
96/** ASID subsystem lock.
97 *
98 * This lock protects:
99 * - inactive_as_with_asid_list
100 * - as->asid for each as of the as_t type
101 * - asids_allocated counter
102 *
103 */
104SPINLOCK_INITIALIZE(asidlock);
105
106/**
107 * Inactive address spaces (on all processors)
108 * that have a valid ASID.
109 */
110LIST_INITIALIZE(inactive_as_with_asid_list);
111
112/** Kernel address space. */
113as_t *AS_KERNEL = NULL;
114
115static void *as_areas_getkey(odlink_t *);
116static int as_areas_cmp(void *, void *);
117
118NO_TRACE static errno_t as_constructor(void *obj, unsigned int flags)
119{
120 as_t *as = (as_t *) obj;
121
122 link_initialize(&as->inactive_as_with_asid_link);
123 mutex_initialize(&as->lock, MUTEX_PASSIVE);
124
125 return as_constructor_arch(as, flags);
126}
127
128NO_TRACE static size_t as_destructor(void *obj)
129{
130 return as_destructor_arch((as_t *) obj);
131}
132
133/** Initialize address space subsystem. */
134void as_init(void)
135{
136 as_arch_init();
137
138 as_cache = slab_cache_create("as_t", sizeof(as_t), 0,
139 as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);
140
141 AS_KERNEL = as_create(FLAG_AS_KERNEL);
142 if (!AS_KERNEL)
143 panic("Cannot create kernel address space.");
144}
145
146/** Create address space.
147 *
148 * @param flags Flags that influence the way in which the address
149 * space is created.
150 *
151 */
152as_t *as_create(unsigned int flags)
153{
154 as_t *as = (as_t *) slab_alloc(as_cache, FRAME_ATOMIC);
155 if (!as)
156 return NULL;
157
158 (void) as_create_arch(as, 0);
159
160 odict_initialize(&as->as_areas, as_areas_getkey, as_areas_cmp);
161
162 if (flags & FLAG_AS_KERNEL)
163 as->asid = ASID_KERNEL;
164 else
165 as->asid = ASID_INVALID;
166
167 refcount_init(&as->refcount);
168 as->cpu_refcount = 0;
169
170#ifdef AS_PAGE_TABLE
171 as->genarch.page_table = page_table_create(flags);
172#else
173 page_table_create(flags);
174#endif
175
176 return as;
177}
178
179/** Destroy address space.
180 *
181 * When there are no tasks referencing this address space (i.e. its refcount is
182 * zero), the address space can be destroyed.
183 *
184 * We know that we don't hold any spinlock.
185 *
186 * @param as Address space to be destroyed.
187 *
188 */
189void as_destroy(as_t *as)
190{
191 DEADLOCK_PROBE_INIT(p_asidlock);
192
193 assert(as != AS);
194 assert(refcount_unique(&as->refcount));
195
196 /*
197 * Since there is no reference to this address space, it is safe not to
198 * lock its mutex.
199 */
200
201 /*
202 * We need to avoid deadlock between TLB shootdown and asidlock.
203 * We therefore try to take the asidlock conditionally and if we don't succeed,
204 * we enable interrupts and try again. This is done while preemption is
205 * disabled to prevent nested context switches. We also depend on the
206 * fact that so far no spinlocks are held.
207 */
208 preemption_disable();
209 ipl_t ipl = interrupts_read();
210
211retry:
212 interrupts_disable();
213 if (!spinlock_trylock(&asidlock)) {
214 interrupts_enable();
215 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
216 goto retry;
217 }
218
219 /* Interrupts disabled, enable preemption */
220 preemption_enable();
221
222 if ((as->asid != ASID_INVALID) && (as != AS_KERNEL)) {
223 if (as->cpu_refcount == 0)
224 list_remove(&as->inactive_as_with_asid_link);
225
226 asid_put(as->asid);
227 }
228
229 spinlock_unlock(&asidlock);
230 interrupts_restore(ipl);
231
232 /*
233 * Destroy address space areas of the address space.
234 * Need to start from the beginning each time since we are destroying
235 * the areas.
236 */
237 as_area_t *area = as_area_first(as);
238 while (area != NULL) {
239 /*
240 * XXX We already have as_area_t, but as_area_destroy will
241 * have to search for it. This could be made faster.
242 */
243 as_area_destroy(as, area->base);
244 area = as_area_first(as);
245 }
246
247 odict_finalize(&as->as_areas);
248
249#ifdef AS_PAGE_TABLE
250 page_table_destroy(as->genarch.page_table);
251#else
252 page_table_destroy(NULL);
253#endif
254
255 slab_free(as_cache, as);
256}
257
258/** Hold a reference to an address space.
259 *
260 * Holding a reference to an address space prevents destruction
261 * of that address space.
262 *
263 * @param as Address space to be held.
264 *
265 */
266NO_TRACE void as_hold(as_t *as)
267{
268 refcount_up(&as->refcount);
269}
270
271/** Release a reference to an address space.
272 *
273 * The last one to release a reference to an address space
274 * destroys the address space.
275 *
276 * @param as Address space to be released.
277 *
278 */
279NO_TRACE void as_release(as_t *as)
280{
281 if (refcount_down(&as->refcount))
282 as_destroy(as);
283}
284
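/*
 * Illustrative sketch, not part of the original file: the reference
 * counting contract of the functions above. as_create() hands out one
 * reference; every additional holder pairs as_hold() with as_release(),
 * and the last as_release() ends up in as_destroy(). The example_*
 * helper name is made up for illustration.
 */
static void example_as_lifetime(void)
{
        as_t *as = as_create(0);
        if (!as)
                return;

        /* A second holder (e.g. a task being constructed) pins it. */
        as_hold(as);

        as_release(as);   /* drop the second reference */
        as_release(as);   /* drop the creator's reference, destroying as */
}
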
285/** Return first address space area.
286 *
287 * @param as Address space
288 * @return First area in @a as (i.e. area with the lowest base address)
289 * or @c NULL if there is none
290 */
291as_area_t *as_area_first(as_t *as)
292{
293 odlink_t *odlink = odict_first(&as->as_areas);
294 if (odlink == NULL)
295 return NULL;
296
297 return odict_get_instance(odlink, as_area_t, las_areas);
298}
299
300/** Return next address space area.
301 *
302 * @param cur Current area
303 * @return Next area in the same address space or @c NULL if @a cur is the
304 * last area.
305 */
306as_area_t *as_area_next(as_area_t *cur)
307{
308 odlink_t *odlink = odict_next(&cur->las_areas, &cur->as->as_areas);
309 if (odlink == NULL)
310 return NULL;
311
312 return odict_get_instance(odlink, as_area_t, las_areas);
313}
314
315/** Determine if area with specified parameters would conflict with
316 * a specific existing address space area.
317 *
318 * @param addr Starting virtual address of the area being tested.
319 * @param count Number of pages in the area being tested.
320 * @param guarded True if the area being tested is protected by guard pages.
321 * @param area Area against which we are testing.
322 *
323 * @return True if the two areas conflict, false otherwise.
324 */
325NO_TRACE static bool area_is_conflicting(uintptr_t addr,
326 size_t count, bool guarded, as_area_t *area)
327{
328 assert((addr % PAGE_SIZE) == 0);
329
330 size_t gsize = P2SZ(count);
331 size_t agsize = P2SZ(area->pages);
332
333 /*
334 * A guarded area has one guard page before, one page after.
335 * What we do here is: if either area is guarded, we add
336 * PAGE_SIZE to the size of both areas. That guarantees
337 * they will be spaced at least one page apart.
338 */
339 if (guarded || (area->flags & AS_AREA_GUARD) != 0) {
340 /* Add guard page size unless area is at the end of VA domain */
341 if (!overflows(addr, P2SZ(count)))
342 gsize += PAGE_SIZE;
343
344 /* Add guard page size unless area is at the end of VA domain */
345 if (!overflows(area->base, P2SZ(area->pages)))
346 agsize += PAGE_SIZE;
347 }
348
349 return overlaps(addr, gsize, area->base, agsize);
350
351}
352
353/** Check area conflicts with other areas.
354 *
355 * @param as Address space.
356 * @param addr Starting virtual address of the area being tested.
357 * @param count Number of pages in the area being tested.
358 * @param guarded True if the area being tested is protected by guard pages.
359 * @param avoid Do not touch this area. I.e. this area is not considered
360 * as presenting a conflict.
361 *
362 * @return True if there is no conflict, false otherwise.
363 *
364 */
365NO_TRACE static bool check_area_conflicts(as_t *as, uintptr_t addr,
366 size_t count, bool guarded, as_area_t *avoid)
367{
368 assert((addr % PAGE_SIZE) == 0);
369 assert(mutex_locked(&as->lock));
370
371 /*
372 * If the addition of the supposed area address and size overflows,
373 * report conflict.
374 */
375 if (overflows_into_positive(addr, P2SZ(count)))
376 return false;
377
378 /*
379 * We don't want any area to have conflicts with NULL page.
380 */
381 if (overlaps(addr, P2SZ(count), (uintptr_t) NULL, PAGE_SIZE))
382 return false;
383
384 /*
385 * To determine if we overlap with another area, we just need
386 * to check for overlap with the last area whose base address is
387 * <= ours and with the first area whose base address is > ours.
388 *
389 * First find last area with <= base address.
390 */
391 odlink_t *odlink = odict_find_leq(&as->as_areas, &addr, NULL);
392 if (odlink != NULL) {
393 as_area_t *area = odict_get_instance(odlink, as_area_t,
394 las_areas);
395
396 if (area != avoid) {
397 mutex_lock(&area->lock);
398 if (area_is_conflicting(addr, count, guarded, area)) {
399 mutex_unlock(&area->lock);
400 return false;
401 }
402
403 mutex_unlock(&area->lock);
404 }
405
406 /* Next area */
407 odlink = odict_next(odlink, &as->as_areas);
408 }
409
410 /*
411 * Next area, if any, is the first with base greater than our base address.
412 * If there was no area with <= base, we need to look at the first area.
413 */
414 if (odlink == NULL)
415 odlink = odict_first(&as->as_areas);
416
417 if (odlink != NULL) {
418 as_area_t *area = odict_get_instance(odlink, as_area_t,
419 las_areas);
420
421 if (area != avoid) {
422 mutex_lock(&area->lock);
423 if (area_is_conflicting(addr, count, guarded, area)) {
424 mutex_unlock(&area->lock);
425 return false;
426 }
427
428 mutex_unlock(&area->lock);
429 }
430 }
431
432 /*
433 * So far, the area does not conflict with other areas.
434 * Check if it is contained in the user address space.
435 */
436 if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
437 return iswithin(USER_ADDRESS_SPACE_START,
438 (USER_ADDRESS_SPACE_END - USER_ADDRESS_SPACE_START) + 1,
439 addr, P2SZ(count));
440 }
441
442 return true;
443}
444
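/*
 * Illustrative sketch, not part of the original file: probing whether a
 * page-aligned candidate range of a given page count would fit without
 * actually creating an area, under the same locking rules that
 * as_area_create() follows below. The example_* helper is hypothetical.
 */
static bool example_range_is_free(as_t *as, uintptr_t addr, size_t pages)
{
        mutex_lock(&as->lock);
        bool ok = check_area_conflicts(as, addr, pages, false, NULL);
        mutex_unlock(&as->lock);

        return ok;
}
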
445/** Return pointer to unmapped address space area
446 *
447 * The address space must be already locked when calling
448 * this function.
449 *
450 * @param as Address space.
451 * @param bound Lowest address bound.
452 * @param size Requested size of the allocation.
453 * @param guarded True if the allocation must be protected by guard pages.
454 *
455 * @return Address of the beginning of unmapped address space area.
456 * @return -1 if no suitable address space area was found.
457 *
458 */
459NO_TRACE static uintptr_t as_get_unmapped_area(as_t *as, uintptr_t bound,
460 size_t size, bool guarded)
461{
462 assert(mutex_locked(&as->lock));
463
464 if (size == 0)
465 return (uintptr_t) -1;
466
467 /*
468 * Make sure we allocate from page-aligned
469 * address. Check for possible overflow in
470 * each step.
471 */
472
473 size_t pages = SIZE2FRAMES(size);
474
475 /*
476 * Find the lowest unmapped address aligned on a page
477 * boundary, not smaller than bound and of the required size.
478 */
479
480 /* First check the bound address itself */
481 uintptr_t addr = ALIGN_UP(bound, PAGE_SIZE);
482 if (addr >= bound) {
483 if (guarded) {
484 /*
485 * Leave an unmapped page between the lower
486 * bound and the area's start address.
487 */
488 addr += P2SZ(1);
489 }
490
491 if (check_area_conflicts(as, addr, pages, guarded, NULL))
492 return addr;
493 }
494
495 /* Then check the addresses behind each area */
496 as_area_t *area = as_area_first(as);
497 while (area != NULL) {
498 mutex_lock(&area->lock);
499
500 addr = area->base + P2SZ(area->pages);
501
502 if (guarded || area->flags & AS_AREA_GUARD) {
503 /*
504 * We must leave an unmapped page
505 * between the two areas.
506 */
507 addr += P2SZ(1);
508 }
509
510 bool avail =
511 ((addr >= bound) && (addr >= area->base) &&
512 (check_area_conflicts(as, addr, pages, guarded, area)));
513
514 mutex_unlock(&area->lock);
515
516 if (avail)
517 return addr;
518
519 area = as_area_next(area);
520 }
521
522 /* No suitable address space area found */
523 return (uintptr_t) -1;
524}
525
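/*
 * Illustrative sketch, not part of the original file: asking for a
 * mappable range with no lower bound and no guard pages. The address
 * space mutex must be held around the call, and (uintptr_t) -1 signals
 * that no suitable range exists. The example_* helper is hypothetical.
 */
static uintptr_t example_pick_base(as_t *as, size_t size)
{
        mutex_lock(&as->lock);
        uintptr_t base = as_get_unmapped_area(as, 0, size, false);
        mutex_unlock(&as->lock);

        return base;
}
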
526/** Remove reference to address space area share info.
527 *
528 * If the reference count drops to 0, the sh_info is deallocated.
529 *
530 * @param sh_info Pointer to address space area share info.
531 *
532 */
533NO_TRACE static void sh_info_remove_reference(share_info_t *sh_info)
534{
535 bool dealloc = false;
536
537 mutex_lock(&sh_info->lock);
538 assert(sh_info->refcount);
539
540 if (--sh_info->refcount == 0) {
541 dealloc = true;
542
543 /*
544 * Now walk carefully the pagemap B+tree and free/remove
545 * reference from all frames found there.
546 */
547 list_foreach(sh_info->pagemap.leaf_list, leaf_link,
548 btree_node_t, node) {
549 btree_key_t i;
550
551 for (i = 0; i < node->keys; i++)
552 frame_free((uintptr_t) node->value[i], 1);
553 }
554
555 }
556 mutex_unlock(&sh_info->lock);
557
558 if (dealloc) {
559 if (sh_info->backend && sh_info->backend->destroy_shared_data) {
560 sh_info->backend->destroy_shared_data(
561 sh_info->backend_shared_data);
562 }
563 btree_destroy(&sh_info->pagemap);
564 free(sh_info);
565 }
566}
567
568/** Create address space area of common attributes.
569 *
570 * The created address space area is added to the target address space.
571 *
572 * @param as Target address space.
573 * @param flags Flags of the area memory.
574 * @param size Size of area.
575 * @param attrs Attributes of the area.
576 * @param backend Address space area backend. NULL if no backend is used.
577 * @param backend_data NULL or a pointer to custom backend data.
578 * @param base Starting virtual address of the area.
579 * If set to AS_AREA_ANY, a suitable mappable area is
580 * found.
581 * @param bound Lowest address bound if base is set to AS_AREA_ANY.
582 * Otherwise ignored.
583 *
584 * @return Address space area on success or NULL on failure.
585 *
586 */
587as_area_t *as_area_create(as_t *as, unsigned int flags, size_t size,
588 unsigned int attrs, mem_backend_t *backend,
589 mem_backend_data_t *backend_data, uintptr_t *base, uintptr_t bound)
590{
591 if ((*base != (uintptr_t) AS_AREA_ANY) && !IS_ALIGNED(*base, PAGE_SIZE))
592 return NULL;
593
594 if (size == 0)
595 return NULL;
596
597 size_t pages = SIZE2FRAMES(size);
598
599 /* Writeable executable areas are not supported. */
600 if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
601 return NULL;
602
603 bool const guarded = flags & AS_AREA_GUARD;
604
605 mutex_lock(&as->lock);
606
607 if (*base == (uintptr_t) AS_AREA_ANY) {
608 *base = as_get_unmapped_area(as, bound, size, guarded);
609 if (*base == (uintptr_t) -1) {
610 mutex_unlock(&as->lock);
611 return NULL;
612 }
613 }
614
615 if (overflows_into_positive(*base, size)) {
616 mutex_unlock(&as->lock);
617 return NULL;
618 }
619
620 if (!check_area_conflicts(as, *base, pages, guarded, NULL)) {
621 mutex_unlock(&as->lock);
622 return NULL;
623 }
624
625 as_area_t *area = (as_area_t *) malloc(sizeof(as_area_t));
626 if (!area) {
627 mutex_unlock(&as->lock);
628 return NULL;
629 }
630
631 mutex_initialize(&area->lock, MUTEX_PASSIVE);
632
633 area->as = as;
634 odlink_initialize(&area->las_areas);
635 area->flags = flags;
636 area->attributes = attrs;
637 area->pages = pages;
638 area->resident = 0;
639 area->base = *base;
640 area->backend = backend;
641 area->sh_info = NULL;
642
643 if (backend_data)
644 area->backend_data = *backend_data;
645 else
646 memsetb(&area->backend_data, sizeof(area->backend_data), 0);
647
648 share_info_t *si = NULL;
649
650 /*
651 * Create the sharing info structure.
652 * We do this in advance for every new area, even if it is not going
653 * to be shared.
654 */
655 if (!(attrs & AS_AREA_ATTR_PARTIAL)) {
656 si = (share_info_t *) malloc(sizeof(share_info_t));
657 if (!si) {
658 free(area);
659 mutex_unlock(&as->lock);
660 return NULL;
661 }
662 mutex_initialize(&si->lock, MUTEX_PASSIVE);
663 si->refcount = 1;
664 si->shared = false;
665 si->backend_shared_data = NULL;
666 si->backend = backend;
667 btree_create(&si->pagemap);
668
669 area->sh_info = si;
670
671 if (area->backend && area->backend->create_shared_data) {
672 if (!area->backend->create_shared_data(area)) {
673 free(area);
674 mutex_unlock(&as->lock);
675 sh_info_remove_reference(si);
676 return NULL;
677 }
678 }
679 }
680
681 if (area->backend && area->backend->create) {
682 if (!area->backend->create(area)) {
683 free(area);
684 mutex_unlock(&as->lock);
685 if (!(attrs & AS_AREA_ATTR_PARTIAL))
686 sh_info_remove_reference(si);
687 return NULL;
688 }
689 }
690
691 btree_create(&area->used_space);
692 odict_insert(&area->las_areas, &as->as_areas, NULL);
693
694 mutex_unlock(&as->lock);
695
696 return area;
697}
698
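/*
 * Illustrative sketch, not part of the original file: creating an
 * anonymous, readable and writable area and letting the kernel choose
 * its base address. On success the chosen base is returned through the
 * base argument; on failure NULL is returned. The example_* helper is
 * hypothetical.
 */
static as_area_t *example_create_anon_area(as_t *as, size_t size)
{
        uintptr_t base = (uintptr_t) AS_AREA_ANY;

        return as_area_create(as, AS_AREA_READ | AS_AREA_WRITE |
            AS_AREA_CACHEABLE, size, 0, &anon_backend, NULL, &base, 0);
}
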
699/** Find address space area and lock it.
700 *
701 * @param as Address space.
702 * @param va Virtual address.
703 *
704 * @return Locked address space area containing va on success or
705 * NULL on failure.
706 *
707 */
708NO_TRACE static as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
709{
710 assert(mutex_locked(&as->lock));
711
712 odlink_t *odlink = odict_find_leq(&as->as_areas, &va, NULL);
713 if (odlink == NULL)
714 return NULL;
715
716 as_area_t *area = odict_get_instance(odlink, as_area_t, las_areas);
717 mutex_lock(&area->lock);
718
719 assert(area->base <= va);
720
721 if (va <= area->base + (P2SZ(area->pages) - 1))
722 return area;
723
724 mutex_unlock(&area->lock);
725 return NULL;
726}
727
728/** Find address space area and resize it.
729 *
730 * @param as Address space.
731 * @param address Virtual address belonging to the area to be changed.
732 * Must be page-aligned.
733 * @param size New size of the virtual memory block starting at
734 * address.
735 * @param flags Flags influencing the remap operation. Currently unused.
736 *
737 * @return Zero on success or a value from @ref errno.h otherwise.
738 *
739 */
740errno_t as_area_resize(as_t *as, uintptr_t address, size_t size, unsigned int flags)
741{
742 if (!IS_ALIGNED(address, PAGE_SIZE))
743 return EINVAL;
744
745 mutex_lock(&as->lock);
746
747 /*
748 * Locate the area.
749 */
750 as_area_t *area = find_area_and_lock(as, address);
751 if (!area) {
752 mutex_unlock(&as->lock);
753 return ENOENT;
754 }
755
756 if (!area->backend->is_resizable(area)) {
757 /*
758 * The backend does not support resizing for this area.
759 */
760 mutex_unlock(&area->lock);
761 mutex_unlock(&as->lock);
762 return ENOTSUP;
763 }
764
765 mutex_lock(&area->sh_info->lock);
766 if (area->sh_info->shared) {
767 /*
768 * Remapping of shared address space areas
769 * is not supported.
770 */
771 mutex_unlock(&area->sh_info->lock);
772 mutex_unlock(&area->lock);
773 mutex_unlock(&as->lock);
774 return ENOTSUP;
775 }
776 mutex_unlock(&area->sh_info->lock);
777
778 size_t pages = SIZE2FRAMES((address - area->base) + size);
779 if (!pages) {
780 /*
781 * Zero size address space areas are not allowed.
782 */
783 mutex_unlock(&area->lock);
784 mutex_unlock(&as->lock);
785 return EPERM;
786 }
787
788 if (pages < area->pages) {
789 uintptr_t start_free = area->base + P2SZ(pages);
790
791 /*
792 * Shrinking the area.
793 * No need to check for overlaps.
794 */
795
796 page_table_lock(as, false);
797
798 /*
799 * Remove frames belonging to used space starting from
800 * the highest addresses downwards until an overlap with
801 * the resized address space area is found. Note that this
802 * is also the right way to remove part of the used_space
803 * B+tree leaf list.
804 */
805 bool cond = true;
806 while (cond) {
807 assert(!list_empty(&area->used_space.leaf_list));
808
809 btree_node_t *node =
810 list_get_instance(list_last(&area->used_space.leaf_list),
811 btree_node_t, leaf_link);
812
813 if ((cond = (node->keys != 0))) {
814 uintptr_t ptr = node->key[node->keys - 1];
815 size_t node_size =
816 (size_t) node->value[node->keys - 1];
817 size_t i = 0;
818
819 if (overlaps(ptr, P2SZ(node_size), area->base,
820 P2SZ(pages))) {
821
822 if (ptr + P2SZ(node_size) <= start_free) {
823 /*
824 * The whole interval fits
825 * completely in the resized
826 * address space area.
827 */
828 break;
829 }
830
831 /*
832 * Only part of the interval fits inside the
833 * resized address space area; the rest, from
834 * start_free onwards, must be removed.
835 */
836
837 /* We are almost done */
838 cond = false;
839 i = (start_free - ptr) >> PAGE_WIDTH;
840 if (!used_space_remove(area, start_free,
841 node_size - i))
842 panic("Cannot remove used space.");
843 } else {
844 /*
845 * The interval of used space can be
846 * completely removed.
847 */
848 if (!used_space_remove(area, ptr, node_size))
849 panic("Cannot remove used space.");
850 }
851
852 /*
853 * Start TLB shootdown sequence.
854 *
855 * The sequence is rather short and can be
856 * repeated multiple times. The reason is that
857 * we don't want to have used_space_remove()
858 * inside the sequence as it may use a blocking
859 * memory allocation for its B+tree. Blocking
860 * while holding the tlblock spinlock is
861 * forbidden and would hit a kernel assertion.
862 */
863
864 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES,
865 as->asid, area->base + P2SZ(pages),
866 area->pages - pages);
867
868 for (; i < node_size; i++) {
869 pte_t pte;
870 bool found = page_mapping_find(as,
871 ptr + P2SZ(i), false, &pte);
872
873 assert(found);
874 assert(PTE_VALID(&pte));
875 assert(PTE_PRESENT(&pte));
876
877 if ((area->backend) &&
878 (area->backend->frame_free)) {
879 area->backend->frame_free(area,
880 ptr + P2SZ(i),
881 PTE_GET_FRAME(&pte));
882 }
883
884 page_mapping_remove(as, ptr + P2SZ(i));
885 }
886
887 /*
888 * Finish TLB shootdown sequence.
889 */
890
891 tlb_invalidate_pages(as->asid,
892 area->base + P2SZ(pages),
893 area->pages - pages);
894
895 /*
896 * Invalidate software translation caches
897 * (e.g. TSB on sparc64, PHT on ppc32).
898 */
899 as_invalidate_translation_cache(as,
900 area->base + P2SZ(pages),
901 area->pages - pages);
902 tlb_shootdown_finalize(ipl);
903 }
904 }
905 page_table_unlock(as, false);
906 } else {
907 /*
908 * Growing the area.
909 */
910
911 if (overflows_into_positive(address, P2SZ(pages))) {
 mutex_unlock(&area->lock);
 mutex_unlock(&as->lock);
912 return EINVAL;
 }
913
914 /*
915 * Check for overlaps with other address space areas.
916 */
917 bool const guarded = area->flags & AS_AREA_GUARD;
918 if (!check_area_conflicts(as, address, pages, guarded, area)) {
919 mutex_unlock(&area->lock);
920 mutex_unlock(&as->lock);
921 return EADDRNOTAVAIL;
922 }
923 }
924
925 if (area->backend && area->backend->resize) {
926 if (!area->backend->resize(area, pages)) {
927 mutex_unlock(&area->lock);
928 mutex_unlock(&as->lock);
929 return ENOMEM;
930 }
931 }
932
933 area->pages = pages;
934
935 mutex_unlock(&area->lock);
936 mutex_unlock(&as->lock);
937
938 return 0;
939}
940
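/*
 * Illustrative sketch, not part of the original file: growing an area to
 * twice its current size. Passing the area base as the address makes the
 * requested size the new total size of the area; any page-aligned address
 * inside the area works as well. The example_* helper is hypothetical.
 */
static errno_t example_double_area(as_t *as, uintptr_t base, size_t old_size)
{
        return as_area_resize(as, base, 2 * old_size, 0);
}
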
941/** Destroy address space area.
942 *
943 * @param as Address space.
944 * @param address Address within the area to be deleted.
945 *
946 * @return Zero on success or a value from @ref errno.h on failure.
947 *
948 */
949errno_t as_area_destroy(as_t *as, uintptr_t address)
950{
951 mutex_lock(&as->lock);
952
953 as_area_t *area = find_area_and_lock(as, address);
954 if (!area) {
955 mutex_unlock(&as->lock);
956 return ENOENT;
957 }
958
959 if (area->backend && area->backend->destroy)
960 area->backend->destroy(area);
961
962 page_table_lock(as, false);
963
964 /*
965 * Start TLB shootdown sequence.
966 */
967 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
968 area->pages);
969
970 /*
971 * Visit only the pages mapped by used_space B+tree.
972 */
973 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
974 node) {
975 btree_key_t i;
976
977 for (i = 0; i < node->keys; i++) {
978 uintptr_t ptr = node->key[i];
979 size_t size;
980
981 for (size = 0; size < (size_t) node->value[i]; size++) {
982 pte_t pte;
983 bool found = page_mapping_find(as,
984 ptr + P2SZ(size), false, &pte);
985
986 assert(found);
987 assert(PTE_VALID(&pte));
988 assert(PTE_PRESENT(&pte));
989
990 if ((area->backend) &&
991 (area->backend->frame_free)) {
992 area->backend->frame_free(area,
993 ptr + P2SZ(size),
994 PTE_GET_FRAME(&pte));
995 }
996
997 page_mapping_remove(as, ptr + P2SZ(size));
998 }
999 }
1000 }
1001
1002 /*
1003 * Finish TLB shootdown sequence.
1004 */
1005
1006 tlb_invalidate_pages(as->asid, area->base, area->pages);
1007
1008 /*
1009 * Invalidate potential software translation caches
1010 * (e.g. TSB on sparc64, PHT on ppc32).
1011 */
1012 as_invalidate_translation_cache(as, area->base, area->pages);
1013 tlb_shootdown_finalize(ipl);
1014
1015 page_table_unlock(as, false);
1016
1017 btree_destroy(&area->used_space);
1018
1019 area->attributes |= AS_AREA_ATTR_PARTIAL;
1020
1021 sh_info_remove_reference(area->sh_info);
1022
1023 mutex_unlock(&area->lock);
1024
1025 /*
1026 * Remove the empty area from address space.
1027 */
1028 odict_remove(&area->las_areas);
1029
1030 free(area);
1031
1032 mutex_unlock(&as->lock);
1033 return 0;
1034}
1035
1036/** Share address space area with another or the same address space.
1037 *
1038 * Address space area mapping is shared with a new address space area.
1039 * If the source address space area has not been shared so far,
1040 * a new sh_info is created. The new address space area simply gets the
1041 * sh_info of the source area. The process of duplicating the
1042 * mapping is done through the backend share function.
1043 *
1044 * @param src_as Pointer to source address space.
1045 * @param src_base Base address of the source address space area.
1046 * @param acc_size Expected size of the source area.
1047 * @param dst_as Pointer to destination address space.
1048 * @param dst_flags_mask Destination address space area flags mask.
1049 * @param dst_base Target base address. If set to -1,
1050 * a suitable mappable area is found.
1051 * @param bound Lowest address bound if dst_base is set to -1.
1052 * Otherwise ignored.
1053 *
1054 * @return Zero on success.
1055 * @return ENOENT if there is no such task or such address space.
1056 * @return EPERM if there was a problem in accepting the area.
1057 * @return ENOMEM if there was a problem in allocating destination
1058 * address space area.
1059 * @return ENOTSUP if the address space area backend does not support
1060 * sharing.
1061 *
1062 */
1063errno_t as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
1064 as_t *dst_as, unsigned int dst_flags_mask, uintptr_t *dst_base,
1065 uintptr_t bound)
1066{
1067 mutex_lock(&src_as->lock);
1068 as_area_t *src_area = find_area_and_lock(src_as, src_base);
1069 if (!src_area) {
1070 /*
1071 * Could not find the source address space area.
1072 */
1073 mutex_unlock(&src_as->lock);
1074 return ENOENT;
1075 }
1076
1077 if (!src_area->backend->is_shareable(src_area)) {
1078 /*
1079 * The backend does not permit sharing of this area.
1080 */
1081 mutex_unlock(&src_area->lock);
1082 mutex_unlock(&src_as->lock);
1083 return ENOTSUP;
1084 }
1085
1086 size_t src_size = P2SZ(src_area->pages);
1087 unsigned int src_flags = src_area->flags;
1088 mem_backend_t *src_backend = src_area->backend;
1089 mem_backend_data_t src_backend_data = src_area->backend_data;
1090
1091 /* Share the cacheable flag from the original mapping */
1092 if (src_flags & AS_AREA_CACHEABLE)
1093 dst_flags_mask |= AS_AREA_CACHEABLE;
1094
1095 if ((src_size != acc_size) ||
1096 ((src_flags & dst_flags_mask) != dst_flags_mask)) {
1097 mutex_unlock(&src_area->lock);
1098 mutex_unlock(&src_as->lock);
1099 return EPERM;
1100 }
1101
1102 /*
1103 * Now we are committed to sharing the area.
1104 * First, prepare the area for sharing.
1105 * Then it will be safe to unlock it.
1106 */
1107 share_info_t *sh_info = src_area->sh_info;
1108
1109 mutex_lock(&sh_info->lock);
1110 sh_info->refcount++;
1111 bool shared = sh_info->shared;
1112 sh_info->shared = true;
1113 mutex_unlock(&sh_info->lock);
1114
1115 if (!shared) {
1116 /*
1117 * Call the backend to setup sharing.
1118 * This only happens once for each sh_info.
1119 */
1120 src_area->backend->share(src_area);
1121 }
1122
1123 mutex_unlock(&src_area->lock);
1124 mutex_unlock(&src_as->lock);
1125
1126 /*
1127 * Create copy of the source address space area.
1128 * The destination area is created with AS_AREA_ATTR_PARTIAL
1129 * attribute set which prevents race condition with
1130 * preliminary as_page_fault() calls.
1131 * The flags of the source area are masked against dst_flags_mask
1132 * to support sharing in less privileged mode.
1133 */
1134 as_area_t *dst_area = as_area_create(dst_as, dst_flags_mask,
1135 src_size, AS_AREA_ATTR_PARTIAL, src_backend,
1136 &src_backend_data, dst_base, bound);
1137 if (!dst_area) {
1138 /*
1139 * Destination address space area could not be created.
1140 */
1141 sh_info_remove_reference(sh_info);
1142
1143 return ENOMEM;
1144 }
1145
1146 /*
1147 * Now the destination address space area has been
1148 * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
1149 * attribute and set the sh_info.
1150 */
1151 mutex_lock(&dst_as->lock);
1152 mutex_lock(&dst_area->lock);
1153 dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
1154 dst_area->sh_info = sh_info;
1155 mutex_unlock(&dst_area->lock);
1156 mutex_unlock(&dst_as->lock);
1157
1158 return 0;
1159}
1160
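/*
 * Illustrative sketch, not part of the original file: sharing a read-only
 * view of an area with another address space, assuming the source area is
 * readable and exactly size bytes long. Setting dst_base to -1 lets the
 * kernel pick a suitable destination address, which is then returned
 * through dst_base. The example_* helper is hypothetical.
 */
static errno_t example_share_readonly(as_t *src_as, uintptr_t src_base,
    size_t size, as_t *dst_as, uintptr_t *dst_base)
{
        *dst_base = (uintptr_t) -1;

        return as_area_share(src_as, src_base, size, dst_as, AS_AREA_READ,
            dst_base, 0);
}
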
1161/** Check access mode for address space area.
1162 *
1163 * @param area Address space area.
1164 * @param access Access mode.
1165 *
1166 * @return False if access violates area's permissions, true
1167 * otherwise.
1168 *
1169 */
1170NO_TRACE bool as_area_check_access(as_area_t *area, pf_access_t access)
1171{
1172 assert(mutex_locked(&area->lock));
1173
1174 int flagmap[] = {
1175 [PF_ACCESS_READ] = AS_AREA_READ,
1176 [PF_ACCESS_WRITE] = AS_AREA_WRITE,
1177 [PF_ACCESS_EXEC] = AS_AREA_EXEC
1178 };
1179
1180 if (!(area->flags & flagmap[access]))
1181 return false;
1182
1183 return true;
1184}
1185
1186/** Convert address space area flags to page flags.
1187 *
1188 * @param aflags Flags of some address space area.
1189 *
1190 * @return Flags to be passed to page_mapping_insert().
1191 *
1192 */
1193NO_TRACE static unsigned int area_flags_to_page_flags(unsigned int aflags)
1194{
1195 unsigned int flags = PAGE_USER | PAGE_PRESENT;
1196
1197 if (aflags & AS_AREA_READ)
1198 flags |= PAGE_READ;
1199
1200 if (aflags & AS_AREA_WRITE)
1201 flags |= PAGE_WRITE;
1202
1203 if (aflags & AS_AREA_EXEC)
1204 flags |= PAGE_EXEC;
1205
1206 if (aflags & AS_AREA_CACHEABLE)
1207 flags |= PAGE_CACHEABLE;
1208
1209 return flags;
1210}
1211
1212/** Change address space area flags.
1213 *
1214 * The idea is to have the same data, but with a different access mode.
1215 * This is needed e.g. for writing code into memory and then executing it.
1216 * In order for this to work properly, this may copy the data
1217 * into private anonymous memory (unless it's already there).
1218 *
1219 * @param as Address space.
1220 * @param flags Flags of the area memory.
1221 * @param address Address within the area to be changed.
1222 *
1223 * @return Zero on success or a value from @ref errno.h on failure.
1224 *
1225 */
1226errno_t as_area_change_flags(as_t *as, unsigned int flags, uintptr_t address)
1227{
1228 /* Flags for the new memory mapping */
1229 unsigned int page_flags = area_flags_to_page_flags(flags);
1230
1231 mutex_lock(&as->lock);
1232
1233 as_area_t *area = find_area_and_lock(as, address);
1234 if (!area) {
1235 mutex_unlock(&as->lock);
1236 return ENOENT;
1237 }
1238
1239 if (area->backend != &anon_backend) {
1240 /* Copying non-anonymous memory not supported yet */
1241 mutex_unlock(&area->lock);
1242 mutex_unlock(&as->lock);
1243 return ENOTSUP;
1244 }
1245
1246 mutex_lock(&area->sh_info->lock);
1247 if (area->sh_info->shared) {
1248 /* Copying shared areas not supported yet */
1249 mutex_unlock(&area->sh_info->lock);
1250 mutex_unlock(&area->lock);
1251 mutex_unlock(&as->lock);
1252 return ENOTSUP;
1253 }
1254 mutex_unlock(&area->sh_info->lock);
1255
1256 /*
1257 * Compute total number of used pages in the used_space B+tree
1258 */
1259 size_t used_pages = 0;
1260
1261 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1262 node) {
1263 btree_key_t i;
1264
1265 for (i = 0; i < node->keys; i++)
1266 used_pages += (size_t) node->value[i];
1267 }
1268
1269 /* An array for storing frame numbers */
1270 uintptr_t *old_frame = malloc(used_pages * sizeof(uintptr_t));
1271 if (!old_frame) {
1272 mutex_unlock(&area->lock);
1273 mutex_unlock(&as->lock);
1274 return ENOMEM;
1275 }
1276
1277 page_table_lock(as, false);
1278
1279 /*
1280 * Start TLB shootdown sequence.
1281 */
1282 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
1283 area->pages);
1284
1285 /*
1286 * Remove used pages from page tables and remember their frame
1287 * numbers.
1288 */
1289 size_t frame_idx = 0;
1290
1291 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1292 node) {
1293 btree_key_t i;
1294
1295 for (i = 0; i < node->keys; i++) {
1296 uintptr_t ptr = node->key[i];
1297 size_t size;
1298
1299 for (size = 0; size < (size_t) node->value[i]; size++) {
1300 pte_t pte;
1301 bool found = page_mapping_find(as,
1302 ptr + P2SZ(size), false, &pte);
1303
1304 assert(found);
1305 assert(PTE_VALID(&pte));
1306 assert(PTE_PRESENT(&pte));
1307
1308 old_frame[frame_idx++] = PTE_GET_FRAME(&pte);
1309
1310 /* Remove old mapping */
1311 page_mapping_remove(as, ptr + P2SZ(size));
1312 }
1313 }
1314 }
1315
1316 /*
1317 * Finish TLB shootdown sequence.
1318 */
1319
1320 tlb_invalidate_pages(as->asid, area->base, area->pages);
1321
1322 /*
1323 * Invalidate potential software translation caches
1324 * (e.g. TSB on sparc64, PHT on ppc32).
1325 */
1326 as_invalidate_translation_cache(as, area->base, area->pages);
1327 tlb_shootdown_finalize(ipl);
1328
1329 page_table_unlock(as, false);
1330
1331 /*
1332 * Set the new flags.
1333 */
1334 area->flags = flags;
1335
1336 /*
1337 * Map pages back in with new flags. This step is kept separate
1338 * so that the memory area cannot be accessed with both the old and
1339 * the new flags at once.
1340 */
1341 frame_idx = 0;
1342
1343 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1344 node) {
1345 btree_key_t i;
1346
1347 for (i = 0; i < node->keys; i++) {
1348 uintptr_t ptr = node->key[i];
1349 size_t size;
1350
1351 for (size = 0; size < (size_t) node->value[i]; size++) {
1352 page_table_lock(as, false);
1353
1354 /* Insert the new mapping */
1355 page_mapping_insert(as, ptr + P2SZ(size),
1356 old_frame[frame_idx++], page_flags);
1357
1358 page_table_unlock(as, false);
1359 }
1360 }
1361 }
1362
1363 free(old_frame);
1364
1365 mutex_unlock(&area->lock);
1366 mutex_unlock(&as->lock);
1367
1368 return 0;
1369}
1370
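/*
 * Illustrative sketch, not part of the original file: the write-then-execute
 * pattern described in the comment above. An area that started out writable
 * for code generation is switched to read+execute before the code is run;
 * as checked above, this only works for non-shared anonymous areas. The
 * example_* helper is hypothetical.
 */
static errno_t example_seal_code_area(as_t *as, uintptr_t address)
{
        return as_area_change_flags(as, AS_AREA_READ | AS_AREA_EXEC |
            AS_AREA_CACHEABLE, address);
}
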
1371/** Handle page fault within the current address space.
1372 *
1373 * This is the high-level page fault handler. It decides whether the page fault
1374 * can be resolved by any backend and if so, it invokes the backend to resolve
1375 * the page fault.
1376 *
1377 * Interrupts are assumed disabled.
1378 *
1379 * @param address Faulting address.
1380 * @param access Access mode that caused the page fault (i.e.
1381 * read/write/exec).
1382 * @param istate Pointer to the interrupted state.
1383 *
1384 * @return AS_PF_FAULT on page fault.
1385 * @return AS_PF_OK on success.
1386 * @return AS_PF_DEFER if the fault was caused by copy_to_uspace()
1387 * or copy_from_uspace().
1388 *
1389 */
1390int as_page_fault(uintptr_t address, pf_access_t access, istate_t *istate)
1391{
1392 uintptr_t page = ALIGN_DOWN(address, PAGE_SIZE);
1393 int rc = AS_PF_FAULT;
1394
1395 if (!THREAD)
1396 goto page_fault;
1397
1398 if (!AS)
1399 goto page_fault;
1400
1401 mutex_lock(&AS->lock);
1402 as_area_t *area = find_area_and_lock(AS, page);
1403 if (!area) {
1404 /*
1405 * No area contained mapping for 'page'.
1406 * Signal page fault to low-level handler.
1407 */
1408 mutex_unlock(&AS->lock);
1409 goto page_fault;
1410 }
1411
1412 if (area->attributes & AS_AREA_ATTR_PARTIAL) {
1413 /*
1414 * The address space area is not fully initialized.
1415 * Avoid possible race by returning error.
1416 */
1417 mutex_unlock(&area->lock);
1418 mutex_unlock(&AS->lock);
1419 goto page_fault;
1420 }
1421
1422 if ((!area->backend) || (!area->backend->page_fault)) {
1423 /*
1424 * The address space area is not backed by any backend
1425 * or the backend cannot handle page faults.
1426 */
1427 mutex_unlock(&area->lock);
1428 mutex_unlock(&AS->lock);
1429 goto page_fault;
1430 }
1431
1432 page_table_lock(AS, false);
1433
1434 /*
1435 * To avoid race condition between two page faults on the same address,
1436 * we need to make sure the mapping has not been already inserted.
1437 */
1438 pte_t pte;
1439 bool found = page_mapping_find(AS, page, false, &pte);
1440 if (found && PTE_PRESENT(&pte)) {
1441 if (((access == PF_ACCESS_READ) && PTE_READABLE(&pte)) ||
1442 (access == PF_ACCESS_WRITE && PTE_WRITABLE(&pte)) ||
1443 (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(&pte))) {
1444 page_table_unlock(AS, false);
1445 mutex_unlock(&area->lock);
1446 mutex_unlock(&AS->lock);
1447 return AS_PF_OK;
1448 }
1449 }
1450
1451 /*
1452 * Resort to the backend page fault handler.
1453 */
1454 rc = area->backend->page_fault(area, page, access);
1455 if (rc != AS_PF_OK) {
1456 page_table_unlock(AS, false);
1457 mutex_unlock(&area->lock);
1458 mutex_unlock(&AS->lock);
1459 goto page_fault;
1460 }
1461
1462 page_table_unlock(AS, false);
1463 mutex_unlock(&area->lock);
1464 mutex_unlock(&AS->lock);
1465 return AS_PF_OK;
1466
1467page_fault:
1468 if (THREAD && THREAD->in_copy_from_uspace) {
1469 THREAD->in_copy_from_uspace = false;
1470 istate_set_retaddr(istate,
1471 (uintptr_t) &memcpy_from_uspace_failover_address);
1472 } else if (THREAD && THREAD->in_copy_to_uspace) {
1473 THREAD->in_copy_to_uspace = false;
1474 istate_set_retaddr(istate,
1475 (uintptr_t) &memcpy_to_uspace_failover_address);
1476 } else if (rc == AS_PF_SILENT) {
1477 printf("Killing task %" PRIu64 " due to a "
1478 "failed late reservation request.\n", TASK->taskid);
1479 task_kill_self(true);
1480 } else {
1481 fault_if_from_uspace(istate, "Page fault: %p.", (void *) address);
1482 panic_memtrap(istate, access, address, NULL);
1483 }
1484
1485 return AS_PF_DEFER;
1486}
1487
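/*
 * Illustrative sketch, not part of the original file: the shape of a call
 * from a hypothetical low-level fault handler. Faults that cannot be
 * resolved are already turned into a task kill or kernel panic inside
 * as_page_fault(), so only AS_PF_OK and AS_PF_DEFER reach the caller.
 * The example_* helper is hypothetical.
 */
static void example_low_level_fault(uintptr_t badvaddr, istate_t *istate)
{
        int rc = as_page_fault(badvaddr, PF_ACCESS_READ, istate);

        /* AS_PF_OK here; AS_PF_DEFER means a uspace copy was redirected. */
        (void) rc;
}
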
1488/** Switch address spaces.
1489 *
1490 * Note that this function cannot sleep as it is essentially a part of
1491 * scheduling. Sleeping here would lead to deadlock on wakeup. Another
1492 * thing which is forbidden in this context is locking the address space.
1493 *
1494 * When this function is entered, no spinlocks may be held.
1495 *
1496 * @param old_as Old address space or NULL.
1497 * @param new_as New address space.
1498 *
1499 */
1500void as_switch(as_t *old_as, as_t *new_as)
1501{
1502 DEADLOCK_PROBE_INIT(p_asidlock);
1503 preemption_disable();
1504
1505retry:
1506 (void) interrupts_disable();
1507 if (!spinlock_trylock(&asidlock)) {
1508 /*
1509 * Avoid deadlock with TLB shootdown.
1510 * We can enable interrupts here because
1511 * preemption is disabled. We should not be
1512 * holding any other lock.
1513 */
1514 (void) interrupts_enable();
1515 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
1516 goto retry;
1517 }
1518 preemption_enable();
1519
1520 /*
1521 * First, take care of the old address space.
1522 */
1523 if (old_as) {
1524 assert(old_as->cpu_refcount);
1525
1526 if ((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) {
1527 /*
1528 * The old address space is no longer active on
1529 * any processor. It can be appended to the
1530 * list of inactive address spaces with assigned
1531 * ASID.
1532 */
1533 assert(old_as->asid != ASID_INVALID);
1534
1535 list_append(&old_as->inactive_as_with_asid_link,
1536 &inactive_as_with_asid_list);
1537 }
1538
1539 /*
1540 * Perform architecture-specific tasks when the address space
1541 * is being removed from the CPU.
1542 */
1543 as_deinstall_arch(old_as);
1544 }
1545
1546 /*
1547 * Second, prepare the new address space.
1548 */
1549 if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) {
1550 if (new_as->asid != ASID_INVALID)
1551 list_remove(&new_as->inactive_as_with_asid_link);
1552 else
1553 new_as->asid = asid_get();
1554 }
1555
1556#ifdef AS_PAGE_TABLE
1557 SET_PTL0_ADDRESS(new_as->genarch.page_table);
1558#endif
1559
1560 /*
1561 * Perform architecture-specific steps.
1562 * (e.g. write ASID to hardware register etc.)
1563 */
1564 as_install_arch(new_as);
1565
1566 spinlock_unlock(&asidlock);
1567
1568 AS = new_as;
1569}
1570
1571/** Compute flags for virtual address translation subsystem.
1572 *
1573 * @param area Address space area.
1574 *
1575 * @return Flags to be used in page_mapping_insert().
1576 *
1577 */
1578NO_TRACE unsigned int as_area_get_flags(as_area_t *area)
1579{
1580 assert(mutex_locked(&area->lock));
1581
1582 return area_flags_to_page_flags(area->flags);
1583}
1584
1585/** Get key function for the @c as_t.as_areas ordered dictionary.
1586 *
1587 * @param odlink Link
1588 * @return Pointer to the area base address cast as 'void *'
1589 */
1590static void *as_areas_getkey(odlink_t *odlink)
1591{
1592 as_area_t *area = odict_get_instance(odlink, as_area_t, las_areas);
1593 return (void *) &area->base;
1594}
1595
1596/** Key comparison function for the @c as_t.as_areas ordered dictionary.
1597 *
1598 * @param a Pointer to area A base
1599 * @param b Pointer to area B base
1600 * @return -1, 0, 1 if the base of A is lower than, equal to or higher than the base of B, respectively
1601 */
1602static int as_areas_cmp(void *a, void *b)
1603{
1604 uintptr_t base_a = *(uintptr_t *)a;
1605 uintptr_t base_b = *(uintptr_t *)b;
1606
1607 if (base_a < base_b)
1608 return -1;
1609 else if (base_a == base_b)
1610 return 0;
1611 else
1612 return +1;
1613}
1614
1615/** Create page table.
1616 *
1617 * Depending on architecture, create either address space private or global page
1618 * table.
1619 *
1620 * @param flags Flags saying whether the page table is for the kernel
1621 * address space.
1622 *
1623 * @return First entry of the page table.
1624 *
1625 */
1626NO_TRACE pte_t *page_table_create(unsigned int flags)
1627{
1628 assert(as_operations);
1629 assert(as_operations->page_table_create);
1630
1631 return as_operations->page_table_create(flags);
1632}
1633
1634/** Destroy page table.
1635 *
1636 * Destroy page table in architecture specific way.
1637 *
1638 * @param page_table Physical address of PTL0.
1639 *
1640 */
1641NO_TRACE void page_table_destroy(pte_t *page_table)
1642{
1643 assert(as_operations);
1644 assert(as_operations->page_table_destroy);
1645
1646 as_operations->page_table_destroy(page_table);
1647}
1648
1649/** Lock page table.
1650 *
1651 * This function should be called before any page_mapping_insert(),
1652 * page_mapping_remove() and page_mapping_find().
1653 *
1654 * Locking order is such that address space areas must be locked
1655 * prior to this call. Address space can be locked prior to this
1656 * call in which case the lock argument is false.
1657 *
1658 * @param as Address space.
1659 * @param lock If false, do not attempt to lock as->lock.
1660 *
1661 */
1662NO_TRACE void page_table_lock(as_t *as, bool lock)
1663{
1664 assert(as_operations);
1665 assert(as_operations->page_table_lock);
1666
1667 as_operations->page_table_lock(as, lock);
1668}
1669
1670/** Unlock page table.
1671 *
1672 * @param as Address space.
1673 * @param unlock If false, do not attempt to unlock as->lock.
1674 *
1675 */
1676NO_TRACE void page_table_unlock(as_t *as, bool unlock)
1677{
1678 assert(as_operations);
1679 assert(as_operations->page_table_unlock);
1680
1681 as_operations->page_table_unlock(as, unlock);
1682}
1683
1684/** Test whether page tables are locked.
1685 *
1686 * @param as Address space where the page tables belong.
1687 *
1688 * @return True if the page tables belonging to the address space
1689 * are locked, otherwise false.
1690 */
1691NO_TRACE bool page_table_locked(as_t *as)
1692{
1693 assert(as_operations);
1694 assert(as_operations->page_table_locked);
1695
1696 return as_operations->page_table_locked(as);
1697}
1698
1699/** Return size of the address space area with given base.
1700 *
1701 * @param base Arbitrary address inside the address space area.
1702 *
1703 * @return Size of the address space area in bytes or zero if it
1704 * does not exist.
1705 *
1706 */
1707size_t as_area_get_size(uintptr_t base)
1708{
1709 size_t size;
1710
1711 page_table_lock(AS, true);
1712 as_area_t *src_area = find_area_and_lock(AS, base);
1713
1714 if (src_area) {
1715 size = P2SZ(src_area->pages);
1716 mutex_unlock(&src_area->lock);
1717 } else
1718 size = 0;
1719
1720 page_table_unlock(AS, true);
1721 return size;
1722}
1723
1724/** Mark portion of address space area as used.
1725 *
1726 * The address space area must be already locked.
1727 *
1728 * @param area Address space area.
1729 * @param page First page to be marked.
1730 * @param count Number of pages to be marked.
1731 *
1732 * @return False on failure or true on success.
1733 *
1734 */
1735bool used_space_insert(as_area_t *area, uintptr_t page, size_t count)
1736{
1737 assert(mutex_locked(&area->lock));
1738 assert(IS_ALIGNED(page, PAGE_SIZE));
1739 assert(count);
1740
1741 btree_node_t *leaf = NULL;
1742 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1743 if (pages) {
1744 /*
1745 * We hit the beginning of some used space.
1746 */
1747 return false;
1748 }
1749
1750 assert(leaf != NULL);
1751
1752 if (!leaf->keys) {
1753 btree_insert(&area->used_space, page, (void *) count, leaf);
1754 goto success;
1755 }
1756
1757 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space, leaf);
1758 if (node) {
1759 uintptr_t left_pg = node->key[node->keys - 1];
1760 uintptr_t right_pg = leaf->key[0];
1761 size_t left_cnt = (size_t) node->value[node->keys - 1];
1762 size_t right_cnt = (size_t) leaf->value[0];
1763
1764 /*
1765 * Examine the possibility that the interval fits
1766 * somewhere between the rightmost interval of
1767 * the left neighbour and the first interval of the leaf.
1768 */
1769
1770 if (page >= right_pg) {
1771 /* Do nothing. */
1772 } else if (overlaps(page, P2SZ(count), left_pg,
1773 P2SZ(left_cnt))) {
1774 /* The interval intersects with the left interval. */
1775 return false;
1776 } else if (overlaps(page, P2SZ(count), right_pg,
1777 P2SZ(right_cnt))) {
1778 /* The interval intersects with the right interval. */
1779 return false;
1780 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1781 (page + P2SZ(count) == right_pg)) {
1782 /*
1783 * The interval can be added by merging the two already
1784 * present intervals.
1785 */
1786 node->value[node->keys - 1] += count + right_cnt;
1787 btree_remove(&area->used_space, right_pg, leaf);
1788 goto success;
1789 } else if (page == left_pg + P2SZ(left_cnt)) {
1790 /*
1791 * The interval can be added by simply growing the left
1792 * interval.
1793 */
1794 node->value[node->keys - 1] += count;
1795 goto success;
1796 } else if (page + P2SZ(count) == right_pg) {
1797 /*
1798 * The interval can be added by simply moving the base of
1799 * the right interval down and increasing its size
1800 * accordingly.
1801 */
1802 leaf->value[0] += count;
1803 leaf->key[0] = page;
1804 goto success;
1805 } else {
1806 /*
1807 * The interval is between both neighbouring intervals,
1808 * but cannot be merged with any of them.
1809 */
1810 btree_insert(&area->used_space, page, (void *) count,
1811 leaf);
1812 goto success;
1813 }
1814 } else if (page < leaf->key[0]) {
1815 uintptr_t right_pg = leaf->key[0];
1816 size_t right_cnt = (size_t) leaf->value[0];
1817
1818 /*
1819 * Investigate the border case in which the left neighbour does
1820 * not exist but the interval fits from the left.
1821 */
1822
1823 if (overlaps(page, P2SZ(count), right_pg, P2SZ(right_cnt))) {
1824 /* The interval intersects with the right interval. */
1825 return false;
1826 } else if (page + P2SZ(count) == right_pg) {
1827 /*
1828 * The interval can be added by moving the base of the
1829 * right interval down and increasing its size
1830 * accordingly.
1831 */
1832 leaf->key[0] = page;
1833 leaf->value[0] += count;
1834 goto success;
1835 } else {
1836 /*
1837 * The interval doesn't adjoin with the right interval.
1838 * It must be added individually.
1839 */
1840 btree_insert(&area->used_space, page, (void *) count,
1841 leaf);
1842 goto success;
1843 }
1844 }
1845
1846 node = btree_leaf_node_right_neighbour(&area->used_space, leaf);
1847 if (node) {
1848 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1849 uintptr_t right_pg = node->key[0];
1850 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1851 size_t right_cnt = (size_t) node->value[0];
1852
1853 /*
1854 * Examine the possibility that the interval fits
1855 * somewhere between the leftmost interval of
1856 * the right neighbour and the last interval of the leaf.
1857 */
1858
1859 if (page < left_pg) {
1860 /* Do nothing. */
1861 } else if (overlaps(page, P2SZ(count), left_pg,
1862 P2SZ(left_cnt))) {
1863 /* The interval intersects with the left interval. */
1864 return false;
1865 } else if (overlaps(page, P2SZ(count), right_pg,
1866 P2SZ(right_cnt))) {
1867 /* The interval intersects with the right interval. */
1868 return false;
1869 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1870 (page + P2SZ(count) == right_pg)) {
1871 /*
1872 * The interval can be added by merging the two already
1873 * present intervals.
1874 */
1875 leaf->value[leaf->keys - 1] += count + right_cnt;
1876 btree_remove(&area->used_space, right_pg, node);
1877 goto success;
1878 } else if (page == left_pg + P2SZ(left_cnt)) {
1879 /*
1880 * The interval can be added by simply growing the left
1881 * interval.
1882 */
1883 leaf->value[leaf->keys - 1] += count;
1884 goto success;
1885 } else if (page + P2SZ(count) == right_pg) {
1886 /*
1887 * The interval can be added by simply moving the base of
1888 * the right interval down and increasing its size
1889 * accordingly.
1890 */
1891 node->value[0] += count;
1892 node->key[0] = page;
1893 goto success;
1894 } else {
1895 /*
1896 * The interval is between both neighbouring intervals,
1897 * but cannot be merged with any of them.
1898 */
1899 btree_insert(&area->used_space, page, (void *) count,
1900 leaf);
1901 goto success;
1902 }
1903 } else if (page >= leaf->key[leaf->keys - 1]) {
1904 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1905 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1906
1907 /*
1908 * Investigate the border case in which the right neighbour
1909 * does not exist but the interval fits from the right.
1910 */
1911
1912 if (overlaps(page, P2SZ(count), left_pg, P2SZ(left_cnt))) {
1913 /* The interval intersects with the left interval. */
1914 return false;
1915 } else if (left_pg + P2SZ(left_cnt) == page) {
1916 /*
1917 * The interval can be added by growing the left
1918 * interval.
1919 */
1920 leaf->value[leaf->keys - 1] += count;
1921 goto success;
1922 } else {
1923 /*
1924 * The interval doesn't adjoin with the left interval.
1925 * It must be added individually.
1926 */
1927 btree_insert(&area->used_space, page, (void *) count,
1928 leaf);
1929 goto success;
1930 }
1931 }
1932
1933 /*
1934 * Note that if the algorithm made it thus far, the interval can fit
1935 * only between two other intervals of the leaf. The two border cases
1936 * were already resolved.
1937 */
1938 btree_key_t i;
1939 for (i = 1; i < leaf->keys; i++) {
1940 if (page < leaf->key[i]) {
1941 uintptr_t left_pg = leaf->key[i - 1];
1942 uintptr_t right_pg = leaf->key[i];
1943 size_t left_cnt = (size_t) leaf->value[i - 1];
1944 size_t right_cnt = (size_t) leaf->value[i];
1945
1946 /*
1947 * The interval fits between left_pg and right_pg.
1948 */
1949
1950 if (overlaps(page, P2SZ(count), left_pg,
1951 P2SZ(left_cnt))) {
1952 /*
1953 * The interval intersects with the left
1954 * interval.
1955 */
1956 return false;
1957 } else if (overlaps(page, P2SZ(count), right_pg,
1958 P2SZ(right_cnt))) {
1959 /*
1960 * The interval intersects with the right
1961 * interval.
1962 */
1963 return false;
1964 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1965 (page + P2SZ(count) == right_pg)) {
1966 /*
1967 * The interval can be added by merging the two
1968 * already present intervals.
1969 */
1970 leaf->value[i - 1] += count + right_cnt;
1971 btree_remove(&area->used_space, right_pg, leaf);
1972 goto success;
1973 } else if (page == left_pg + P2SZ(left_cnt)) {
1974 /*
1975 * The interval can be added by simply growing
1976 * the left interval.
1977 */
1978 leaf->value[i - 1] += count;
1979 goto success;
1980 } else if (page + P2SZ(count) == right_pg) {
1981 /*
1982 * The interval can be added by simply moving
1983 * the base of the right interval down and
1984 * increasing its size accordingly.
1985 */
1986 leaf->value[i] += count;
1987 leaf->key[i] = page;
1988 goto success;
1989 } else {
1990 /*
1991 * The interval is between both neighbouring
1992 * intervals, but cannot be merged with any of
1993 * them.
1994 */
1995 btree_insert(&area->used_space, page,
1996 (void *) count, leaf);
1997 goto success;
1998 }
1999 }
2000 }
2001
2002 panic("Inconsistency detected while adding %zu pages of used "
2003 "space at %p.", count, (void *) page);
2004
2005success:
2006 area->resident += count;
2007 return true;
2008}
2009
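/*
 * Illustrative sketch, not part of the original file: two insertions into
 * an area's used_space map, with page being page-aligned. The second
 * interval starts exactly where the first one ends, so the code above
 * merges them into a single interval instead of storing two, and
 * area->resident grows by five pages in total. The example_* helper is
 * hypothetical.
 */
static void example_mark_used(as_area_t *area, uintptr_t page)
{
        mutex_lock(&area->lock);

        if (!used_space_insert(area, page, 2))
                panic("Cannot insert used space.");

        if (!used_space_insert(area, page + P2SZ(2), 3))
                panic("Cannot insert used space.");

        mutex_unlock(&area->lock);
}
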
2010/** Mark portion of address space area as unused.
2011 *
2012 * The address space area must be already locked.
2013 *
2014 * @param area Address space area.
2015 * @param page First page to be marked.
2016 * @param count Number of pages to be marked.
2017 *
2018 * @return False on failure or true on success.
2019 *
2020 */
2021bool used_space_remove(as_area_t *area, uintptr_t page, size_t count)
2022{
2023 assert(mutex_locked(&area->lock));
2024 assert(IS_ALIGNED(page, PAGE_SIZE));
2025 assert(count);
2026
2027 btree_node_t *leaf;
2028 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
2029 if (pages) {
2030 /*
2031 * We are lucky, page is the beginning of some interval.
2032 */
2033 if (count > pages) {
2034 return false;
2035 } else if (count == pages) {
2036 btree_remove(&area->used_space, page, leaf);
2037 goto success;
2038 } else {
2039 /*
2040 * Find the respective interval.
2041 * Decrease its size and relocate its start address.
2042 */
2043 btree_key_t i;
2044 for (i = 0; i < leaf->keys; i++) {
2045 if (leaf->key[i] == page) {
2046 leaf->key[i] += P2SZ(count);
2047 leaf->value[i] -= count;
2048 goto success;
2049 }
2050 }
2051
2052 goto error;
2053 }
2054 }
2055
2056 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space,
2057 leaf);
2058 if ((node) && (page < leaf->key[0])) {
2059 uintptr_t left_pg = node->key[node->keys - 1];
2060 size_t left_cnt = (size_t) node->value[node->keys - 1];
2061
2062 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2063 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2064 /*
2065 * The interval is contained in the rightmost
2066 * interval of the left neighbour and can be
2067 * removed by updating the size of the bigger
2068 * interval.
2069 */
2070 node->value[node->keys - 1] -= count;
2071 goto success;
2072 } else if (page + P2SZ(count) <
2073 left_pg + P2SZ(left_cnt)) {
2074 size_t new_cnt;
2075
2076 /*
2077 * The interval is contained in the rightmost
2078 * interval of the left neighbour but its
2079 * removal requires both updating the size of
2080 * the original interval and also inserting a
2081 * new interval.
2082 */
2083 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2084 (page + P2SZ(count))) >> PAGE_WIDTH;
2085 node->value[node->keys - 1] -= count + new_cnt;
2086 btree_insert(&area->used_space, page +
2087 P2SZ(count), (void *) new_cnt, leaf);
2088 goto success;
2089 }
2090 }
2091
2092 return false;
2093 } else if (page < leaf->key[0])
2094 return false;
2095
2096 if (page > leaf->key[leaf->keys - 1]) {
2097 uintptr_t left_pg = leaf->key[leaf->keys - 1];
2098 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
2099
2100 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2101 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2102 /*
2103 * The interval is contained in the rightmost
2104 * interval of the leaf and can be removed by
2105 * updating the size of the bigger interval.
2106 */
2107 leaf->value[leaf->keys - 1] -= count;
2108 goto success;
2109 } else if (page + P2SZ(count) < left_pg +
2110 P2SZ(left_cnt)) {
2111 size_t new_cnt;
2112
2113 /*
2114 * The interval is contained in the rightmost
2115 * interval of the leaf but its removal
2116 * requires both updating the size of the
2117 * original interval and also inserting a new
2118 * interval.
2119 */
2120 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2121 (page + P2SZ(count))) >> PAGE_WIDTH;
2122 leaf->value[leaf->keys - 1] -= count + new_cnt;
2123 btree_insert(&area->used_space, page +
2124 P2SZ(count), (void *) new_cnt, leaf);
2125 goto success;
2126 }
2127 }
2128
2129 return false;
2130 }
2131
2132 /*
2133	 * The border cases have already been resolved.
2134	 * Now the interval can only lie between intervals of the leaf.
2135 */
2136 btree_key_t i;
2137 for (i = 1; i < leaf->keys - 1; i++) {
2138 if (page < leaf->key[i]) {
2139 uintptr_t left_pg = leaf->key[i - 1];
2140 size_t left_cnt = (size_t) leaf->value[i - 1];
2141
2142 /*
2143 * Now the interval is between intervals corresponding
2144 * to (i - 1) and i.
2145 */
2146 if (overlaps(left_pg, P2SZ(left_cnt), page,
2147 P2SZ(count))) {
2148 if (page + P2SZ(count) ==
2149 left_pg + P2SZ(left_cnt)) {
2150 /*
2151 * The interval is contained in the
2152 * interval (i - 1) of the leaf and can
2153 * be removed by updating the size of
2154 * the bigger interval.
2155 */
2156 leaf->value[i - 1] -= count;
2157 goto success;
2158 } else if (page + P2SZ(count) <
2159 left_pg + P2SZ(left_cnt)) {
2160 size_t new_cnt;
2161
2162 /*
2163 * The interval is contained in the
2164 * interval (i - 1) of the leaf but its
2165 * removal requires both updating the
2166 * size of the original interval and
2167 * also inserting a new interval.
2168 */
2169 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2170 (page + P2SZ(count))) >>
2171 PAGE_WIDTH;
2172 leaf->value[i - 1] -= count + new_cnt;
2173 btree_insert(&area->used_space, page +
2174 P2SZ(count), (void *) new_cnt,
2175 leaf);
2176 goto success;
2177 }
2178 }
2179
2180 return false;
2181 }
2182 }
2183
2184error:
2185 panic("Inconsistency detected while removing %zu pages of used "
2186 "space from %p.", count, (void *) page);
2187
2188success:
2189 area->resident -= count;
2190 return true;
2191}
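/*
 * A hypothetical example, assuming 4 KiB pages: removing one page at 0x2000
 * from a tracked interval [0x1000, 4 pages] punches a hole in its middle, so
 * the code above shrinks the original interval to [0x1000, 1 page] and
 * inserts a new interval [0x3000, 2 pages];
 * new_cnt = ((0x1000 + P2SZ(4)) - (0x2000 + P2SZ(1))) >> PAGE_WIDTH == 2.
 */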
2192
2193/*
2194 * Address space related syscalls.
2195 */
2196
2197sysarg_t sys_as_area_create(uintptr_t base, size_t size, unsigned int flags,
2198 uintptr_t bound, as_area_pager_info_t *pager_info)
2199{
2200 uintptr_t virt = base;
2201 mem_backend_t *backend;
2202 mem_backend_data_t backend_data;
2203
2204 if (pager_info == AS_AREA_UNPAGED)
2205 backend = &anon_backend;
2206 else {
2207 backend = &user_backend;
2208 if (copy_from_uspace(&backend_data.pager_info, pager_info,
2209 sizeof(as_area_pager_info_t)) != EOK) {
2210 return (sysarg_t) AS_MAP_FAILED;
2211 }
2212 }
2213 as_area_t *area = as_area_create(AS, flags, size,
2214 AS_AREA_ATTR_NONE, backend, &backend_data, &virt, bound);
2215 if (area == NULL)
2216 return (sysarg_t) AS_MAP_FAILED;
2217
2218 return (sysarg_t) virt;
2219}
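/*
 * Note that virt is passed to as_area_create() by reference: on success the
 * syscall returns the base address actually assigned to the new area, which
 * may differ from the requested base, while any failure is reported as
 * AS_MAP_FAILED.
 */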
2220
2221sys_errno_t sys_as_area_resize(uintptr_t address, size_t size, unsigned int flags)
2222{
2223 return (sys_errno_t) as_area_resize(AS, address, size, 0);
2224}
2225
2226sys_errno_t sys_as_area_change_flags(uintptr_t address, unsigned int flags)
2227{
2228 return (sys_errno_t) as_area_change_flags(AS, flags, address);
2229}
2230
2231sys_errno_t sys_as_area_destroy(uintptr_t address)
2232{
2233 return (sys_errno_t) as_area_destroy(AS, address);
2234}
2235
2236/** Get list of address space areas.
2237 *
2238 * @param as    Address space.
2239 * @param osize Place to save size of returned buffer.
2240 *
2241 * @return Pointer to array of address space area info, or NULL on failure.
2242 */
2243as_area_info_t *as_get_area_info(as_t *as, size_t *osize)
2244{
2245 mutex_lock(&as->lock);
2246
2247 /* Count number of areas. */
2248 size_t area_cnt = odict_count(&as->as_areas);
2249
2250 size_t isize = area_cnt * sizeof(as_area_info_t);
2251 as_area_info_t *info = malloc(isize);
2252 if (!info) {
2253 mutex_unlock(&as->lock);
2254 return NULL;
2255 }
2256
2257 /* Record area data. */
2258
2259 size_t area_idx = 0;
2260
2261 as_area_t *area = as_area_first(as);
2262 while (area != NULL) {
2263 assert(area_idx < area_cnt);
2264 mutex_lock(&area->lock);
2265
2266 info[area_idx].start_addr = area->base;
2267 info[area_idx].size = P2SZ(area->pages);
2268 info[area_idx].flags = area->flags;
2269 ++area_idx;
2270
2271 mutex_unlock(&area->lock);
2272 area = as_area_next(area);
2273 }
2274
2275 mutex_unlock(&as->lock);
2276
2277 *osize = isize;
2278 return info;
2279}
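/*
 * A minimal usage sketch with a hypothetical caller; the returned buffer is
 * heap-allocated and the caller is expected to free it:
 *
 *	size_t size;
 *	as_area_info_t *info = as_get_area_info(AS, &size);
 *	if (info != NULL) {
 *		for (size_t i = 0; i < size / sizeof(as_area_info_t); i++)
 *			printf("area %p, %zu bytes\n",
 *			    (void *) info[i].start_addr, info[i].size);
 *		free(info);
 *	}
 */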
2280
2281/** Print out information about address space.
2282 *
2283 * @param as Address space.
2284 *
2285 */
2286void as_print(as_t *as)
2287{
2288 mutex_lock(&as->lock);
2289
2290 /* Print out info about address space areas */
2291 as_area_t *area = as_area_first(as);
2292 while (area != NULL) {
2293 mutex_lock(&area->lock);
2294 printf("as_area: %p, base=%p, pages=%zu"
2295 " (%p - %p)\n", area, (void *) area->base,
2296 area->pages, (void *) area->base,
2297 (void *) (area->base + P2SZ(area->pages)));
2298 mutex_unlock(&area->lock);
2299
2300 area = as_area_next(area);
2301 }
2302
2303 mutex_unlock(&as->lock);
2304}
2305
2306/** @}
2307 */