source: mainline/kernel/generic/src/mm/as.c@8620b2f

Last change on this file since 8620b2f was 5df1963, checked in by Martin Decky <martin@…>, 12 years ago

The bitmap frame allocator does not keep track of the size of allocated frame blocks.
To avoid memory leaks, the number of allocated frames needs to be passed explicitly during deallocation.

1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup genericmm
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Address space related functions.
36 *
37 * This file contains address space manipulation functions.
38 * Roughly speaking, this is a higher-level client of
39 * the Virtual Address Translation (VAT) subsystem.
40 *
41 * Functionality provided by this file allows one to
42 * create address spaces and create, resize and share
43 * address space areas.
44 *
45 * @see page.c
46 *
47 */
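
/*
 * A minimal illustrative sketch of how the functions below fit together
 * (not compiled as part of this file; error handling and locking details
 * are omitted, and the chosen flags are only an example):
 *
 *	as_t *as = as_create(0);
 *	as_hold(as);                       // keep the address space alive
 *
 *	uintptr_t virt = (uintptr_t) -1;   // let the kernel pick the address
 *	as_area_create(as, AS_AREA_READ | AS_AREA_WRITE | AS_AREA_CACHEABLE,
 *	    PAGE_SIZE, AS_AREA_ATTR_NONE, &anon_backend, NULL, &virt, 0);
 *
 *	as_release(as);                    // the last release destroys it
 */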
48
49#include <mm/as.h>
50#include <arch/mm/as.h>
51#include <mm/page.h>
52#include <mm/frame.h>
53#include <mm/slab.h>
54#include <mm/tlb.h>
55#include <arch/mm/page.h>
56#include <genarch/mm/page_pt.h>
57#include <genarch/mm/page_ht.h>
58#include <mm/asid.h>
59#include <arch/mm/asid.h>
60#include <preemption.h>
61#include <synch/spinlock.h>
62#include <synch/mutex.h>
63#include <adt/list.h>
64#include <adt/btree.h>
65#include <proc/task.h>
66#include <proc/thread.h>
67#include <arch/asm.h>
68#include <panic.h>
69#include <debug.h>
70#include <print.h>
71#include <memstr.h>
72#include <macros.h>
73#include <bitops.h>
74#include <arch.h>
75#include <errno.h>
76#include <config.h>
77#include <align.h>
78#include <typedefs.h>
79#include <syscall/copy.h>
80#include <arch/interrupt.h>
81#include <interrupt.h>
82
83/**
84 * Each architecture decides what functions will be used to carry out
85 * address space operations such as creating or locking page tables.
86 */
87as_operations_t *as_operations = NULL;
88
89/** Slab for as_t objects.
90 *
91 */
92static slab_cache_t *as_slab;
93
94/** ASID subsystem lock.
95 *
96 * This lock protects:
97 * - inactive_as_with_asid_list
98 * - as->asid for each as of the as_t type
99 * - asids_allocated counter
100 *
101 */
102SPINLOCK_INITIALIZE(asidlock);
103
104/**
105 * Inactive address spaces (on all processors)
106 * that have a valid ASID.
107 */
108LIST_INITIALIZE(inactive_as_with_asid_list);
109
110/** Kernel address space. */
111as_t *AS_KERNEL = NULL;
112
113NO_TRACE static int as_constructor(void *obj, unsigned int flags)
114{
115 as_t *as = (as_t *) obj;
116
117 link_initialize(&as->inactive_as_with_asid_link);
118 mutex_initialize(&as->lock, MUTEX_PASSIVE);
119
120 return as_constructor_arch(as, flags);
121}
122
123NO_TRACE static size_t as_destructor(void *obj)
124{
125 return as_destructor_arch((as_t *) obj);
126}
127
128/** Initialize address space subsystem. */
129void as_init(void)
130{
131 as_arch_init();
132
133 as_slab = slab_cache_create("as_t", sizeof(as_t), 0,
134 as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);
135
136 AS_KERNEL = as_create(FLAG_AS_KERNEL);
137 if (!AS_KERNEL)
138 panic("Cannot create kernel address space.");
139
140 /*
141 * Make sure the kernel address space
142 * reference count never drops to zero.
143 */
144 as_hold(AS_KERNEL);
145}
146
147/** Create address space.
148 *
149 * @param flags Flags that influence the way in which the address
150 * space is created.
151 *
152 */
153as_t *as_create(unsigned int flags)
154{
155 as_t *as = (as_t *) slab_alloc(as_slab, 0);
156 (void) as_create_arch(as, 0);
157
158 btree_create(&as->as_area_btree);
159
160 if (flags & FLAG_AS_KERNEL)
161 as->asid = ASID_KERNEL;
162 else
163 as->asid = ASID_INVALID;
164
165 atomic_set(&as->refcount, 0);
166 as->cpu_refcount = 0;
167
168#ifdef AS_PAGE_TABLE
169 as->genarch.page_table = page_table_create(flags);
170#else
171 page_table_create(flags);
172#endif
173
174 return as;
175}
176
177/** Destroy address space.
178 *
179 * When there are no tasks referencing this address space (i.e. its refcount is
180 * zero), the address space can be destroyed.
181 *
182 * No spinlocks may be held when this function is called.
183 *
184 * @param as Address space to be destroyed.
185 *
186 */
187void as_destroy(as_t *as)
188{
189 DEADLOCK_PROBE_INIT(p_asidlock);
190
191 ASSERT(as != AS);
192 ASSERT(atomic_get(&as->refcount) == 0);
193
194 /*
195 * Since there is no reference to this address space, it is safe not to
196 * lock its mutex.
197 */
198
199 /*
200 * We need to avoid deadlock between TLB shootdown and asidlock.
201 * We therefore try to take asidlock conditionally and if we don't succeed,
202 * we enable interrupts and try again. This is done while preemption is
203 * disabled to prevent nested context switches. We also depend on the
204 * fact that so far no spinlocks are held.
205 */
206 preemption_disable();
207 ipl_t ipl = interrupts_read();
208
209retry:
210 interrupts_disable();
211 if (!spinlock_trylock(&asidlock)) {
212 interrupts_enable();
213 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
214 goto retry;
215 }
216
217 /* Interrupts disabled, enable preemption */
218 preemption_enable();
219
220 if ((as->asid != ASID_INVALID) && (as != AS_KERNEL)) {
221 if (as->cpu_refcount == 0)
222 list_remove(&as->inactive_as_with_asid_link);
223
224 asid_put(as->asid);
225 }
226
227 spinlock_unlock(&asidlock);
228 interrupts_restore(ipl);
229
230
231 /*
232 * Destroy address space areas of the address space.
233 * The B+tree must be walked carefully because it is
234 * also being destroyed.
235 */
236 bool cond = true;
237 while (cond) {
238 ASSERT(!list_empty(&as->as_area_btree.leaf_list));
239
240 btree_node_t *node =
241 list_get_instance(list_first(&as->as_area_btree.leaf_list),
242 btree_node_t, leaf_link);
243
244 if ((cond = node->keys))
245 as_area_destroy(as, node->key[0]);
246 }
247
248 btree_destroy(&as->as_area_btree);
249
250#ifdef AS_PAGE_TABLE
251 page_table_destroy(as->genarch.page_table);
252#else
253 page_table_destroy(NULL);
254#endif
255
256 slab_free(as_slab, as);
257}
258
259/** Hold a reference to an address space.
260 *
261 * Holding a reference to an address space prevents destruction
262 * of that address space.
263 *
264 * @param as Address space to be held.
265 *
266 */
267NO_TRACE void as_hold(as_t *as)
268{
269 atomic_inc(&as->refcount);
270}
271
272/** Release a reference to an address space.
273 *
274 * The last one to release a reference to an address space
275 * destroys the address space.
276 *
277 * @param as Address space to be released.
278 *
279 */
280NO_TRACE void as_release(as_t *as)
281{
282 if (atomic_predec(&as->refcount) == 0)
283 as_destroy(as);
284}
285
286/** Check the area for conflicts with other areas.
287 *
288 * @param as Address space.
289 * @param addr Starting virtual address of the area being tested.
290 * @param count Number of pages in the area being tested.
291 * @param guarded True if the area being tested is protected by guard pages.
292 * @param avoid Do not touch this area.
293 *
294 * @return True if there is no conflict, false otherwise.
295 *
296 */
297NO_TRACE static bool check_area_conflicts(as_t *as, uintptr_t addr,
298 size_t count, bool guarded, as_area_t *avoid)
299{
300 ASSERT((addr % PAGE_SIZE) == 0);
301 ASSERT(mutex_locked(&as->lock));
302
303 /*
304 * If the addition of the supposed area address and size overflows,
305 * report conflict.
306 */
307 if (overflows_into_positive(addr, P2SZ(count)))
308 return false;
309
310 /*
311 * We don't want any area to have conflicts with NULL page.
312 */
313 if (overlaps(addr, P2SZ(count), (uintptr_t) NULL, PAGE_SIZE))
314 return false;
315
316 /*
317 * The leaf node is found in O(log n), where n is proportional to
318 * the number of address space areas belonging to as.
319 * The check for conflicts is then attempted on the rightmost
320 * record in the left neighbour, the leftmost record in the right
321 * neighbour and all records in the leaf node itself.
322 */
323 btree_node_t *leaf;
324 as_area_t *area =
325 (as_area_t *) btree_search(&as->as_area_btree, addr, &leaf);
326 if (area) {
327 if (area != avoid)
328 return false;
329 }
330
331 /* First, check the two border cases. */
332 btree_node_t *node =
333 btree_leaf_node_left_neighbour(&as->as_area_btree, leaf);
334 if (node) {
335 area = (as_area_t *) node->value[node->keys - 1];
336
337 if (area != avoid) {
338 mutex_lock(&area->lock);
339
340 /*
341 * If at least one of the two areas is protected
342 * by the AS_AREA_GUARD flag then we must be sure
343 * that they are separated by at least one unmapped
344 * page.
345 */
346 int const gp = (guarded ||
347 (area->flags & AS_AREA_GUARD)) ? 1 : 0;
348
349 /*
350 * The area comes from the left neighbour node, which
351 * means that there already are some areas in the leaf
352 * node, which in turn means that adding gp is safe and
353 * will not cause an integer overflow.
354 */
355 if (overlaps(addr, P2SZ(count), area->base,
356 P2SZ(area->pages + gp))) {
357 mutex_unlock(&area->lock);
358 return false;
359 }
360
361 mutex_unlock(&area->lock);
362 }
363 }
364
365 node = btree_leaf_node_right_neighbour(&as->as_area_btree, leaf);
366 if (node) {
367 area = (as_area_t *) node->value[0];
368
369 if (area != avoid) {
370 int gp;
371
372 mutex_lock(&area->lock);
373
374 gp = (guarded || (area->flags & AS_AREA_GUARD)) ? 1 : 0;
375 if (gp && overflows(addr, P2SZ(count))) {
376 /*
377 * Guard page not needed if the supposed area
378 * is adjacent to the end of the address space.
379 * We already know that the following test is
380 * going to fail...
381 */
382 gp--;
383 }
384
385 if (overlaps(addr, P2SZ(count + gp), area->base,
386 P2SZ(area->pages))) {
387 mutex_unlock(&area->lock);
388 return false;
389 }
390
391 mutex_unlock(&area->lock);
392 }
393 }
394
395 /* Second, check the leaf node. */
396 btree_key_t i;
397 for (i = 0; i < leaf->keys; i++) {
398 area = (as_area_t *) leaf->value[i];
399 int agp;
400 int gp;
401
402 if (area == avoid)
403 continue;
404
405 mutex_lock(&area->lock);
406
407 gp = (guarded || (area->flags & AS_AREA_GUARD)) ? 1 : 0;
408 agp = gp;
409
410 /*
411 * Sanitize the two possible unsigned integer overflows.
412 */
413 if (gp && overflows(addr, P2SZ(count)))
414 gp--;
415 if (agp && overflows(area->base, P2SZ(area->pages)))
416 agp--;
417
418 if (overlaps(addr, P2SZ(count + gp), area->base,
419 P2SZ(area->pages + agp))) {
420 mutex_unlock(&area->lock);
421 return false;
422 }
423
424 mutex_unlock(&area->lock);
425 }
426
427 /*
428 * So far, the area does not conflict with other areas.
429 * Check if it is contained in the user address space.
430 */
431 if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
432 return iswithin(USER_ADDRESS_SPACE_START,
433 (USER_ADDRESS_SPACE_END - USER_ADDRESS_SPACE_START) + 1,
434 addr, P2SZ(count));
435 }
436
437 return true;
438}
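
/*
 * Worked example of the guard page logic above (assuming 4 KiB pages):
 * if an existing area at 0x10000 spans 2 pages and either it or the
 * candidate area is guarded, the conflict check effectively extends the
 * existing area to 3 pages (0x10000 - 0x12FFF). A candidate area starting
 * at 0x12000 therefore conflicts, even though the mappings themselves
 * would not overlap, because no unmapped guard page would separate them.
 */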
439
440/** Return the start address of an unmapped address space area.
441 *
442 * The address space must be already locked when calling
443 * this function.
444 *
445 * @param as Address space.
446 * @param bound Lowest address bound.
447 * @param size Requested size of the allocation.
448 * @param guarded True if the allocation must be protected by guard pages.
449 *
450 * @return Address of the beginning of unmapped address space area.
451 * @return -1 if no suitable address space area was found.
452 *
453 */
454NO_TRACE static uintptr_t as_get_unmapped_area(as_t *as, uintptr_t bound,
455 size_t size, bool guarded)
456{
457 ASSERT(mutex_locked(&as->lock));
458
459 if (size == 0)
460 return (uintptr_t) -1;
461
462 /*
463 * Make sure we allocate from page-aligned
464 * address. Check for possible overflow in
465 * each step.
466 */
467
468 size_t pages = SIZE2FRAMES(size);
469
470 /*
471 * Find the lowest unmapped address aligned on the size
472 * boundary, not smaller than bound and of the required size.
473 */
474
475 /* First check the bound address itself */
476 uintptr_t addr = ALIGN_UP(bound, PAGE_SIZE);
477 if (addr >= bound) {
478 if (guarded) {
479 /* Leave an unmapped page between the lower
480 * bound and the area's start address.
481 */
482 addr += P2SZ(1);
483 }
484
485 if (check_area_conflicts(as, addr, pages, guarded, NULL))
486 return addr;
487 }
488
489 /* Next, check the addresses behind each existing area */
490 list_foreach(as->as_area_btree.leaf_list, leaf_link, btree_node_t, node) {
491
492 for (btree_key_t i = 0; i < node->keys; i++) {
493 as_area_t *area = (as_area_t *) node->value[i];
494
495 mutex_lock(&area->lock);
496
497 addr =
498 ALIGN_UP(area->base + P2SZ(area->pages), PAGE_SIZE);
499
500 if (guarded || area->flags & AS_AREA_GUARD) {
501 /* We must leave an unmapped page
502 * between the two areas.
503 */
504 addr += P2SZ(1);
505 }
506
507 bool avail =
508 ((addr >= bound) && (addr >= area->base) &&
509 (check_area_conflicts(as, addr, pages, guarded, area)));
510
511 mutex_unlock(&area->lock);
512
513 if (avail)
514 return addr;
515 }
516 }
517
518 /* No suitable address space area found */
519 return (uintptr_t) -1;
520}
521
522/** Create address space area of common attributes.
523 *
524 * The created address space area is added to the target address space.
525 *
526 * @param as Target address space.
527 * @param flags Flags of the area memory.
528 * @param size Size of area.
529 * @param attrs Attributes of the area.
530 * @param backend Address space area backend. NULL if no backend is used.
531 * @param backend_data NULL or a pointer to an array holding two void *.
532 * @param base Starting virtual address of the area.
533 * If set to -1, a suitable mappable area is found.
534 * @param bound Lowest address bound if base is set to -1.
535 * Otherwise ignored.
536 *
537 * @return Address space area on success or NULL on failure.
538 *
539 */
540as_area_t *as_area_create(as_t *as, unsigned int flags, size_t size,
541 unsigned int attrs, mem_backend_t *backend,
542 mem_backend_data_t *backend_data, uintptr_t *base, uintptr_t bound)
543{
544 if ((*base != (uintptr_t) -1) && !IS_ALIGNED(*base, PAGE_SIZE))
545 return NULL;
546
547 if (size == 0)
548 return NULL;
549
550 size_t pages = SIZE2FRAMES(size);
551
552 /* Writeable executable areas are not supported. */
553 if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
554 return NULL;
555
556 bool const guarded = flags & AS_AREA_GUARD;
557
558 mutex_lock(&as->lock);
559
560 if (*base == (uintptr_t) -1) {
561 *base = as_get_unmapped_area(as, bound, size, guarded);
562 if (*base == (uintptr_t) -1) {
563 mutex_unlock(&as->lock);
564 return NULL;
565 }
566 }
567
	if (overflows_into_positive(*base, size)) {
		mutex_unlock(&as->lock);
		return NULL;
	}
570
571 if (!check_area_conflicts(as, *base, pages, guarded, NULL)) {
572 mutex_unlock(&as->lock);
573 return NULL;
574 }
575
576 as_area_t *area = (as_area_t *) malloc(sizeof(as_area_t), 0);
577
578 mutex_initialize(&area->lock, MUTEX_PASSIVE);
579
580 area->as = as;
581 area->flags = flags;
582 area->attributes = attrs;
583 area->pages = pages;
584 area->resident = 0;
585 area->base = *base;
586 area->sh_info = NULL;
587 area->backend = backend;
588
589 if (backend_data)
590 area->backend_data = *backend_data;
591 else
592 memsetb(&area->backend_data, sizeof(area->backend_data), 0);
593
594 if (area->backend && area->backend->create) {
595 if (!area->backend->create(area)) {
596 free(area);
597 mutex_unlock(&as->lock);
598 return NULL;
599 }
600 }
601
602 btree_create(&area->used_space);
603 btree_insert(&as->as_area_btree, *base, (void *) area,
604 NULL);
605
606 mutex_unlock(&as->lock);
607
608 return area;
609}
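
/*
 * Illustrative sketch (not compiled): creating an anonymous, read/write
 * area at a fixed, page-aligned address in the current address space.
 * The address 0x40000000 is arbitrary and serves only as an example.
 *
 *	uintptr_t virt = 0x40000000;
 *	as_area_t *area = as_area_create(AS, AS_AREA_READ | AS_AREA_WRITE |
 *	    AS_AREA_CACHEABLE, 4 * PAGE_SIZE, AS_AREA_ATTR_NONE,
 *	    &anon_backend, NULL, &virt, 0);
 *	if (area == NULL) {
 *		// the range conflicts with an existing area or the
 *		// arguments are invalid
 *	}
 */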
610
611/** Find address space area and lock it.
612 *
613 * @param as Address space.
614 * @param va Virtual address.
615 *
616 * @return Locked address space area containing va on success or
617 * NULL on failure.
618 *
619 */
620NO_TRACE static as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
621{
622 ASSERT(mutex_locked(&as->lock));
623
624 btree_node_t *leaf;
625 as_area_t *area = (as_area_t *) btree_search(&as->as_area_btree, va,
626 &leaf);
627 if (area) {
628 /* va is the base address of an address space area */
629 mutex_lock(&area->lock);
630 return area;
631 }
632
633 /*
634 * Search the leaf node and the rightmost record of its left neighbour
635 * to find out whether this is a miss or va belongs to an address
636 * space area found there.
637 */
638
639 /* First, search the leaf node itself. */
640 btree_key_t i;
641
642 for (i = 0; i < leaf->keys; i++) {
643 area = (as_area_t *) leaf->value[i];
644
645 mutex_lock(&area->lock);
646
647 if ((area->base <= va) &&
648 (va <= area->base + (P2SZ(area->pages) - 1)))
649 return area;
650
651 mutex_unlock(&area->lock);
652 }
653
654 /*
655 * Second, locate the left neighbour and test its last record.
656 * Because of its position in the B+tree, it must have base < va.
657 */
658 btree_node_t *lnode = btree_leaf_node_left_neighbour(&as->as_area_btree,
659 leaf);
660 if (lnode) {
661 area = (as_area_t *) lnode->value[lnode->keys - 1];
662
663 mutex_lock(&area->lock);
664
665 if (va <= area->base + (P2SZ(area->pages) - 1))
666 return area;
667
668 mutex_unlock(&area->lock);
669 }
670
671 return NULL;
672}
673
674/** Find address space area and change it.
675 *
676 * @param as Address space.
677 * @param address Virtual address belonging to the area to be changed.
678 * Must be page-aligned.
679 * @param size New size of the virtual memory block starting at
680 * address.
681 * @param flags Flags influencing the remap operation. Currently unused.
682 *
683 * @return Zero on success or a value from @ref errno.h otherwise.
684 *
685 */
686int as_area_resize(as_t *as, uintptr_t address, size_t size, unsigned int flags)
687{
688 if (!IS_ALIGNED(address, PAGE_SIZE))
689 return EINVAL;
690
691 mutex_lock(&as->lock);
692
693 /*
694 * Locate the area.
695 */
696 as_area_t *area = find_area_and_lock(as, address);
697 if (!area) {
698 mutex_unlock(&as->lock);
699 return ENOENT;
700 }
701
702 if (!area->backend->is_resizable(area)) {
703 /*
704 * The backend does not support resizing for this area.
705 */
706 mutex_unlock(&area->lock);
707 mutex_unlock(&as->lock);
708 return ENOTSUP;
709 }
710
711 if (area->sh_info) {
712 /*
713 * Remapping of shared address space areas
714 * is not supported.
715 */
716 mutex_unlock(&area->lock);
717 mutex_unlock(&as->lock);
718 return ENOTSUP;
719 }
720
721 size_t pages = SIZE2FRAMES((address - area->base) + size);
722 if (!pages) {
723 /*
724 * Zero size address space areas are not allowed.
725 */
726 mutex_unlock(&area->lock);
727 mutex_unlock(&as->lock);
728 return EPERM;
729 }
730
731 if (pages < area->pages) {
732 uintptr_t start_free = area->base + P2SZ(pages);
733
734 /*
735 * Shrinking the area.
736 * No need to check for overlaps.
737 */
738
739 page_table_lock(as, false);
740
741 /*
742 * Remove frames belonging to used space starting from
743 * the highest addresses downwards until an overlap with
744 * the resized address space area is found. Note that this
745 * is also the right way to remove part of the used_space
746 * B+tree leaf list.
747 */
748 bool cond = true;
749 while (cond) {
750 ASSERT(!list_empty(&area->used_space.leaf_list));
751
752 btree_node_t *node =
753 list_get_instance(list_last(&area->used_space.leaf_list),
754 btree_node_t, leaf_link);
755
756 if ((cond = (bool) node->keys)) {
757 uintptr_t ptr = node->key[node->keys - 1];
758 size_t size =
759 (size_t) node->value[node->keys - 1];
760 size_t i = 0;
761
762 if (overlaps(ptr, P2SZ(size), area->base,
763 P2SZ(pages))) {
764
765 if (ptr + P2SZ(size) <= start_free) {
766 /*
767 * The whole interval fits
768 * completely in the resized
769 * address space area.
770 */
771 break;
772 }
773
774 /*
775 * Only part of the interval overlaps
776 * with the resized address space
777 * area.
778 */
779
780 /* We are almost done */
781 cond = false;
782 i = (start_free - ptr) >> PAGE_WIDTH;
783 if (!used_space_remove(area, start_free,
784 size - i))
785 panic("Cannot remove used space.");
786 } else {
787 /*
788 * The interval of used space can be
789 * completely removed.
790 */
791 if (!used_space_remove(area, ptr, size))
792 panic("Cannot remove used space.");
793 }
794
795 /*
796 * Start TLB shootdown sequence.
797 *
798 * The sequence is rather short and can be
799 * repeated multiple times. The reason is that
800 * we don't want to have used_space_remove()
801 * inside the sequence as it may use a blocking
802 * memory allocation for its B+tree. Blocking
803 * while holding the tlblock spinlock is
804 * forbidden and would hit a kernel assertion.
805 */
806
807 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES,
808 as->asid, area->base + P2SZ(pages),
809 area->pages - pages);
810
811 for (; i < size; i++) {
812 pte_t *pte = page_mapping_find(as,
813 ptr + P2SZ(i), false);
814
815 ASSERT(pte);
816 ASSERT(PTE_VALID(pte));
817 ASSERT(PTE_PRESENT(pte));
818
819 if ((area->backend) &&
820 (area->backend->frame_free)) {
821 area->backend->frame_free(area,
822 ptr + P2SZ(i),
823 PTE_GET_FRAME(pte));
824 }
825
826 page_mapping_remove(as, ptr + P2SZ(i));
827 }
828
829 /*
830 * Finish TLB shootdown sequence.
831 */
832
833 tlb_invalidate_pages(as->asid,
834 area->base + P2SZ(pages),
835 area->pages - pages);
836
837 /*
838 * Invalidate software translation caches
839 * (e.g. TSB on sparc64, PHT on ppc32).
840 */
841 as_invalidate_translation_cache(as,
842 area->base + P2SZ(pages),
843 area->pages - pages);
844 tlb_shootdown_finalize(ipl);
845 }
846 }
847 page_table_unlock(as, false);
848 } else {
849 /*
850 * Growing the area.
851 */
852
		if (overflows_into_positive(address, P2SZ(pages))) {
			mutex_unlock(&area->lock);
			mutex_unlock(&as->lock);
			return EINVAL;
		}
855
856 /*
857 * Check for overlaps with other address space areas.
858 */
859 bool const guarded = area->flags & AS_AREA_GUARD;
860 if (!check_area_conflicts(as, address, pages, guarded, area)) {
861 mutex_unlock(&area->lock);
862 mutex_unlock(&as->lock);
863 return EADDRNOTAVAIL;
864 }
865 }
866
867 if (area->backend && area->backend->resize) {
868 if (!area->backend->resize(area, pages)) {
869 mutex_unlock(&area->lock);
870 mutex_unlock(&as->lock);
871 return ENOMEM;
872 }
873 }
874
875 area->pages = pages;
876
877 mutex_unlock(&area->lock);
878 mutex_unlock(&as->lock);
879
880 return 0;
881}
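
/*
 * Illustrative sketch (not compiled): growing a previously created area.
 * When address equals the area base, size is simply the new total size,
 * so a 4-page area becomes a 5-page area here. virt is assumed to hold
 * the base address of an existing resizable (e.g. anonymous) area.
 *
 *	int rc = as_area_resize(AS, virt, 5 * PAGE_SIZE, 0);
 *	if (rc != 0) {
 *		// e.g. EADDRNOTAVAIL on a conflict, ENOTSUP for shared or
 *		// non-resizable areas
 *	}
 */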
882
883/** Remove reference to address space area share info.
884 *
885 * If the reference count drops to 0, the sh_info is deallocated.
886 *
887 * @param sh_info Pointer to address space area share info.
888 *
889 */
890NO_TRACE static void sh_info_remove_reference(share_info_t *sh_info)
891{
892 bool dealloc = false;
893
894 mutex_lock(&sh_info->lock);
895 ASSERT(sh_info->refcount);
896
897 if (--sh_info->refcount == 0) {
898 dealloc = true;
899
900 /*
901 * Now walk carefully the pagemap B+tree and free/remove
902 * reference from all frames found there.
903 */
904 list_foreach(sh_info->pagemap.leaf_list, leaf_link,
905 btree_node_t, node) {
906 btree_key_t i;
907
908 for (i = 0; i < node->keys; i++)
909 frame_free((uintptr_t) node->value[i], 1);
910 }
911
912 }
913 mutex_unlock(&sh_info->lock);
914
915 if (dealloc) {
916 btree_destroy(&sh_info->pagemap);
917 free(sh_info);
918 }
919}
920
921/** Destroy address space area.
922 *
923 * @param as Address space.
924 * @param address Address within the area to be deleted.
925 *
926 * @return Zero on success or a value from @ref errno.h on failure.
927 *
928 */
929int as_area_destroy(as_t *as, uintptr_t address)
930{
931 mutex_lock(&as->lock);
932
933 as_area_t *area = find_area_and_lock(as, address);
934 if (!area) {
935 mutex_unlock(&as->lock);
936 return ENOENT;
937 }
938
939 if (area->backend && area->backend->destroy)
940 area->backend->destroy(area);
941
942 uintptr_t base = area->base;
943
944 page_table_lock(as, false);
945
946 /*
947 * Start TLB shootdown sequence.
948 */
949 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
950 area->pages);
951
952 /*
953 * Visit only the pages mapped by used_space B+tree.
954 */
955 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
956 node) {
957 btree_key_t i;
958
959 for (i = 0; i < node->keys; i++) {
960 uintptr_t ptr = node->key[i];
961 size_t size;
962
963 for (size = 0; size < (size_t) node->value[i]; size++) {
964 pte_t *pte = page_mapping_find(as,
965 ptr + P2SZ(size), false);
966
967 ASSERT(pte);
968 ASSERT(PTE_VALID(pte));
969 ASSERT(PTE_PRESENT(pte));
970
971 if ((area->backend) &&
972 (area->backend->frame_free)) {
973 area->backend->frame_free(area,
974 ptr + P2SZ(size),
975 PTE_GET_FRAME(pte));
976 }
977
978 page_mapping_remove(as, ptr + P2SZ(size));
979 }
980 }
981 }
982
983 /*
984 * Finish TLB shootdown sequence.
985 */
986
987 tlb_invalidate_pages(as->asid, area->base, area->pages);
988
989 /*
990 * Invalidate potential software translation caches
991 * (e.g. TSB on sparc64, PHT on ppc32).
992 */
993 as_invalidate_translation_cache(as, area->base, area->pages);
994 tlb_shootdown_finalize(ipl);
995
996 page_table_unlock(as, false);
997
998 btree_destroy(&area->used_space);
999
1000 area->attributes |= AS_AREA_ATTR_PARTIAL;
1001
1002 if (area->sh_info)
1003 sh_info_remove_reference(area->sh_info);
1004
1005 mutex_unlock(&area->lock);
1006
1007 /*
1008 * Remove the empty area from address space.
1009 */
1010 btree_remove(&as->as_area_btree, base, NULL);
1011
1012 free(area);
1013
1014 mutex_unlock(&as->lock);
1015 return 0;
1016}
1017
1018/** Share address space area with another or the same address space.
1019 *
1020 * Address space area mapping is shared with a new address space area.
1021 * If the source address space area has not been shared so far,
1022 * a new sh_info is created. The new address space area simply gets the
1023 * sh_info of the source area. The process of duplicating the
1024 * mapping is done through the backend share function.
1025 *
1026 * @param src_as Pointer to source address space.
1027 * @param src_base Base address of the source address space area.
1028 * @param acc_size Expected size of the source area.
1029 * @param dst_as Pointer to destination address space.
1030 * @param dst_flags_mask Destination address space area flags mask.
1031 * @param dst_base Target base address. If set to -1,
1032 * a suitable mappable area is found.
1033 * @param bound Lowest address bound if dst_base is set to -1.
1034 * Otherwise ignored.
1035 *
1036 * @return Zero on success.
1037 * @return ENOENT if there is no such task or such address space.
1038 * @return EPERM if there was a problem in accepting the area.
1039 * @return ENOMEM if there was a problem in allocating destination
1040 * address space area.
1041 * @return ENOTSUP if the address space area backend does not support
1042 * sharing.
1043 *
1044 */
1045int as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
1046 as_t *dst_as, unsigned int dst_flags_mask, uintptr_t *dst_base,
1047 uintptr_t bound)
1048{
1049 mutex_lock(&src_as->lock);
1050 as_area_t *src_area = find_area_and_lock(src_as, src_base);
1051 if (!src_area) {
1052 /*
1053 * Could not find the source address space area.
1054 */
1055 mutex_unlock(&src_as->lock);
1056 return ENOENT;
1057 }
1058
1059 if (!src_area->backend->is_shareable(src_area)) {
1060 /*
1061 * The backend does not permit sharing of this area.
1062 */
1063 mutex_unlock(&src_area->lock);
1064 mutex_unlock(&src_as->lock);
1065 return ENOTSUP;
1066 }
1067
1068 size_t src_size = P2SZ(src_area->pages);
1069 unsigned int src_flags = src_area->flags;
1070 mem_backend_t *src_backend = src_area->backend;
1071 mem_backend_data_t src_backend_data = src_area->backend_data;
1072
1073 /* Share the cacheable flag from the original mapping */
1074 if (src_flags & AS_AREA_CACHEABLE)
1075 dst_flags_mask |= AS_AREA_CACHEABLE;
1076
1077 if ((src_size != acc_size) ||
1078 ((src_flags & dst_flags_mask) != dst_flags_mask)) {
1079 mutex_unlock(&src_area->lock);
1080 mutex_unlock(&src_as->lock);
1081 return EPERM;
1082 }
1083
1084 /*
1085 * Now we are committed to sharing the area.
1086 * First, prepare the area for sharing.
1087 * Then it will be safe to unlock it.
1088 */
1089 share_info_t *sh_info = src_area->sh_info;
1090 if (!sh_info) {
1091 sh_info = (share_info_t *) malloc(sizeof(share_info_t), 0);
1092 mutex_initialize(&sh_info->lock, MUTEX_PASSIVE);
1093 sh_info->refcount = 2;
1094 btree_create(&sh_info->pagemap);
1095 src_area->sh_info = sh_info;
1096
1097 /*
1098 * Call the backend to setup sharing.
1099 */
1100 src_area->backend->share(src_area);
1101 } else {
1102 mutex_lock(&sh_info->lock);
1103 sh_info->refcount++;
1104 mutex_unlock(&sh_info->lock);
1105 }
1106
1107 mutex_unlock(&src_area->lock);
1108 mutex_unlock(&src_as->lock);
1109
1110 /*
1111 * Create copy of the source address space area.
1112 * The destination area is created with AS_AREA_ATTR_PARTIAL
1113 * attribute set which prevents race condition with
1114 * preliminary as_page_fault() calls.
1115 * The flags of the source area are masked against dst_flags_mask
1116 * to support sharing in less privileged mode.
1117 */
1118 as_area_t *dst_area = as_area_create(dst_as, dst_flags_mask,
1119 src_size, AS_AREA_ATTR_PARTIAL, src_backend,
1120 &src_backend_data, dst_base, bound);
1121 if (!dst_area) {
1122 /*
1123 * Destination address space area could not be created.
1124 */
1125 sh_info_remove_reference(sh_info);
1126
1127 return ENOMEM;
1128 }
1129
1130 /*
1131 * Now the destination address space area has been
1132 * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
1133 * attribute and set the sh_info.
1134 */
1135 mutex_lock(&dst_as->lock);
1136 mutex_lock(&dst_area->lock);
1137 dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
1138 dst_area->sh_info = sh_info;
1139 mutex_unlock(&dst_area->lock);
1140 mutex_unlock(&dst_as->lock);
1141
1142 return 0;
1143}
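
/*
 * Illustrative sketch (not compiled): sharing an area between two address
 * spaces. src_as, dst_as, src_base and src_size are assumed to be obtained
 * elsewhere; acc_size must match the size of the source area exactly and
 * the flag mask must be a subset of the source area's flags.
 *
 *	uintptr_t dst_base = (uintptr_t) -1;
 *	int rc = as_area_share(src_as, src_base, src_size, dst_as,
 *	    AS_AREA_READ | AS_AREA_CACHEABLE, &dst_base, 0);
 */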
1144
1145/** Check access mode for address space area.
1146 *
1147 * @param area Address space area.
1148 * @param access Access mode.
1149 *
1150 * @return False if access violates area's permissions, true
1151 * otherwise.
1152 *
1153 */
1154NO_TRACE bool as_area_check_access(as_area_t *area, pf_access_t access)
1155{
1156 ASSERT(mutex_locked(&area->lock));
1157
1158 int flagmap[] = {
1159 [PF_ACCESS_READ] = AS_AREA_READ,
1160 [PF_ACCESS_WRITE] = AS_AREA_WRITE,
1161 [PF_ACCESS_EXEC] = AS_AREA_EXEC
1162 };
1163
1164 if (!(area->flags & flagmap[access]))
1165 return false;
1166
1167 return true;
1168}
1169
1170/** Convert address space area flags to page flags.
1171 *
1172 * @param aflags Flags of some address space area.
1173 *
1174 * @return Flags to be passed to page_mapping_insert().
1175 *
1176 */
1177NO_TRACE static unsigned int area_flags_to_page_flags(unsigned int aflags)
1178{
1179 unsigned int flags = PAGE_USER | PAGE_PRESENT;
1180
1181 if (aflags & AS_AREA_READ)
1182 flags |= PAGE_READ;
1183
1184 if (aflags & AS_AREA_WRITE)
1185 flags |= PAGE_WRITE;
1186
1187 if (aflags & AS_AREA_EXEC)
1188 flags |= PAGE_EXEC;
1189
1190 if (aflags & AS_AREA_CACHEABLE)
1191 flags |= PAGE_CACHEABLE;
1192
1193 return flags;
1194}
1195
1196/** Change address space area flags.
1197 *
1198 * The idea is to have the same data, but with a different access mode.
1199 * This is needed e.g. for writing code into memory and then executing it.
1200 * In order for this to work properly, this may copy the data
1201 * into private anonymous memory (unless it's already there).
1202 *
1203 * @param as Address space.
1204 * @param flags Flags of the area memory.
1205 * @param address Address within the area to be changed.
1206 *
1207 * @return Zero on success or a value from @ref errno.h on failure.
1208 *
1209 */
1210int as_area_change_flags(as_t *as, unsigned int flags, uintptr_t address)
1211{
1212 /* Flags for the new memory mapping */
1213 unsigned int page_flags = area_flags_to_page_flags(flags);
1214
1215 mutex_lock(&as->lock);
1216
1217 as_area_t *area = find_area_and_lock(as, address);
1218 if (!area) {
1219 mutex_unlock(&as->lock);
1220 return ENOENT;
1221 }
1222
1223 if ((area->sh_info) || (area->backend != &anon_backend)) {
1224 /* Copying shared areas not supported yet */
1225 /* Copying non-anonymous memory not supported yet */
1226 mutex_unlock(&area->lock);
1227 mutex_unlock(&as->lock);
1228 return ENOTSUP;
1229 }
1230
1231 /*
1232 * Compute total number of used pages in the used_space B+tree
1233 */
1234 size_t used_pages = 0;
1235
1236 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1237 node) {
1238 btree_key_t i;
1239
1240 for (i = 0; i < node->keys; i++)
1241 used_pages += (size_t) node->value[i];
1242 }
1243
1244 /* An array for storing frame numbers */
1245 uintptr_t *old_frame = malloc(used_pages * sizeof(uintptr_t), 0);
1246
1247 page_table_lock(as, false);
1248
1249 /*
1250 * Start TLB shootdown sequence.
1251 */
1252 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
1253 area->pages);
1254
1255 /*
1256 * Remove used pages from page tables and remember their frame
1257 * numbers.
1258 */
1259 size_t frame_idx = 0;
1260
1261 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1262 node) {
1263 btree_key_t i;
1264
1265 for (i = 0; i < node->keys; i++) {
1266 uintptr_t ptr = node->key[i];
1267 size_t size;
1268
1269 for (size = 0; size < (size_t) node->value[i]; size++) {
1270 pte_t *pte = page_mapping_find(as,
1271 ptr + P2SZ(size), false);
1272
1273 ASSERT(pte);
1274 ASSERT(PTE_VALID(pte));
1275 ASSERT(PTE_PRESENT(pte));
1276
1277 old_frame[frame_idx++] = PTE_GET_FRAME(pte);
1278
1279 /* Remove old mapping */
1280 page_mapping_remove(as, ptr + P2SZ(size));
1281 }
1282 }
1283 }
1284
1285 /*
1286 * Finish TLB shootdown sequence.
1287 */
1288
1289 tlb_invalidate_pages(as->asid, area->base, area->pages);
1290
1291 /*
1292 * Invalidate potential software translation caches
1293 * (e.g. TSB on sparc64, PHT on ppc32).
1294 */
1295 as_invalidate_translation_cache(as, area->base, area->pages);
1296 tlb_shootdown_finalize(ipl);
1297
1298 page_table_unlock(as, false);
1299
1300 /*
1301 * Set the new flags.
1302 */
1303 area->flags = flags;
1304
1305 /*
1306 * Map pages back in with new flags. This step is kept separate
1307 * so that the memory area cannot be accessed with both the old and
1308 * the new flags at once.
1309 */
1310 frame_idx = 0;
1311
1312 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1313 node) {
1314 btree_key_t i;
1315
1316 for (i = 0; i < node->keys; i++) {
1317 uintptr_t ptr = node->key[i];
1318 size_t size;
1319
1320 for (size = 0; size < (size_t) node->value[i]; size++) {
1321 page_table_lock(as, false);
1322
1323 /* Insert the new mapping */
1324 page_mapping_insert(as, ptr + P2SZ(size),
1325 old_frame[frame_idx++], page_flags);
1326
1327 page_table_unlock(as, false);
1328 }
1329 }
1330 }
1331
1332 free(old_frame);
1333
1334 mutex_unlock(&area->lock);
1335 mutex_unlock(&as->lock);
1336
1337 return 0;
1338}
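
/*
 * Illustrative sketch (not compiled) of the use case mentioned above:
 * code is first written into a private anonymous read/write area and the
 * area is then switched to read/execute. virt is assumed to be the base
 * of such an area, i.e. one that is neither shared nor backed by anything
 * other than the anonymous backend.
 *
 *	// ... caller fills the area with generated machine code ...
 *	int rc = as_area_change_flags(AS, AS_AREA_READ | AS_AREA_EXEC |
 *	    AS_AREA_CACHEABLE, virt);
 */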
1339
1340/** Handle page fault within the current address space.
1341 *
1342 * This is the high-level page fault handler. It decides whether the page fault
1343 * can be resolved by any backend and if so, it invokes the backend to resolve
1344 * the page fault.
1345 *
1346 * Interrupts are assumed disabled.
1347 *
1348 * @param address Faulting address.
1349 * @param access Access mode that caused the page fault (i.e.
1350 * read/write/exec).
1351 * @param istate Pointer to the interrupted state.
1352 *
1353 * @return AS_PF_FAULT on page fault.
1354 * @return AS_PF_OK on success.
1355 * @return AS_PF_DEFER if the fault was caused by copy_to_uspace()
1356 * or copy_from_uspace().
1357 *
1358 */
1359int as_page_fault(uintptr_t address, pf_access_t access, istate_t *istate)
1360{
1361 uintptr_t page = ALIGN_DOWN(address, PAGE_SIZE);
1362 int rc = AS_PF_FAULT;
1363
1364 if (!THREAD)
1365 goto page_fault;
1366
1367 if (!AS)
1368 goto page_fault;
1369
1370 mutex_lock(&AS->lock);
1371 as_area_t *area = find_area_and_lock(AS, page);
1372 if (!area) {
1373 /*
1374 * No area contained mapping for 'page'.
1375 * Signal page fault to low-level handler.
1376 */
1377 mutex_unlock(&AS->lock);
1378 goto page_fault;
1379 }
1380
1381 if (area->attributes & AS_AREA_ATTR_PARTIAL) {
1382 /*
1383 * The address space area is not fully initialized.
1384 * Avoid possible race by returning error.
1385 */
1386 mutex_unlock(&area->lock);
1387 mutex_unlock(&AS->lock);
1388 goto page_fault;
1389 }
1390
1391 if ((!area->backend) || (!area->backend->page_fault)) {
1392 /*
1393 * The address space area is not backed by any backend
1394 * or the backend cannot handle page faults.
1395 */
1396 mutex_unlock(&area->lock);
1397 mutex_unlock(&AS->lock);
1398 goto page_fault;
1399 }
1400
1401 page_table_lock(AS, false);
1402
1403 /*
1404 * To avoid race condition between two page faults on the same address,
1405 * we need to make sure the mapping has not been already inserted.
1406 */
1407 pte_t *pte;
1408 if ((pte = page_mapping_find(AS, page, false))) {
1409 if (PTE_PRESENT(pte)) {
1410 if (((access == PF_ACCESS_READ) && PTE_READABLE(pte)) ||
1411 (access == PF_ACCESS_WRITE && PTE_WRITABLE(pte)) ||
1412 (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(pte))) {
1413 page_table_unlock(AS, false);
1414 mutex_unlock(&area->lock);
1415 mutex_unlock(&AS->lock);
1416 return AS_PF_OK;
1417 }
1418 }
1419 }
1420
1421 /*
1422 * Resort to the backend page fault handler.
1423 */
1424 rc = area->backend->page_fault(area, page, access);
1425 if (rc != AS_PF_OK) {
1426 page_table_unlock(AS, false);
1427 mutex_unlock(&area->lock);
1428 mutex_unlock(&AS->lock);
1429 goto page_fault;
1430 }
1431
1432 page_table_unlock(AS, false);
1433 mutex_unlock(&area->lock);
1434 mutex_unlock(&AS->lock);
1435 return AS_PF_OK;
1436
1437page_fault:
1438 if (THREAD && THREAD->in_copy_from_uspace) {
1439 THREAD->in_copy_from_uspace = false;
1440 istate_set_retaddr(istate,
1441 (uintptr_t) &memcpy_from_uspace_failover_address);
1442 } else if (THREAD && THREAD->in_copy_to_uspace) {
1443 THREAD->in_copy_to_uspace = false;
1444 istate_set_retaddr(istate,
1445 (uintptr_t) &memcpy_to_uspace_failover_address);
1446 } else if (rc == AS_PF_SILENT) {
1447 printf("Killing task %" PRIu64 " due to a "
1448 "failed late reservation request.\n", TASK->taskid);
1449 task_kill_self(true);
1450 } else {
1451 fault_if_from_uspace(istate, "Page fault: %p.", (void *) address);
1452 panic_memtrap(istate, access, address, NULL);
1453 }
1454
1455 return AS_PF_DEFER;
1456}
1457
1458/** Switch address spaces.
1459 *
1460 * Note that this function cannot sleep as it is essentially a part of
1461 * scheduling. Sleeping here would lead to deadlock on wakeup. Another
1462 * thing which is forbidden in this context is locking the address space.
1463 *
1464 * When this function is entered, no spinlocks may be held.
1465 *
1466 * @param old_as Old address space or NULL.
1467 * @param new_as New address space.
1468 *
1469 */
1470void as_switch(as_t *old_as, as_t *new_as)
1471{
1472 DEADLOCK_PROBE_INIT(p_asidlock);
1473 preemption_disable();
1474
1475retry:
1476 (void) interrupts_disable();
1477 if (!spinlock_trylock(&asidlock)) {
1478 /*
1479 * Avoid deadlock with TLB shootdown.
1480 * We can enable interrupts here because
1481 * preemption is disabled. We should not be
1482 * holding any other lock.
1483 */
1484 (void) interrupts_enable();
1485 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
1486 goto retry;
1487 }
1488 preemption_enable();
1489
1490 /*
1491 * First, take care of the old address space.
1492 */
1493 if (old_as) {
1494 ASSERT(old_as->cpu_refcount);
1495
1496 if ((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) {
1497 /*
1498 * The old address space is no longer active on
1499 * any processor. It can be appended to the
1500 * list of inactive address spaces with assigned
1501 * ASID.
1502 */
1503 ASSERT(old_as->asid != ASID_INVALID);
1504
1505 list_append(&old_as->inactive_as_with_asid_link,
1506 &inactive_as_with_asid_list);
1507 }
1508
1509 /*
1510 * Perform architecture-specific tasks when the address space
1511 * is being removed from the CPU.
1512 */
1513 as_deinstall_arch(old_as);
1514 }
1515
1516 /*
1517 * Second, prepare the new address space.
1518 */
1519 if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) {
1520 if (new_as->asid != ASID_INVALID)
1521 list_remove(&new_as->inactive_as_with_asid_link);
1522 else
1523 new_as->asid = asid_get();
1524 }
1525
1526#ifdef AS_PAGE_TABLE
1527 SET_PTL0_ADDRESS(new_as->genarch.page_table);
1528#endif
1529
1530 /*
1531 * Perform architecture-specific steps.
1532 * (e.g. write ASID to hardware register etc.)
1533 */
1534 as_install_arch(new_as);
1535
1536 spinlock_unlock(&asidlock);
1537
1538 AS = new_as;
1539}
1540
1541/** Compute flags for the virtual address translation subsystem.
1542 *
1543 * @param area Address space area.
1544 *
1545 * @return Flags to be used in page_mapping_insert().
1546 *
1547 */
1548NO_TRACE unsigned int as_area_get_flags(as_area_t *area)
1549{
1550 ASSERT(mutex_locked(&area->lock));
1551
1552 return area_flags_to_page_flags(area->flags);
1553}
1554
1555/** Create page table.
1556 *
1557 * Depending on the architecture, create either an address-space-private
1558 * or a global page table.
1559 *
1560 * @param flags Flags saying whether the page table is for the kernel
1561 * address space.
1562 *
1563 * @return First entry of the page table.
1564 *
1565 */
1566NO_TRACE pte_t *page_table_create(unsigned int flags)
1567{
1568 ASSERT(as_operations);
1569 ASSERT(as_operations->page_table_create);
1570
1571 return as_operations->page_table_create(flags);
1572}
1573
1574/** Destroy page table.
1575 *
1576 * Destroy page table in architecture specific way.
1577 *
1578 * @param page_table Physical address of PTL0.
1579 *
1580 */
1581NO_TRACE void page_table_destroy(pte_t *page_table)
1582{
1583 ASSERT(as_operations);
1584 ASSERT(as_operations->page_table_destroy);
1585
1586 as_operations->page_table_destroy(page_table);
1587}
1588
1589/** Lock page table.
1590 *
1591 * This function should be called before any page_mapping_insert(),
1592 * page_mapping_remove() and page_mapping_find().
1593 *
1594 * Locking order is such that address space areas must be locked
1595 * prior to this call. Address space can be locked prior to this
1596 * call in which case the lock argument is false.
1597 *
1598 * @param as Address space.
1599 * @param lock If false, do not attempt to lock as->lock.
1600 *
1601 */
1602NO_TRACE void page_table_lock(as_t *as, bool lock)
1603{
1604 ASSERT(as_operations);
1605 ASSERT(as_operations->page_table_lock);
1606
1607 as_operations->page_table_lock(as, lock);
1608}
1609
1610/** Unlock page table.
1611 *
1612 * @param as Address space.
1613 * @param unlock If false, do not attempt to unlock as->lock.
1614 *
1615 */
1616NO_TRACE void page_table_unlock(as_t *as, bool unlock)
1617{
1618 ASSERT(as_operations);
1619 ASSERT(as_operations->page_table_unlock);
1620
1621 as_operations->page_table_unlock(as, unlock);
1622}
1623
1624/** Test whether page tables are locked.
1625 *
1626 * @param as Address space where the page tables belong.
1627 *
1628 * @return True if the page tables belonging to the address space
1629 * are locked, otherwise false.
1630 */
1631NO_TRACE bool page_table_locked(as_t *as)
1632{
1633 ASSERT(as_operations);
1634 ASSERT(as_operations->page_table_locked);
1635
1636 return as_operations->page_table_locked(as);
1637}
1638
1639/** Return size of the address space area with given base.
1640 *
1641 * @param base Arbitrary address inside the address space area.
1642 *
1643 * @return Size of the address space area in bytes or zero if it
1644 * does not exist.
1645 *
1646 */
1647size_t as_area_get_size(uintptr_t base)
1648{
1649 size_t size;
1650
1651 page_table_lock(AS, true);
1652 as_area_t *src_area = find_area_and_lock(AS, base);
1653
1654 if (src_area) {
1655 size = P2SZ(src_area->pages);
1656 mutex_unlock(&src_area->lock);
1657 } else
1658 size = 0;
1659
1660 page_table_unlock(AS, true);
1661 return size;
1662}
1663
1664/** Mark portion of address space area as used.
1665 *
1666 * The address space area must be already locked.
1667 *
1668 * @param area Address space area.
1669 * @param page First page to be marked.
1670 * @param count Number of pages to be marked.
1671 *
1672 * @return False on failure or true on success.
1673 *
1674 */
1675bool used_space_insert(as_area_t *area, uintptr_t page, size_t count)
1676{
1677 ASSERT(mutex_locked(&area->lock));
1678 ASSERT(IS_ALIGNED(page, PAGE_SIZE));
1679 ASSERT(count);
1680
1681 btree_node_t *leaf;
1682 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1683 if (pages) {
1684 /*
1685 * We hit the beginning of some used space.
1686 */
1687 return false;
1688 }
1689
1690 if (!leaf->keys) {
1691 btree_insert(&area->used_space, page, (void *) count, leaf);
1692 goto success;
1693 }
1694
1695 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space, leaf);
1696 if (node) {
1697 uintptr_t left_pg = node->key[node->keys - 1];
1698 uintptr_t right_pg = leaf->key[0];
1699 size_t left_cnt = (size_t) node->value[node->keys - 1];
1700 size_t right_cnt = (size_t) leaf->value[0];
1701
1702 /*
1703 * Examine the possibility that the interval fits
1704 * somewhere between the rightmost interval of
1705 * the left neighbour and the first interval of the leaf.
1706 */
1707
1708 if (page >= right_pg) {
1709 /* Do nothing. */
1710 } else if (overlaps(page, P2SZ(count), left_pg,
1711 P2SZ(left_cnt))) {
1712 /* The interval intersects with the left interval. */
1713 return false;
1714 } else if (overlaps(page, P2SZ(count), right_pg,
1715 P2SZ(right_cnt))) {
1716 /* The interval intersects with the right interval. */
1717 return false;
1718 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1719 (page + P2SZ(count) == right_pg)) {
1720 /*
1721 * The interval can be added by merging the two already
1722 * present intervals.
1723 */
1724 node->value[node->keys - 1] += count + right_cnt;
1725 btree_remove(&area->used_space, right_pg, leaf);
1726 goto success;
1727 } else if (page == left_pg + P2SZ(left_cnt)) {
1728 /*
1729 * The interval can be added by simply growing the left
1730 * interval.
1731 */
1732 node->value[node->keys - 1] += count;
1733 goto success;
1734 } else if (page + P2SZ(count) == right_pg) {
1735 /*
1736 * The interval can be added by simply moving the base of
1737 * the right interval down and increasing its size
1738 * accordingly.
1739 */
1740 leaf->value[0] += count;
1741 leaf->key[0] = page;
1742 goto success;
1743 } else {
1744 /*
1745 * The interval is between both neighbouring intervals,
1746 * but cannot be merged with any of them.
1747 */
1748 btree_insert(&area->used_space, page, (void *) count,
1749 leaf);
1750 goto success;
1751 }
1752 } else if (page < leaf->key[0]) {
1753 uintptr_t right_pg = leaf->key[0];
1754 size_t right_cnt = (size_t) leaf->value[0];
1755
1756 /*
1757 * Investigate the border case in which the left neighbour does
1758 * not exist but the interval fits from the left.
1759 */
1760
1761 if (overlaps(page, P2SZ(count), right_pg, P2SZ(right_cnt))) {
1762 /* The interval intersects with the right interval. */
1763 return false;
1764 } else if (page + P2SZ(count) == right_pg) {
1765 /*
1766 * The interval can be added by moving the base of the
1767 * right interval down and increasing its size
1768 * accordingly.
1769 */
1770 leaf->key[0] = page;
1771 leaf->value[0] += count;
1772 goto success;
1773 } else {
1774 /*
1775 * The interval doesn't adjoin with the right interval.
1776 * It must be added individually.
1777 */
1778 btree_insert(&area->used_space, page, (void *) count,
1779 leaf);
1780 goto success;
1781 }
1782 }
1783
1784 node = btree_leaf_node_right_neighbour(&area->used_space, leaf);
1785 if (node) {
1786 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1787 uintptr_t right_pg = node->key[0];
1788 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1789 size_t right_cnt = (size_t) node->value[0];
1790
1791 /*
1792 * Examine the possibility that the interval fits
1793 * somewhere between the leftmost interval of
1794 * the right neighbour and the last interval of the leaf.
1795 */
1796
1797 if (page < left_pg) {
1798 /* Do nothing. */
1799 } else if (overlaps(page, P2SZ(count), left_pg,
1800 P2SZ(left_cnt))) {
1801 /* The interval intersects with the left interval. */
1802 return false;
1803 } else if (overlaps(page, P2SZ(count), right_pg,
1804 P2SZ(right_cnt))) {
1805 /* The interval intersects with the right interval. */
1806 return false;
1807 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1808 (page + P2SZ(count) == right_pg)) {
1809 /*
1810 * The interval can be added by merging the two already
1811 * present intervals.
1812 */
1813 leaf->value[leaf->keys - 1] += count + right_cnt;
1814 btree_remove(&area->used_space, right_pg, node);
1815 goto success;
1816 } else if (page == left_pg + P2SZ(left_cnt)) {
1817 /*
1818 * The interval can be added by simply growing the left
1819 * interval.
1820 */
1821 leaf->value[leaf->keys - 1] += count;
1822 goto success;
1823 } else if (page + P2SZ(count) == right_pg) {
1824 /*
1825 * The interval can be added by simply moving the base of
1826 * the right interval down and increasing its size
1827 * accordingly.
1828 */
1829 node->value[0] += count;
1830 node->key[0] = page;
1831 goto success;
1832 } else {
1833 /*
1834 * The interval is between both neighbouring intervals,
1835 * but cannot be merged with any of them.
1836 */
1837 btree_insert(&area->used_space, page, (void *) count,
1838 leaf);
1839 goto success;
1840 }
1841 } else if (page >= leaf->key[leaf->keys - 1]) {
1842 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1843 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1844
1845 /*
1846 * Investigate the border case in which the right neighbour
1847 * does not exist but the interval fits from the right.
1848 */
1849
1850 if (overlaps(page, P2SZ(count), left_pg, P2SZ(left_cnt))) {
1851 /* The interval intersects with the left interval. */
1852 return false;
1853 } else if (left_pg + P2SZ(left_cnt) == page) {
1854 /*
1855 * The interval can be added by growing the left
1856 * interval.
1857 */
1858 leaf->value[leaf->keys - 1] += count;
1859 goto success;
1860 } else {
1861 /*
1862 * The interval doesn't adjoin with the left interval.
1863 * It must be added individually.
1864 */
1865 btree_insert(&area->used_space, page, (void *) count,
1866 leaf);
1867 goto success;
1868 }
1869 }
1870
1871 /*
1872 * Note that if the algorithm made it thus far, the interval can fit
1873 * only between two other intervals of the leaf. The two border cases
1874 * were already resolved.
1875 */
1876 btree_key_t i;
1877 for (i = 1; i < leaf->keys; i++) {
1878 if (page < leaf->key[i]) {
1879 uintptr_t left_pg = leaf->key[i - 1];
1880 uintptr_t right_pg = leaf->key[i];
1881 size_t left_cnt = (size_t) leaf->value[i - 1];
1882 size_t right_cnt = (size_t) leaf->value[i];
1883
1884 /*
1885 * The interval fits between left_pg and right_pg.
1886 */
1887
1888 if (overlaps(page, P2SZ(count), left_pg,
1889 P2SZ(left_cnt))) {
1890 /*
1891 * The interval intersects with the left
1892 * interval.
1893 */
1894 return false;
1895 } else if (overlaps(page, P2SZ(count), right_pg,
1896 P2SZ(right_cnt))) {
1897 /*
1898 * The interval intersects with the right
1899 * interval.
1900 */
1901 return false;
1902 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1903 (page + P2SZ(count) == right_pg)) {
1904 /*
1905 * The interval can be added by merging the two
1906 * already present intervals.
1907 */
1908 leaf->value[i - 1] += count + right_cnt;
1909 btree_remove(&area->used_space, right_pg, leaf);
1910 goto success;
1911 } else if (page == left_pg + P2SZ(left_cnt)) {
1912 /*
1913 * The interval can be added by simply growing
1914 * the left interval.
1915 */
1916 leaf->value[i - 1] += count;
1917 goto success;
1918 } else if (page + P2SZ(count) == right_pg) {
1919 /*
1920 * The interval can be added by simply moving
1921 * the base of the right interval down and
1922 * increasing its size accordingly.
1923 */
1924 leaf->value[i] += count;
1925 leaf->key[i] = page;
1926 goto success;
1927 } else {
1928 /*
1929 * The interval is between both neighbouring
1930 * intervals, but cannot be merged with any of
1931 * them.
1932 */
1933 btree_insert(&area->used_space, page,
1934 (void *) count, leaf);
1935 goto success;
1936 }
1937 }
1938 }
1939
1940 panic("Inconsistency detected while adding %zu pages of used "
1941 "space at %p.", count, (void *) page);
1942
1943success:
1944 area->resident += count;
1945 return true;
1946}
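
/*
 * Worked example (assuming 4 KiB pages): if used_space already records the
 * intervals [0x1000, 2 pages] and [0x4000, 1 page], then calling
 * used_space_insert(area, 0x3000, 1) merges all three into a single
 * interval [0x1000, 4 pages], because the new interval adjoins its left
 * neighbour at 0x3000 and its right neighbour at 0x4000.
 */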
1947
1948/** Mark portion of address space area as unused.
1949 *
1950 * The address space area must be already locked.
1951 *
1952 * @param area Address space area.
1953 * @param page First page to be marked.
1954 * @param count Number of pages to be marked.
1955 *
1956 * @return False on failure or true on success.
1957 *
1958 */
1959bool used_space_remove(as_area_t *area, uintptr_t page, size_t count)
1960{
1961 ASSERT(mutex_locked(&area->lock));
1962 ASSERT(IS_ALIGNED(page, PAGE_SIZE));
1963 ASSERT(count);
1964
1965 btree_node_t *leaf;
1966 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1967 if (pages) {
1968 /*
1969 * We are lucky, page is the beginning of some interval.
1970 */
1971 if (count > pages) {
1972 return false;
1973 } else if (count == pages) {
1974 btree_remove(&area->used_space, page, leaf);
1975 goto success;
1976 } else {
1977 /*
1978 * Find the respective interval.
1979 * Decrease its size and relocate its start address.
1980 */
1981 btree_key_t i;
1982 for (i = 0; i < leaf->keys; i++) {
1983 if (leaf->key[i] == page) {
1984 leaf->key[i] += P2SZ(count);
1985 leaf->value[i] -= count;
1986 goto success;
1987 }
1988 }
1989
1990 goto error;
1991 }
1992 }
1993
1994 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space,
1995 leaf);
1996 if ((node) && (page < leaf->key[0])) {
1997 uintptr_t left_pg = node->key[node->keys - 1];
1998 size_t left_cnt = (size_t) node->value[node->keys - 1];
1999
2000 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2001 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2002 /*
2003 * The interval is contained in the rightmost
2004 * interval of the left neighbour and can be
2005 * removed by updating the size of the bigger
2006 * interval.
2007 */
2008 node->value[node->keys - 1] -= count;
2009 goto success;
2010 } else if (page + P2SZ(count) <
2011 left_pg + P2SZ(left_cnt)) {
2012 size_t new_cnt;
2013
2014 /*
2015 * The interval is contained in the rightmost
2016 * interval of the left neighbour but its
2017 * removal requires both updating the size of
2018 * the original interval and also inserting a
2019 * new interval.
2020 */
2021 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2022 (page + P2SZ(count))) >> PAGE_WIDTH;
2023 node->value[node->keys - 1] -= count + new_cnt;
2024 btree_insert(&area->used_space, page +
2025 P2SZ(count), (void *) new_cnt, leaf);
2026 goto success;
2027 }
2028 }
2029
2030 return false;
2031 } else if (page < leaf->key[0])
2032 return false;
2033
2034 if (page > leaf->key[leaf->keys - 1]) {
2035 uintptr_t left_pg = leaf->key[leaf->keys - 1];
2036 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
2037
2038 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2039 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2040 /*
2041 * The interval is contained in the rightmost
2042 * interval of the leaf and can be removed by
2043 * updating the size of the bigger interval.
2044 */
2045 leaf->value[leaf->keys - 1] -= count;
2046 goto success;
2047 } else if (page + P2SZ(count) < left_pg +
2048 P2SZ(left_cnt)) {
2049 size_t new_cnt;
2050
2051 /*
2052 * The interval is contained in the rightmost
2053 * interval of the leaf but its removal
2054 * requires both updating the size of the
2055 * original interval and also inserting a new
2056 * interval.
2057 */
2058 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2059 (page + P2SZ(count))) >> PAGE_WIDTH;
2060 leaf->value[leaf->keys - 1] -= count + new_cnt;
2061 btree_insert(&area->used_space, page +
2062 P2SZ(count), (void *) new_cnt, leaf);
2063 goto success;
2064 }
2065 }
2066
2067 return false;
2068 }
2069
2070 /*
2071 * The border cases have been already resolved.
2072 * Now the interval can be only between intervals of the leaf.
2073 */
2074 btree_key_t i;
2075 for (i = 1; i < leaf->keys - 1; i++) {
2076 if (page < leaf->key[i]) {
2077 uintptr_t left_pg = leaf->key[i - 1];
2078 size_t left_cnt = (size_t) leaf->value[i - 1];
2079
2080 /*
2081 * Now the interval is between intervals corresponding
2082 * to (i - 1) and i.
2083 */
2084 if (overlaps(left_pg, P2SZ(left_cnt), page,
2085 P2SZ(count))) {
2086 if (page + P2SZ(count) ==
2087 left_pg + P2SZ(left_cnt)) {
2088 /*
2089 * The interval is contained in the
2090 * interval (i - 1) of the leaf and can
2091 * be removed by updating the size of
2092 * the bigger interval.
2093 */
2094 leaf->value[i - 1] -= count;
2095 goto success;
2096 } else if (page + P2SZ(count) <
2097 left_pg + P2SZ(left_cnt)) {
2098 size_t new_cnt;
2099
2100 /*
2101 * The interval is contained in the
2102 * interval (i - 1) of the leaf but its
2103 * removal requires both updating the
2104 * size of the original interval and
2105 * also inserting a new interval.
2106 */
2107 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2108 (page + P2SZ(count))) >>
2109 PAGE_WIDTH;
2110 leaf->value[i - 1] -= count + new_cnt;
2111 btree_insert(&area->used_space, page +
2112 P2SZ(count), (void *) new_cnt,
2113 leaf);
2114 goto success;
2115 }
2116 }
2117
2118 return false;
2119 }
2120 }
2121
2122error:
2123 panic("Inconsistency detected while removing %zu pages of used "
2124 "space from %p.", count, (void *) page);
2125
2126success:
2127 area->resident -= count;
2128 return true;
2129}
2130
2131/*
2132 * Address space related syscalls.
2133 */
2134
2135sysarg_t sys_as_area_create(uintptr_t base, size_t size, unsigned int flags,
2136 uintptr_t bound)
2137{
2138 uintptr_t virt = base;
2139 as_area_t *area = as_area_create(AS, flags, size,
2140 AS_AREA_ATTR_NONE, &anon_backend, NULL, &virt, bound);
2141 if (area == NULL)
2142 return (sysarg_t) -1;
2143
2144 return (sysarg_t) virt;
2145}
2146
2147sysarg_t sys_as_area_resize(uintptr_t address, size_t size, unsigned int flags)
2148{
2149 return (sysarg_t) as_area_resize(AS, address, size, 0);
2150}
2151
2152sysarg_t sys_as_area_change_flags(uintptr_t address, unsigned int flags)
2153{
2154 return (sysarg_t) as_area_change_flags(AS, flags, address);
2155}
2156
2157sysarg_t sys_as_area_destroy(uintptr_t address)
2158{
2159 return (sysarg_t) as_area_destroy(AS, address);
2160}
2161
2162/** Get list of address space areas.
2163 *
2164 * @param as Address space.
2165 * @param obuf Place to save pointer to returned buffer.
2166 * @param osize Place to save size of returned buffer.
2167 *
2168 */
2169void as_get_area_info(as_t *as, as_area_info_t **obuf, size_t *osize)
2170{
2171 mutex_lock(&as->lock);
2172
2173 /* First pass, count number of areas. */
2174
2175 size_t area_cnt = 0;
2176
2177 list_foreach(as->as_area_btree.leaf_list, leaf_link, btree_node_t,
2178 node) {
2179 area_cnt += node->keys;
2180 }
2181
2182 size_t isize = area_cnt * sizeof(as_area_info_t);
2183 as_area_info_t *info = malloc(isize, 0);
2184
2185 /* Second pass, record data. */
2186
2187 size_t area_idx = 0;
2188
2189 list_foreach(as->as_area_btree.leaf_list, leaf_link, btree_node_t,
2190 node) {
2191 btree_key_t i;
2192
2193 for (i = 0; i < node->keys; i++) {
2194 as_area_t *area = node->value[i];
2195
2196 ASSERT(area_idx < area_cnt);
2197 mutex_lock(&area->lock);
2198
2199 info[area_idx].start_addr = area->base;
2200 info[area_idx].size = P2SZ(area->pages);
2201 info[area_idx].flags = area->flags;
2202 ++area_idx;
2203
2204 mutex_unlock(&area->lock);
2205 }
2206 }
2207
2208 mutex_unlock(&as->lock);
2209
2210 *obuf = info;
2211 *osize = isize;
2212}
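
/*
 * Illustrative sketch (not compiled): enumerating the areas of the current
 * address space. The returned buffer is owned by the caller and must be
 * freed after use.
 *
 *	as_area_info_t *info;
 *	size_t size;
 *	as_get_area_info(AS, &info, &size);
 *	for (size_t i = 0; i < size / sizeof(as_area_info_t); i++)
 *		printf("area %p, %zu bytes\n",
 *		    (void *) info[i].start_addr, info[i].size);
 *	free(info);
 */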
2213
2214/** Print out information about address space.
2215 *
2216 * @param as Address space.
2217 *
2218 */
2219void as_print(as_t *as)
2220{
2221 mutex_lock(&as->lock);
2222
2223 /* Print out info about address space areas */
2224 list_foreach(as->as_area_btree.leaf_list, leaf_link, btree_node_t,
2225 node) {
2226 btree_key_t i;
2227
2228 for (i = 0; i < node->keys; i++) {
2229 as_area_t *area = node->value[i];
2230
2231 mutex_lock(&area->lock);
2232 printf("as_area: %p, base=%p, pages=%zu"
2233 " (%p - %p)\n", area, (void *) area->base,
2234 area->pages, (void *) area->base,
2235 (void *) (area->base + P2SZ(area->pages)));
2236 mutex_unlock(&area->lock);
2237 }
2238 }
2239
2240 mutex_unlock(&as->lock);
2241}
2242
2243/** @}
2244 */