source: mainline/kernel/generic/src/mm/as.c@ d1e8440

Last change on this file since d1e8440 was 57355a40, checked in by Jakub Jermar <jakub@…>, 13 years ago

Rather than testing for address overlap with kernel address space,
test whether the address range is contained in the user address space.

1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup genericmm
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Address space related functions.
36 *
37 * This file contains address space manipulation functions.
38 * Roughly speaking, this is a higher-level client of the
39 * Virtual Address Translation (VAT) subsystem.
40 *
41 * Functionality provided by this file allows one to
42 * create address spaces and create, resize and share
43 * address space areas.
44 *
45 * @see page.c
46 *
47 */
48
49#include <mm/as.h>
50#include <arch/mm/as.h>
51#include <mm/page.h>
52#include <mm/frame.h>
53#include <mm/slab.h>
54#include <mm/tlb.h>
55#include <arch/mm/page.h>
56#include <genarch/mm/page_pt.h>
57#include <genarch/mm/page_ht.h>
58#include <mm/asid.h>
59#include <arch/mm/asid.h>
60#include <preemption.h>
61#include <synch/spinlock.h>
62#include <synch/mutex.h>
63#include <adt/list.h>
64#include <adt/btree.h>
65#include <proc/task.h>
66#include <proc/thread.h>
67#include <arch/asm.h>
68#include <panic.h>
69#include <debug.h>
70#include <print.h>
71#include <memstr.h>
72#include <macros.h>
73#include <bitops.h>
74#include <arch.h>
75#include <errno.h>
76#include <config.h>
77#include <align.h>
78#include <typedefs.h>
79#include <syscall/copy.h>
80#include <arch/interrupt.h>
81
82/**
83 * Each architecture decides what functions will be used to carry out
84 * address space operations such as creating or locking page tables.
85 */
86as_operations_t *as_operations = NULL;
87
88/** Slab for as_t objects.
89 *
90 */
91static slab_cache_t *as_slab;
92
93/** ASID subsystem lock.
94 *
95 * This lock protects:
96 * - inactive_as_with_asid_list
97 * - as->asid for each as of the as_t type
98 * - asids_allocated counter
99 *
100 */
101SPINLOCK_INITIALIZE(asidlock);
102
103/**
104 * Inactive address spaces (on all processors)
105 * that have valid ASID.
106 */
107LIST_INITIALIZE(inactive_as_with_asid_list);
108
109/** Kernel address space. */
110as_t *AS_KERNEL = NULL;
111
112NO_TRACE static int as_constructor(void *obj, unsigned int flags)
113{
114 as_t *as = (as_t *) obj;
115
116 link_initialize(&as->inactive_as_with_asid_link);
117 mutex_initialize(&as->lock, MUTEX_PASSIVE);
118
119 return as_constructor_arch(as, flags);
120}
121
122NO_TRACE static size_t as_destructor(void *obj)
123{
124 return as_destructor_arch((as_t *) obj);
125}
126
127/** Initialize address space subsystem. */
128void as_init(void)
129{
130 as_arch_init();
131
132 as_slab = slab_cache_create("as_t", sizeof(as_t), 0,
133 as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);
134
135 AS_KERNEL = as_create(FLAG_AS_KERNEL);
136 if (!AS_KERNEL)
137 panic("Cannot create kernel address space.");
138
139 /*
140 * Make sure the kernel address space
141 * reference count never drops to zero.
142 */
143 as_hold(AS_KERNEL);
144}
145
146/** Create address space.
147 *
148 * @param flags Flags that influence the way in which the address
149 * space is created.
150 *
151 */
152as_t *as_create(unsigned int flags)
153{
154 as_t *as = (as_t *) slab_alloc(as_slab, 0);
155 (void) as_create_arch(as, 0);
156
157 btree_create(&as->as_area_btree);
158
159 if (flags & FLAG_AS_KERNEL)
160 as->asid = ASID_KERNEL;
161 else
162 as->asid = ASID_INVALID;
163
164 atomic_set(&as->refcount, 0);
165 as->cpu_refcount = 0;
166
167#ifdef AS_PAGE_TABLE
168 as->genarch.page_table = page_table_create(flags);
169#else
170 page_table_create(flags);
171#endif
172
173 return as;
174}
175
176/** Destroy address space.
177 *
178 * When there are no tasks referencing this address space (i.e. its refcount is
179 * zero), the address space can be destroyed.
180 *
181 * We know that we don't hold any spinlock.
182 *
183 * @param as Address space to be destroyed.
184 *
185 */
186void as_destroy(as_t *as)
187{
188 DEADLOCK_PROBE_INIT(p_asidlock);
189
190 ASSERT(as != AS);
191 ASSERT(atomic_get(&as->refcount) == 0);
192
193 /*
194 * Since there is no reference to this address space, it is safe not to
195 * lock its mutex.
196 */
197
198 /*
199 * We need to avoid deadlock between TLB shootdown and asidlock.
200 * We therefore try to take asidlock conditionally and if we don't succeed,
201 * we enable interrupts and try again. This is done while preemption is
202 * disabled to prevent nested context switches. We also depend on the
203 * fact that so far no spinlocks are held.
204 */
205 preemption_disable();
206 ipl_t ipl = interrupts_read();
207
208retry:
209 interrupts_disable();
210 if (!spinlock_trylock(&asidlock)) {
211 interrupts_enable();
212 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
213 goto retry;
214 }
215
216 /* Interrupts disabled, enable preemption */
217 preemption_enable();
218
219 if ((as->asid != ASID_INVALID) && (as != AS_KERNEL)) {
220 if (as->cpu_refcount == 0)
221 list_remove(&as->inactive_as_with_asid_link);
222
223 asid_put(as->asid);
224 }
225
226 spinlock_unlock(&asidlock);
227 interrupts_restore(ipl);
228
229
230 /*
231 * Destroy address space areas of the address space.
232 * The B+tree must be walked carefully because it is
233 * also being destroyed.
234 */
235 bool cond = true;
236 while (cond) {
237 ASSERT(!list_empty(&as->as_area_btree.leaf_list));
238
239 btree_node_t *node =
240 list_get_instance(list_first(&as->as_area_btree.leaf_list),
241 btree_node_t, leaf_link);
242
243 if ((cond = node->keys))
244 as_area_destroy(as, node->key[0]);
245 }
246
247 btree_destroy(&as->as_area_btree);
248
249#ifdef AS_PAGE_TABLE
250 page_table_destroy(as->genarch.page_table);
251#else
252 page_table_destroy(NULL);
253#endif
254
255 slab_free(as_slab, as);
256}
257
258/** Hold a reference to an address space.
259 *
260 * Holding a reference to an address space prevents destruction
261 * of that address space.
262 *
263 * @param as Address space to be held.
264 *
265 */
266NO_TRACE void as_hold(as_t *as)
267{
268 atomic_inc(&as->refcount);
269}
270
271/** Release a reference to an address space.
272 *
273 * The last one to release a reference to an address space
274 * destroys the address space.
275 *
276 * @param as Address space to be released.
277 *
278 */
279NO_TRACE void as_release(as_t *as)
280{
281 if (atomic_predec(&as->refcount) == 0)
282 as_destroy(as);
283}
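
/*
 * Illustrative sketch, not part of the original file: the reference-counting
 * life cycle built only from the functions above. The surrounding task logic
 * is omitted and the function name is made up for the example.
 */
static void example_as_lifecycle(void)
{
    /* Create a user address space; its ASID stays invalid until first use. */
    as_t *as = as_create(0);

    /* A task taking ownership of the address space holds a reference. */
    as_hold(as);

    /*
     * Dropping the last reference lets as_release() call as_destroy(),
     * which tears down the areas and the page table.
     */
    as_release(as);
}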
284
285/** Check whether the given area conflicts with other areas.
286 *
287 * @param as Address space.
288 * @param addr Starting virtual address of the area being tested.
289 * @param count Number of pages in the area being tested.
290 * @param guarded True if the area being tested is protected by guard pages.
291 * @param avoid Do not touch this area.
292 *
293 * @return True if there is no conflict, false otherwise.
294 *
295 */
296NO_TRACE static bool check_area_conflicts(as_t *as, uintptr_t addr,
297 size_t count, bool guarded, as_area_t *avoid)
298{
299 ASSERT((addr % PAGE_SIZE) == 0);
300 ASSERT(mutex_locked(&as->lock));
301
302 /*
303 * If the addition of the supposed area address and size overflows,
304 * report conflict.
305 */
306 if (overflows_into_positive(addr, P2SZ(count)))
307 return false;
308
309 /*
310 * We don't want any area to have conflicts with NULL page.
311 */
312 if (overlaps(addr, P2SZ(count), (uintptr_t) NULL, PAGE_SIZE))
313 return false;
314
315 /*
316 * The leaf node is found in O(log n), where n is proportional to
317 * the number of address space areas belonging to as.
318 * The check for conflicts is then attempted on the rightmost
319 * record in the left neighbour, the leftmost record in the right
320 * neighbour and all records in the leaf node itself.
321 */
322 btree_node_t *leaf;
323 as_area_t *area =
324 (as_area_t *) btree_search(&as->as_area_btree, addr, &leaf);
325 if (area) {
326 if (area != avoid)
327 return false;
328 }
329
330 /* First, check the two border cases. */
331 btree_node_t *node =
332 btree_leaf_node_left_neighbour(&as->as_area_btree, leaf);
333 if (node) {
334 area = (as_area_t *) node->value[node->keys - 1];
335
336 if (area != avoid) {
337 mutex_lock(&area->lock);
338
339 /*
340 * If at least one of the two areas is protected
341 * by the AS_AREA_GUARD flag then we must be sure
342 * that they are separated by at least one unmapped
343 * page.
344 */
345 int const gp = (guarded ||
346 (area->flags & AS_AREA_GUARD)) ? 1 : 0;
347
348 /*
349 * The area comes from the left neighbour node, which
350 * means that there already are some areas in the leaf
351 * node, which in turn means that adding gp is safe and
352 * will not cause an integer overflow.
353 */
354 if (overlaps(addr, P2SZ(count), area->base,
355 P2SZ(area->pages + gp))) {
356 mutex_unlock(&area->lock);
357 return false;
358 }
359
360 mutex_unlock(&area->lock);
361 }
362 }
363
364 node = btree_leaf_node_right_neighbour(&as->as_area_btree, leaf);
365 if (node) {
366 area = (as_area_t *) node->value[0];
367
368 if (area != avoid) {
369 int gp;
370
371 mutex_lock(&area->lock);
372
373 gp = (guarded || (area->flags & AS_AREA_GUARD)) ? 1 : 0;
374 if (gp && overflows(addr, P2SZ(count))) {
375 /*
376 * Guard page not needed if the supposed area
377 * is adjacent to the end of the address space.
378 * We already know that the following test is
379 * going to fail...
380 */
381 gp--;
382 }
383
384 if (overlaps(addr, P2SZ(count + gp), area->base,
385 P2SZ(area->pages))) {
386 mutex_unlock(&area->lock);
387 return false;
388 }
389
390 mutex_unlock(&area->lock);
391 }
392 }
393
394 /* Second, check the leaf node. */
395 btree_key_t i;
396 for (i = 0; i < leaf->keys; i++) {
397 area = (as_area_t *) leaf->value[i];
398 int agp;
399 int gp;
400
401 if (area == avoid)
402 continue;
403
404 mutex_lock(&area->lock);
405
406 gp = (guarded || (area->flags & AS_AREA_GUARD)) ? 1 : 0;
407 agp = gp;
408
409 /*
410 * Sanitize the two possible unsigned integer overflows.
411 */
412 if (gp && overflows(addr, P2SZ(count)))
413 gp--;
414 if (agp && overflows(area->base, P2SZ(area->pages)))
415 agp--;
416
417 if (overlaps(addr, P2SZ(count + gp), area->base,
418 P2SZ(area->pages + agp))) {
419 mutex_unlock(&area->lock);
420 return false;
421 }
422
423 mutex_unlock(&area->lock);
424 }
425
426 /*
427 * So far, the area does not conflict with other areas.
428 * Check if it is contained in the user address space.
429 */
430 if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
431 return iswithin(USER_ADDRESS_SPACE_START,
432 (USER_ADDRESS_SPACE_END - USER_ADDRESS_SPACE_START) + 1,
433 addr, P2SZ(count));
434 }
435
436 return true;
437}
438
439/** Return the start address of an unmapped range of the address space
440 *
441 * The address space must be already locked when calling
442 * this function.
443 *
444 * @param as Address space.
445 * @param bound Lowest address bound.
446 * @param size Requested size of the allocation.
447 * @param guarded True if the allocation must be protected by guard pages.
448 *
449 * @return Address of the beginning of unmapped address space area.
450 * @return -1 if no suitable address space area was found.
451 *
452 */
453NO_TRACE static uintptr_t as_get_unmapped_area(as_t *as, uintptr_t bound,
454 size_t size, bool guarded)
455{
456 ASSERT(mutex_locked(&as->lock));
457
458 if (size == 0)
459 return (uintptr_t) -1;
460
461 /*
462 * Make sure we allocate from page-aligned
463 * address. Check for possible overflow in
464 * each step.
465 */
466
467 size_t pages = SIZE2FRAMES(size);
468
469 /*
470 * Find the lowest unmapped address aligned on the size
471 * boundary, not smaller than bound and of the required size.
472 */
473
474 /* First check the bound address itself */
475 uintptr_t addr = ALIGN_UP(bound, PAGE_SIZE);
476 if (addr >= bound) {
477 if (guarded) {
478 /* Leave an unmapped page between the lower
479 * bound and the area's start address.
480 */
481 addr += P2SZ(1);
482 }
483
484 if (check_area_conflicts(as, addr, pages, guarded, NULL))
485 return addr;
486 }
487
488 /* Next, check the addresses behind each area */
489 list_foreach(as->as_area_btree.leaf_list, cur) {
490 btree_node_t *node =
491 list_get_instance(cur, btree_node_t, leaf_link);
492
493 for (btree_key_t i = 0; i < node->keys; i++) {
494 as_area_t *area = (as_area_t *) node->value[i];
495
496 mutex_lock(&area->lock);
497
498 addr =
499 ALIGN_UP(area->base + P2SZ(area->pages), PAGE_SIZE);
500
501 if (guarded || area->flags & AS_AREA_GUARD) {
502 /* We must leave an unmapped page
503 * between the two areas.
504 */
505 addr += P2SZ(1);
506 }
507
508 bool avail =
509 ((addr >= bound) && (addr >= area->base) &&
510 (check_area_conflicts(as, addr, pages, guarded, area)));
511
512 mutex_unlock(&area->lock);
513
514 if (avail)
515 return addr;
516 }
517 }
518
519 /* No suitable address space area found */
520 return (uintptr_t) -1;
521}
522
523/** Create address space area with the given attributes.
524 *
525 * The created address space area is added to the target address space.
526 *
527 * @param as Target address space.
528 * @param flags Flags of the area memory.
529 * @param size Size of area.
530 * @param attrs Attributes of the area.
531 * @param backend Address space area backend. NULL if no backend is used.
532 * @param backend_data NULL or a pointer to an array holding two void *.
533 * @param base Starting virtual address of the area.
534 * If set to -1, a suitable mappable area is found.
535 * @param bound Lowest address bound if base is set to -1.
536 * Otherwise ignored.
537 *
538 * @return Address space area on success or NULL on failure.
539 *
540 */
541as_area_t *as_area_create(as_t *as, unsigned int flags, size_t size,
542 unsigned int attrs, mem_backend_t *backend,
543 mem_backend_data_t *backend_data, uintptr_t *base, uintptr_t bound)
544{
545 if ((*base != (uintptr_t) -1) && ((*base % PAGE_SIZE) != 0))
546 return NULL;
547
548 if (size == 0)
549 return NULL;
550
551 size_t pages = SIZE2FRAMES(size);
552
553 /* Writeable executable areas are not supported. */
554 if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
555 return NULL;
556
557 bool const guarded = flags & AS_AREA_GUARD;
558
559 mutex_lock(&as->lock);
560
561 if (*base == (uintptr_t) -1) {
562 *base = as_get_unmapped_area(as, bound, size, guarded);
563 if (*base == (uintptr_t) -1) {
564 mutex_unlock(&as->lock);
565 return NULL;
566 }
567 }
568
569 if (overflows_into_positive(*base, size)) {
570 mutex_unlock(&as->lock); return NULL; }
571
572 if (!check_area_conflicts(as, *base, pages, guarded, NULL)) {
573 mutex_unlock(&as->lock);
574 return NULL;
575 }
576
577 as_area_t *area = (as_area_t *) malloc(sizeof(as_area_t), 0);
578
579 mutex_initialize(&area->lock, MUTEX_PASSIVE);
580
581 area->as = as;
582 area->flags = flags;
583 area->attributes = attrs;
584 area->pages = pages;
585 area->resident = 0;
586 area->base = *base;
587 area->sh_info = NULL;
588 area->backend = backend;
589
590 if (backend_data)
591 area->backend_data = *backend_data;
592 else
593 memsetb(&area->backend_data, sizeof(area->backend_data), 0);
594
595 if (area->backend && area->backend->create) {
596 if (!area->backend->create(area)) {
597 free(area);
598 mutex_unlock(&as->lock);
599 return NULL;
600 }
601 }
602
603 btree_create(&area->used_space);
604 btree_insert(&as->as_area_btree, *base, (void *) area,
605 NULL);
606
607 mutex_unlock(&as->lock);
608
609 return area;
610}
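
/*
 * Illustrative sketch, not part of the original file: creating a one-page
 * anonymous, cacheable area in the current address space and letting the
 * kernel pick the base address. The bound of 0 and the helper name are
 * assumptions made for the example only.
 */
static as_area_t *example_create_anon_area(void)
{
    /* Passing -1 asks as_area_create() to find an unmapped range itself. */
    uintptr_t base = (uintptr_t) -1;

    as_area_t *area = as_area_create(AS,
        AS_AREA_READ | AS_AREA_WRITE | AS_AREA_CACHEABLE, PAGE_SIZE,
        AS_AREA_ATTR_NONE, &anon_backend, NULL, &base, 0);

    /* On success, base now holds the chosen starting virtual address. */
    return area;
}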
611
612/** Find address space area and lock it.
613 *
614 * @param as Address space.
615 * @param va Virtual address.
616 *
617 * @return Locked address space area containing va on success or
618 * NULL on failure.
619 *
620 */
621NO_TRACE static as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
622{
623 ASSERT(mutex_locked(&as->lock));
624
625 btree_node_t *leaf;
626 as_area_t *area = (as_area_t *) btree_search(&as->as_area_btree, va,
627 &leaf);
628 if (area) {
629 /* va is the base address of an address space area */
630 mutex_lock(&area->lock);
631 return area;
632 }
633
634 /*
635 * Search the leaf node and the rightmost record of its left neighbour
636 * to find out whether this is a miss or va belongs to an address
637 * space area found there.
638 */
639
640 /* First, search the leaf node itself. */
641 btree_key_t i;
642
643 for (i = 0; i < leaf->keys; i++) {
644 area = (as_area_t *) leaf->value[i];
645
646 mutex_lock(&area->lock);
647
648 if ((area->base <= va) &&
649 (va <= area->base + (P2SZ(area->pages) - 1)))
650 return area;
651
652 mutex_unlock(&area->lock);
653 }
654
655 /*
656 * Second, locate the left neighbour and test its last record.
657 * Because of its position in the B+tree, it must have base < va.
658 */
659 btree_node_t *lnode = btree_leaf_node_left_neighbour(&as->as_area_btree,
660 leaf);
661 if (lnode) {
662 area = (as_area_t *) lnode->value[lnode->keys - 1];
663
664 mutex_lock(&area->lock);
665
666 if (va <= area->base + (P2SZ(area->pages) - 1))
667 return area;
668
669 mutex_unlock(&area->lock);
670 }
671
672 return NULL;
673}
674
675/** Find address space area and change it.
676 *
677 * @param as Address space.
678 * @param address Virtual address belonging to the area to be changed.
679 * Must be page-aligned.
680 * @param size New size of the virtual memory block starting at
681 * address.
682 * @param flags Flags influencing the remap operation. Currently unused.
683 *
684 * @return Zero on success or a value from @ref errno.h otherwise.
685 *
686 */
687int as_area_resize(as_t *as, uintptr_t address, size_t size, unsigned int flags)
688{
689 mutex_lock(&as->lock);
690
691 /*
692 * Locate the area.
693 */
694 as_area_t *area = find_area_and_lock(as, address);
695 if (!area) {
696 mutex_unlock(&as->lock);
697 return ENOENT;
698 }
699
700 if (!area->backend->is_resizable(area)) {
701 /*
702 * The backend does not support resizing for this area.
703 */
704 mutex_unlock(&area->lock);
705 mutex_unlock(&as->lock);
706 return ENOTSUP;
707 }
708
709 if (area->sh_info) {
710 /*
711 * Remapping of shared address space areas
712 * is not supported.
713 */
714 mutex_unlock(&area->lock);
715 mutex_unlock(&as->lock);
716 return ENOTSUP;
717 }
718
719 size_t pages = SIZE2FRAMES((address - area->base) + size);
720 if (!pages) {
721 /*
722 * Zero size address space areas are not allowed.
723 */
724 mutex_unlock(&area->lock);
725 mutex_unlock(&as->lock);
726 return EPERM;
727 }
728
729 if (pages < area->pages) {
730 uintptr_t start_free = area->base + P2SZ(pages);
731
732 /*
733 * Shrinking the area.
734 * No need to check for overlaps.
735 */
736
737 page_table_lock(as, false);
738
739 /*
740 * Remove frames belonging to used space starting from
741 * the highest addresses downwards until an overlap with
742 * the resized address space area is found. Note that this
743 * is also the right way to remove part of the used_space
744 * B+tree leaf list.
745 */
746 bool cond = true;
747 while (cond) {
748 ASSERT(!list_empty(&area->used_space.leaf_list));
749
750 btree_node_t *node =
751 list_get_instance(list_last(&area->used_space.leaf_list),
752 btree_node_t, leaf_link);
753
754 if ((cond = (bool) node->keys)) {
755 uintptr_t ptr = node->key[node->keys - 1];
756 size_t size =
757 (size_t) node->value[node->keys - 1];
758 size_t i = 0;
759
760 if (overlaps(ptr, P2SZ(size), area->base,
761 P2SZ(pages))) {
762
763 if (ptr + P2SZ(size) <= start_free) {
764 /*
765 * The whole interval fits
766 * completely in the resized
767 * address space area.
768 */
769 break;
770 }
771
772 /*
773 * The interval straddles start_free, the new
774 * end of the area; only the part above it
775 * needs to be removed.
776 */
777
778 /* We are almost done */
779 cond = false;
780 i = (start_free - ptr) >> PAGE_WIDTH;
781 if (!used_space_remove(area, start_free,
782 size - i))
783 panic("Cannot remove used space.");
784 } else {
785 /*
786 * The interval of used space can be
787 * completely removed.
788 */
789 if (!used_space_remove(area, ptr, size))
790 panic("Cannot remove used space.");
791 }
792
793 /*
794 * Start TLB shootdown sequence.
795 *
796 * The sequence is rather short and can be
797 * repeated multiple times. The reason is that
798 * we don't want to have used_space_remove()
799 * inside the sequence as it may use a blocking
800 * memory allocation for its B+tree. Blocking
801 * while holding the tlblock spinlock is
802 * forbidden and would hit a kernel assertion.
803 */
804
805 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES,
806 as->asid, area->base + P2SZ(pages),
807 area->pages - pages);
808
809 for (; i < size; i++) {
810 pte_t *pte = page_mapping_find(as,
811 ptr + P2SZ(i), false);
812
813 ASSERT(pte);
814 ASSERT(PTE_VALID(pte));
815 ASSERT(PTE_PRESENT(pte));
816
817 if ((area->backend) &&
818 (area->backend->frame_free)) {
819 area->backend->frame_free(area,
820 ptr + P2SZ(i),
821 PTE_GET_FRAME(pte));
822 }
823
824 page_mapping_remove(as, ptr + P2SZ(i));
825 }
826
827 /*
828 * Finish TLB shootdown sequence.
829 */
830
831 tlb_invalidate_pages(as->asid,
832 area->base + P2SZ(pages),
833 area->pages - pages);
834
835 /*
836 * Invalidate software translation caches
837 * (e.g. TSB on sparc64, PHT on ppc32).
838 */
839 as_invalidate_translation_cache(as,
840 area->base + P2SZ(pages),
841 area->pages - pages);
842 tlb_shootdown_finalize(ipl);
843 }
844 }
845 page_table_unlock(as, false);
846 } else {
847 /*
848 * Growing the area.
849 */
850
851 if (overflows_into_positive(address, P2SZ(pages))) {
852 mutex_unlock(&area->lock); mutex_unlock(&as->lock); return EINVAL; }
853
854 /*
855 * Check for overlaps with other address space areas.
856 */
857 bool const guarded = area->flags & AS_AREA_GUARD;
858 if (!check_area_conflicts(as, address, pages, guarded, area)) {
859 mutex_unlock(&area->lock);
860 mutex_unlock(&as->lock);
861 return EADDRNOTAVAIL;
862 }
863 }
864
865 if (area->backend && area->backend->resize) {
866 if (!area->backend->resize(area, pages)) {
867 mutex_unlock(&area->lock);
868 mutex_unlock(&as->lock);
869 return ENOMEM;
870 }
871 }
872
873 area->pages = pages;
874
875 mutex_unlock(&area->lock);
876 mutex_unlock(&as->lock);
877
878 return 0;
879}
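
/*
 * Illustrative sketch, not part of the original file: growing an area to two
 * pages. The base address is assumed to come from an earlier call to
 * as_area_create(); note that the new size is measured from 'address', it is
 * not added to the current size.
 */
static int example_grow_area(uintptr_t base)
{
    return as_area_resize(AS, base, 2 * PAGE_SIZE, 0);
}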
880
881/** Remove reference to address space area share info.
882 *
883 * If the reference count drops to 0, the sh_info is deallocated.
884 *
885 * @param sh_info Pointer to address space area share info.
886 *
887 */
888NO_TRACE static void sh_info_remove_reference(share_info_t *sh_info)
889{
890 bool dealloc = false;
891
892 mutex_lock(&sh_info->lock);
893 ASSERT(sh_info->refcount);
894
895 if (--sh_info->refcount == 0) {
896 dealloc = true;
897
898 /*
899 * Now walk carefully the pagemap B+tree and free/remove
900 * reference from all frames found there.
901 */
902 list_foreach(sh_info->pagemap.leaf_list, cur) {
903 btree_node_t *node
904 = list_get_instance(cur, btree_node_t, leaf_link);
905 btree_key_t i;
906
907 for (i = 0; i < node->keys; i++)
908 frame_free((uintptr_t) node->value[i]);
909 }
910
911 }
912 mutex_unlock(&sh_info->lock);
913
914 if (dealloc) {
915 btree_destroy(&sh_info->pagemap);
916 free(sh_info);
917 }
918}
919
920/** Destroy address space area.
921 *
922 * @param as Address space.
923 * @param address Address within the area to be deleted.
924 *
925 * @return Zero on success or a value from @ref errno.h on failure.
926 *
927 */
928int as_area_destroy(as_t *as, uintptr_t address)
929{
930 mutex_lock(&as->lock);
931
932 as_area_t *area = find_area_and_lock(as, address);
933 if (!area) {
934 mutex_unlock(&as->lock);
935 return ENOENT;
936 }
937
938 if (area->backend && area->backend->destroy)
939 area->backend->destroy(area);
940
941 uintptr_t base = area->base;
942
943 page_table_lock(as, false);
944
945 /*
946 * Start TLB shootdown sequence.
947 */
948 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
949 area->pages);
950
951 /*
952 * Visit only the pages mapped by used_space B+tree.
953 */
954 list_foreach(area->used_space.leaf_list, cur) {
955 btree_node_t *node;
956 btree_key_t i;
957
958 node = list_get_instance(cur, btree_node_t, leaf_link);
959 for (i = 0; i < node->keys; i++) {
960 uintptr_t ptr = node->key[i];
961 size_t size;
962
963 for (size = 0; size < (size_t) node->value[i]; size++) {
964 pte_t *pte = page_mapping_find(as,
965 ptr + P2SZ(size), false);
966
967 ASSERT(pte);
968 ASSERT(PTE_VALID(pte));
969 ASSERT(PTE_PRESENT(pte));
970
971 if ((area->backend) &&
972 (area->backend->frame_free)) {
973 area->backend->frame_free(area,
974 ptr + P2SZ(size),
975 PTE_GET_FRAME(pte));
976 }
977
978 page_mapping_remove(as, ptr + P2SZ(size));
979 }
980 }
981 }
982
983 /*
984 * Finish TLB shootdown sequence.
985 */
986
987 tlb_invalidate_pages(as->asid, area->base, area->pages);
988
989 /*
990 * Invalidate potential software translation caches
991 * (e.g. TSB on sparc64, PHT on ppc32).
992 */
993 as_invalidate_translation_cache(as, area->base, area->pages);
994 tlb_shootdown_finalize(ipl);
995
996 page_table_unlock(as, false);
997
998 btree_destroy(&area->used_space);
999
1000 area->attributes |= AS_AREA_ATTR_PARTIAL;
1001
1002 if (area->sh_info)
1003 sh_info_remove_reference(area->sh_info);
1004
1005 mutex_unlock(&area->lock);
1006
1007 /*
1008 * Remove the empty area from address space.
1009 */
1010 btree_remove(&as->as_area_btree, base, NULL);
1011
1012 free(area);
1013
1014 mutex_unlock(&as->lock);
1015 return 0;
1016}
1017
1018/** Share address space area with another or the same address space.
1019 *
1020 * Address space area mapping is shared with a new address space area.
1021 * If the source address space area has not been shared so far,
1022 * a new sh_info is created. The new address space area simply gets the
1023 * sh_info of the source area. The process of duplicating the
1024 * mapping is done through the backend share function.
1025 *
1026 * @param src_as Pointer to source address space.
1027 * @param src_base Base address of the source address space area.
1028 * @param acc_size Expected size of the source area.
1029 * @param dst_as Pointer to destination address space.
1030 * @param dst_flags_mask Destination address space area flags mask.
1031 * @param dst_base Target base address. If set to -1,
1032 * a suitable mappable area is found.
1033 * @param bound Lowest address bound if dst_base is set to -1.
1034 * Otherwise ignored.
1035 *
1036 * @return Zero on success.
1037 * @return ENOENT if the source address space area was not found.
1038 * @return EPERM if there was a problem in accepting the area.
1039 * @return ENOMEM if there was a problem in allocating destination
1040 * address space area.
1041 * @return ENOTSUP if the address space area backend does not support
1042 * sharing.
1043 *
1044 */
1045int as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
1046 as_t *dst_as, unsigned int dst_flags_mask, uintptr_t *dst_base,
1047 uintptr_t bound)
1048{
1049 mutex_lock(&src_as->lock);
1050 as_area_t *src_area = find_area_and_lock(src_as, src_base);
1051 if (!src_area) {
1052 /*
1053 * Could not find the source address space area.
1054 */
1055 mutex_unlock(&src_as->lock);
1056 return ENOENT;
1057 }
1058
1059 if (!src_area->backend->is_shareable(src_area)) {
1060 /*
1061 * The backend does not permit sharing of this area.
1062 */
1063 mutex_unlock(&src_area->lock);
1064 mutex_unlock(&src_as->lock);
1065 return ENOTSUP;
1066 }
1067
1068 size_t src_size = P2SZ(src_area->pages);
1069 unsigned int src_flags = src_area->flags;
1070 mem_backend_t *src_backend = src_area->backend;
1071 mem_backend_data_t src_backend_data = src_area->backend_data;
1072
1073 /* Share the cacheable flag from the original mapping */
1074 if (src_flags & AS_AREA_CACHEABLE)
1075 dst_flags_mask |= AS_AREA_CACHEABLE;
1076
1077 if ((src_size != acc_size) ||
1078 ((src_flags & dst_flags_mask) != dst_flags_mask)) {
1079 mutex_unlock(&src_area->lock);
1080 mutex_unlock(&src_as->lock);
1081 return EPERM;
1082 }
1083
1084 /*
1085 * Now we are committed to sharing the area.
1086 * First, prepare the area for sharing.
1087 * Then it will be safe to unlock it.
1088 */
1089 share_info_t *sh_info = src_area->sh_info;
1090 if (!sh_info) {
1091 sh_info = (share_info_t *) malloc(sizeof(share_info_t), 0);
1092 mutex_initialize(&sh_info->lock, MUTEX_PASSIVE);
1093 sh_info->refcount = 2;
1094 btree_create(&sh_info->pagemap);
1095 src_area->sh_info = sh_info;
1096
1097 /*
1098 * Call the backend to setup sharing.
1099 */
1100 src_area->backend->share(src_area);
1101 } else {
1102 mutex_lock(&sh_info->lock);
1103 sh_info->refcount++;
1104 mutex_unlock(&sh_info->lock);
1105 }
1106
1107 mutex_unlock(&src_area->lock);
1108 mutex_unlock(&src_as->lock);
1109
1110 /*
1111 * Create copy of the source address space area.
1112 * The destination area is created with AS_AREA_ATTR_PARTIAL
1113 * attribute set which prevents race condition with
1114 * preliminary as_page_fault() calls.
1115 * The flags of the source area are masked against dst_flags_mask
1116 * to support sharing in less privileged mode.
1117 */
1118 as_area_t *dst_area = as_area_create(dst_as, dst_flags_mask,
1119 src_size, AS_AREA_ATTR_PARTIAL, src_backend,
1120 &src_backend_data, dst_base, bound);
1121 if (!dst_area) {
1122 /*
1123 * Destination address space area could not be created.
1124 */
1125 sh_info_remove_reference(sh_info);
1126
1127 return ENOMEM;
1128 }
1129
1130 /*
1131 * Now the destination address space area has been
1132 * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
1133 * attribute and set the sh_info.
1134 */
1135 mutex_lock(&dst_as->lock);
1136 mutex_lock(&dst_area->lock);
1137 dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
1138 dst_area->sh_info = sh_info;
1139 mutex_unlock(&dst_area->lock);
1140 mutex_unlock(&dst_as->lock);
1141
1142 return 0;
1143}
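
/*
 * Illustrative sketch, not part of the original file: sharing an area from a
 * source address space into the current one with read-only access. src_as,
 * src_base and size are assumed to have been negotiated elsewhere (e.g. over
 * IPC); the kernel picks the destination base.
 */
static int example_share_readonly(as_t *src_as, uintptr_t src_base, size_t size)
{
    uintptr_t dst_base = (uintptr_t) -1;

    return as_area_share(src_as, src_base, size, AS, AS_AREA_READ,
        &dst_base, 0);
}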
1144
1145/** Check access mode for address space area.
1146 *
1147 * @param area Address space area.
1148 * @param access Access mode.
1149 *
1150 * @return False if access violates area's permissions, true
1151 * otherwise.
1152 *
1153 */
1154NO_TRACE bool as_area_check_access(as_area_t *area, pf_access_t access)
1155{
1156 ASSERT(mutex_locked(&area->lock));
1157
1158 int flagmap[] = {
1159 [PF_ACCESS_READ] = AS_AREA_READ,
1160 [PF_ACCESS_WRITE] = AS_AREA_WRITE,
1161 [PF_ACCESS_EXEC] = AS_AREA_EXEC
1162 };
1163
1164 if (!(area->flags & flagmap[access]))
1165 return false;
1166
1167 return true;
1168}
1169
1170/** Convert address space area flags to page flags.
1171 *
1172 * @param aflags Flags of some address space area.
1173 *
1174 * @return Flags to be passed to page_mapping_insert().
1175 *
1176 */
1177NO_TRACE static unsigned int area_flags_to_page_flags(unsigned int aflags)
1178{
1179 unsigned int flags = PAGE_USER | PAGE_PRESENT;
1180
1181 if (aflags & AS_AREA_READ)
1182 flags |= PAGE_READ;
1183
1184 if (aflags & AS_AREA_WRITE)
1185 flags |= PAGE_WRITE;
1186
1187 if (aflags & AS_AREA_EXEC)
1188 flags |= PAGE_EXEC;
1189
1190 if (aflags & AS_AREA_CACHEABLE)
1191 flags |= PAGE_CACHEABLE;
1192
1193 return flags;
1194}
1195
1196/** Change address space area flags.
1197 *
1198 * The idea is to have the same data, but with a different access mode.
1199 * This is needed e.g. for writing code into memory and then executing it.
1200 * In order for this to work properly, this may copy the data
1201 * into private anonymous memory (unless it's already there).
1202 *
1203 * @param as Address space.
1204 * @param flags Flags of the area memory.
1205 * @param address Address within the area to be changed.
1206 *
1207 * @return Zero on success or a value from @ref errno.h on failure.
1208 *
1209 */
1210int as_area_change_flags(as_t *as, unsigned int flags, uintptr_t address)
1211{
1212 /* Flags for the new memory mapping */
1213 unsigned int page_flags = area_flags_to_page_flags(flags);
1214
1215 mutex_lock(&as->lock);
1216
1217 as_area_t *area = find_area_and_lock(as, address);
1218 if (!area) {
1219 mutex_unlock(&as->lock);
1220 return ENOENT;
1221 }
1222
1223 if ((area->sh_info) || (area->backend != &anon_backend)) {
1224 /* Copying shared areas not supported yet */
1225 /* Copying non-anonymous memory not supported yet */
1226 mutex_unlock(&area->lock);
1227 mutex_unlock(&as->lock);
1228 return ENOTSUP;
1229 }
1230
1231 /*
1232 * Compute total number of used pages in the used_space B+tree
1233 */
1234 size_t used_pages = 0;
1235
1236 list_foreach(area->used_space.leaf_list, cur) {
1237 btree_node_t *node
1238 = list_get_instance(cur, btree_node_t, leaf_link);
1239 btree_key_t i;
1240
1241 for (i = 0; i < node->keys; i++)
1242 used_pages += (size_t) node->value[i];
1243 }
1244
1245 /* An array for storing frame numbers */
1246 uintptr_t *old_frame = malloc(used_pages * sizeof(uintptr_t), 0);
1247
1248 page_table_lock(as, false);
1249
1250 /*
1251 * Start TLB shootdown sequence.
1252 */
1253 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
1254 area->pages);
1255
1256 /*
1257 * Remove used pages from page tables and remember their frame
1258 * numbers.
1259 */
1260 size_t frame_idx = 0;
1261
1262 list_foreach(area->used_space.leaf_list, cur) {
1263 btree_node_t *node = list_get_instance(cur, btree_node_t,
1264 leaf_link);
1265 btree_key_t i;
1266
1267 for (i = 0; i < node->keys; i++) {
1268 uintptr_t ptr = node->key[i];
1269 size_t size;
1270
1271 for (size = 0; size < (size_t) node->value[i]; size++) {
1272 pte_t *pte = page_mapping_find(as,
1273 ptr + P2SZ(size), false);
1274
1275 ASSERT(pte);
1276 ASSERT(PTE_VALID(pte));
1277 ASSERT(PTE_PRESENT(pte));
1278
1279 old_frame[frame_idx++] = PTE_GET_FRAME(pte);
1280
1281 /* Remove old mapping */
1282 page_mapping_remove(as, ptr + P2SZ(size));
1283 }
1284 }
1285 }
1286
1287 /*
1288 * Finish TLB shootdown sequence.
1289 */
1290
1291 tlb_invalidate_pages(as->asid, area->base, area->pages);
1292
1293 /*
1294 * Invalidate potential software translation caches
1295 * (e.g. TSB on sparc64, PHT on ppc32).
1296 */
1297 as_invalidate_translation_cache(as, area->base, area->pages);
1298 tlb_shootdown_finalize(ipl);
1299
1300 page_table_unlock(as, false);
1301
1302 /*
1303 * Set the new flags.
1304 */
1305 area->flags = flags;
1306
1307 /*
1308 * Map pages back in with new flags. This step is kept separate
1309 * so that the memory area cannot be accessed with both the old and
1310 * the new flags at once.
1311 */
1312 frame_idx = 0;
1313
1314 list_foreach(area->used_space.leaf_list, cur) {
1315 btree_node_t *node
1316 = list_get_instance(cur, btree_node_t, leaf_link);
1317 btree_key_t i;
1318
1319 for (i = 0; i < node->keys; i++) {
1320 uintptr_t ptr = node->key[i];
1321 size_t size;
1322
1323 for (size = 0; size < (size_t) node->value[i]; size++) {
1324 page_table_lock(as, false);
1325
1326 /* Insert the new mapping */
1327 page_mapping_insert(as, ptr + P2SZ(size),
1328 old_frame[frame_idx++], page_flags);
1329
1330 page_table_unlock(as, false);
1331 }
1332 }
1333 }
1334
1335 free(old_frame);
1336
1337 mutex_unlock(&area->lock);
1338 mutex_unlock(&as->lock);
1339
1340 return 0;
1341}
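
/*
 * Illustrative sketch, not part of the original file: the write-then-execute
 * pattern described above. The area is assumed to be anonymous and already
 * filled with code while it was writable; the copy step and error handling
 * are elided.
 */
static int example_make_executable(uintptr_t base)
{
    /* Revoke write access and allow execution in one step. */
    return as_area_change_flags(AS, AS_AREA_READ | AS_AREA_EXEC, base);
}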
1342
1343/** Handle page fault within the current address space.
1344 *
1345 * This is the high-level page fault handler. It decides whether the page fault
1346 * can be resolved by any backend and if so, it invokes the backend to resolve
1347 * the page fault.
1348 *
1349 * Interrupts are assumed disabled.
1350 *
1351 * @param page Faulting page.
1352 * @param access Access mode that caused the page fault (i.e.
1353 * read/write/exec).
1354 * @param istate Pointer to the interrupted state.
1355 *
1356 * @return AS_PF_FAULT on page fault.
1357 * @return AS_PF_OK on success.
1358 * @return AS_PF_DEFER if the fault was caused by copy_to_uspace()
1359 * or copy_from_uspace().
1360 *
1361 */
1362int as_page_fault(uintptr_t page, pf_access_t access, istate_t *istate)
1363{
1364 if (!THREAD)
1365 return AS_PF_FAULT;
1366
1367 if (!AS)
1368 return AS_PF_FAULT;
1369
1370 mutex_lock(&AS->lock);
1371 as_area_t *area = find_area_and_lock(AS, page);
1372 if (!area) {
1373 /*
1374 * No area contained mapping for 'page'.
1375 * Signal page fault to low-level handler.
1376 */
1377 mutex_unlock(&AS->lock);
1378 goto page_fault;
1379 }
1380
1381 if (area->attributes & AS_AREA_ATTR_PARTIAL) {
1382 /*
1383 * The address space area is not fully initialized.
1384 * Avoid possible race by returning error.
1385 */
1386 mutex_unlock(&area->lock);
1387 mutex_unlock(&AS->lock);
1388 goto page_fault;
1389 }
1390
1391 if ((!area->backend) || (!area->backend->page_fault)) {
1392 /*
1393 * The address space area is not backed by any backend
1394 * or the backend cannot handle page faults.
1395 */
1396 mutex_unlock(&area->lock);
1397 mutex_unlock(&AS->lock);
1398 goto page_fault;
1399 }
1400
1401 page_table_lock(AS, false);
1402
1403 /*
1404 * To avoid race condition between two page faults on the same address,
1405 * we need to make sure the mapping has not been already inserted.
1406 */
1407 pte_t *pte;
1408 if ((pte = page_mapping_find(AS, page, false))) {
1409 if (PTE_PRESENT(pte)) {
1410 if (((access == PF_ACCESS_READ) && PTE_READABLE(pte)) ||
1411 (access == PF_ACCESS_WRITE && PTE_WRITABLE(pte)) ||
1412 (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(pte))) {
1413 page_table_unlock(AS, false);
1414 mutex_unlock(&area->lock);
1415 mutex_unlock(&AS->lock);
1416 return AS_PF_OK;
1417 }
1418 }
1419 }
1420
1421 /*
1422 * Resort to the backend page fault handler.
1423 */
1424 if (area->backend->page_fault(area, page, access) != AS_PF_OK) {
1425 page_table_unlock(AS, false);
1426 mutex_unlock(&area->lock);
1427 mutex_unlock(&AS->lock);
1428 goto page_fault;
1429 }
1430
1431 page_table_unlock(AS, false);
1432 mutex_unlock(&area->lock);
1433 mutex_unlock(&AS->lock);
1434 return AS_PF_OK;
1435
1436page_fault:
1437 if (THREAD->in_copy_from_uspace) {
1438 THREAD->in_copy_from_uspace = false;
1439 istate_set_retaddr(istate,
1440 (uintptr_t) &memcpy_from_uspace_failover_address);
1441 } else if (THREAD->in_copy_to_uspace) {
1442 THREAD->in_copy_to_uspace = false;
1443 istate_set_retaddr(istate,
1444 (uintptr_t) &memcpy_to_uspace_failover_address);
1445 } else {
1446 return AS_PF_FAULT;
1447 }
1448
1449 return AS_PF_DEFER;
1450}
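
/*
 * Illustrative sketch, not part of the original file: how an architecture's
 * low-level fault handler is expected to hand a data fault over to
 * as_page_fault(). The handler name and the way the faulting address is
 * obtained are assumptions; the real handlers live under arch/ and typically
 * kill the offending task instead of panicking.
 */
static void example_arch_data_fault(uintptr_t badvaddr, istate_t *istate)
{
    /* Interrupts are assumed to be disabled at this point. */
    int rc = as_page_fault(ALIGN_DOWN(badvaddr, PAGE_SIZE), PF_ACCESS_READ,
        istate);

    if (rc == AS_PF_FAULT)
        panic("Unhandled page fault at %p.", (void *) badvaddr);

    /* AS_PF_OK and AS_PF_DEFER need no further action here. */
}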
1451
1452/** Switch address spaces.
1453 *
1454 * Note that this function cannot sleep as it is essentially a part of
1455 * scheduling. Sleeping here would lead to deadlock on wakeup. Another
1456 * thing which is forbidden in this context is locking the address space.
1457 *
1458 * When this function is entered, no spinlocks may be held.
1459 *
1460 * @param old_as Old address space or NULL.
1461 * @param new_as New address space.
1462 *
1463 */
1464void as_switch(as_t *old_as, as_t *new_as)
1465{
1466 DEADLOCK_PROBE_INIT(p_asidlock);
1467 preemption_disable();
1468
1469retry:
1470 (void) interrupts_disable();
1471 if (!spinlock_trylock(&asidlock)) {
1472 /*
1473 * Avoid deadlock with TLB shootdown.
1474 * We can enable interrupts here because
1475 * preemption is disabled. We should not be
1476 * holding any other lock.
1477 */
1478 (void) interrupts_enable();
1479 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
1480 goto retry;
1481 }
1482 preemption_enable();
1483
1484 /*
1485 * First, take care of the old address space.
1486 */
1487 if (old_as) {
1488 ASSERT(old_as->cpu_refcount);
1489
1490 if ((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) {
1491 /*
1492 * The old address space is no longer active on
1493 * any processor. It can be appended to the
1494 * list of inactive address spaces with assigned
1495 * ASID.
1496 */
1497 ASSERT(old_as->asid != ASID_INVALID);
1498
1499 list_append(&old_as->inactive_as_with_asid_link,
1500 &inactive_as_with_asid_list);
1501 }
1502
1503 /*
1504 * Perform architecture-specific tasks when the address space
1505 * is being removed from the CPU.
1506 */
1507 as_deinstall_arch(old_as);
1508 }
1509
1510 /*
1511 * Second, prepare the new address space.
1512 */
1513 if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) {
1514 if (new_as->asid != ASID_INVALID)
1515 list_remove(&new_as->inactive_as_with_asid_link);
1516 else
1517 new_as->asid = asid_get();
1518 }
1519
1520#ifdef AS_PAGE_TABLE
1521 SET_PTL0_ADDRESS(new_as->genarch.page_table);
1522#endif
1523
1524 /*
1525 * Perform architecture-specific steps.
1526 * (e.g. write ASID to hardware register etc.)
1527 */
1528 as_install_arch(new_as);
1529
1530 spinlock_unlock(&asidlock);
1531
1532 AS = new_as;
1533}
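
/*
 * Illustrative sketch, not part of the original file: the scheduler-side use
 * of as_switch(). When the thread picked to run next belongs to a task with
 * a different address space, the old one is replaced by the new one. The
 * surrounding scheduler code is reduced to the bare minimum.
 */
static void example_switch_task_as(as_t *old_as, as_t *new_as)
{
    if (old_as != new_as) {
        /* No spinlocks may be held here; see the note above. */
        as_switch(old_as, new_as);
    }
}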
1534
1535/** Compute flags for the virtual address translation subsystem.
1536 *
1537 * @param area Address space area.
1538 *
1539 * @return Flags to be used in page_mapping_insert().
1540 *
1541 */
1542NO_TRACE unsigned int as_area_get_flags(as_area_t *area)
1543{
1544 ASSERT(mutex_locked(&area->lock));
1545
1546 return area_flags_to_page_flags(area->flags);
1547}
1548
1549/** Create page table.
1550 *
1551 * Depending on the architecture, create either an address space private or
1552 * a global page table.
1553 *
1554 * @param flags Flags saying whether the page table is for the kernel
1555 * address space.
1556 *
1557 * @return First entry of the page table.
1558 *
1559 */
1560NO_TRACE pte_t *page_table_create(unsigned int flags)
1561{
1562 ASSERT(as_operations);
1563 ASSERT(as_operations->page_table_create);
1564
1565 return as_operations->page_table_create(flags);
1566}
1567
1568/** Destroy page table.
1569 *
1570 * Destroy page table in architecture specific way.
1571 *
1572 * @param page_table Physical address of PTL0.
1573 *
1574 */
1575NO_TRACE void page_table_destroy(pte_t *page_table)
1576{
1577 ASSERT(as_operations);
1578 ASSERT(as_operations->page_table_destroy);
1579
1580 as_operations->page_table_destroy(page_table);
1581}
1582
1583/** Lock page table.
1584 *
1585 * This function should be called before any page_mapping_insert(),
1586 * page_mapping_remove() and page_mapping_find().
1587 *
1588 * Locking order is such that address space areas must be locked
1589 * prior to this call. Address space can be locked prior to this
1590 * call in which case the lock argument is false.
1591 *
1592 * @param as Address space.
1593 * @param lock If false, do not attempt to lock as->lock.
1594 *
1595 */
1596NO_TRACE void page_table_lock(as_t *as, bool lock)
1597{
1598 ASSERT(as_operations);
1599 ASSERT(as_operations->page_table_lock);
1600
1601 as_operations->page_table_lock(as, lock);
1602}
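
/*
 * Illustrative sketch, not part of the original file: the locking order that
 * the comment above prescribes. The caller is assumed to already hold
 * as->lock and the area lock, so page_table_lock() is told not to take
 * as->lock again. The single mapping insert is only indicative.
 */
static void example_map_one_page(as_area_t *area, uintptr_t page,
    uintptr_t frame)
{
    ASSERT(mutex_locked(&area->as->lock));
    ASSERT(mutex_locked(&area->lock));

    page_table_lock(area->as, false);
    page_mapping_insert(area->as, page, frame, as_area_get_flags(area));
    page_table_unlock(area->as, false);
}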
1603
1604/** Unlock page table.
1605 *
1606 * @param as Address space.
1607 * @param unlock If false, do not attempt to unlock as->lock.
1608 *
1609 */
1610NO_TRACE void page_table_unlock(as_t *as, bool unlock)
1611{
1612 ASSERT(as_operations);
1613 ASSERT(as_operations->page_table_unlock);
1614
1615 as_operations->page_table_unlock(as, unlock);
1616}
1617
1618/** Test whether page tables are locked.
1619 *
1620 * @param as Address space where the page tables belong.
1621 *
1622 * @return True if the page tables belonging to the address space
1623 * are locked, otherwise false.
1624 */
1625NO_TRACE bool page_table_locked(as_t *as)
1626{
1627 ASSERT(as_operations);
1628 ASSERT(as_operations->page_table_locked);
1629
1630 return as_operations->page_table_locked(as);
1631}
1632
1633/** Return size of the address space area containing the given address.
1634 *
1635 * @param base Arbitrary address inside the address space area.
1636 *
1637 * @return Size of the address space area in bytes or zero if it
1638 * does not exist.
1639 *
1640 */
1641size_t as_area_get_size(uintptr_t base)
1642{
1643 size_t size;
1644
1645 page_table_lock(AS, true);
1646 as_area_t *src_area = find_area_and_lock(AS, base);
1647
1648 if (src_area) {
1649 size = P2SZ(src_area->pages);
1650 mutex_unlock(&src_area->lock);
1651 } else
1652 size = 0;
1653
1654 page_table_unlock(AS, true);
1655 return size;
1656}
1657
1658/** Mark portion of address space area as used.
1659 *
1660 * The address space area must be already locked.
1661 *
1662 * @param area Address space area.
1663 * @param page First page to be marked.
1664 * @param count Number of pages to be marked.
1665 *
1666 * @return False on failure or true on success.
1667 *
1668 */
1669bool used_space_insert(as_area_t *area, uintptr_t page, size_t count)
1670{
1671 ASSERT(mutex_locked(&area->lock));
1672 ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
1673 ASSERT(count);
1674
1675 btree_node_t *leaf;
1676 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1677 if (pages) {
1678 /*
1679 * We hit the beginning of some used space.
1680 */
1681 return false;
1682 }
1683
1684 if (!leaf->keys) {
1685 btree_insert(&area->used_space, page, (void *) count, leaf);
1686 goto success;
1687 }
1688
1689 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space, leaf);
1690 if (node) {
1691 uintptr_t left_pg = node->key[node->keys - 1];
1692 uintptr_t right_pg = leaf->key[0];
1693 size_t left_cnt = (size_t) node->value[node->keys - 1];
1694 size_t right_cnt = (size_t) leaf->value[0];
1695
1696 /*
1697 * Examine the possibility that the interval fits
1698 * somewhere between the rightmost interval of
1699 * the left neighbour and the first interval of the leaf.
1700 */
1701
1702 if (page >= right_pg) {
1703 /* Do nothing. */
1704 } else if (overlaps(page, P2SZ(count), left_pg,
1705 P2SZ(left_cnt))) {
1706 /* The interval intersects with the left interval. */
1707 return false;
1708 } else if (overlaps(page, P2SZ(count), right_pg,
1709 P2SZ(right_cnt))) {
1710 /* The interval intersects with the right interval. */
1711 return false;
1712 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1713 (page + P2SZ(count) == right_pg)) {
1714 /*
1715 * The interval can be added by merging the two already
1716 * present intervals.
1717 */
1718 node->value[node->keys - 1] += count + right_cnt;
1719 btree_remove(&area->used_space, right_pg, leaf);
1720 goto success;
1721 } else if (page == left_pg + P2SZ(left_cnt)) {
1722 /*
1723 * The interval can be added by simply growing the left
1724 * interval.
1725 */
1726 node->value[node->keys - 1] += count;
1727 goto success;
1728 } else if (page + P2SZ(count) == right_pg) {
1729 /*
1730 * The interval can be added by simply moving the base of
1731 * the right interval down and increasing its size
1732 * accordingly.
1733 */
1734 leaf->value[0] += count;
1735 leaf->key[0] = page;
1736 goto success;
1737 } else {
1738 /*
1739 * The interval is between both neighbouring intervals,
1740 * but cannot be merged with any of them.
1741 */
1742 btree_insert(&area->used_space, page, (void *) count,
1743 leaf);
1744 goto success;
1745 }
1746 } else if (page < leaf->key[0]) {
1747 uintptr_t right_pg = leaf->key[0];
1748 size_t right_cnt = (size_t) leaf->value[0];
1749
1750 /*
1751 * Investigate the border case in which the left neighbour does
1752 * not exist but the interval fits from the left.
1753 */
1754
1755 if (overlaps(page, P2SZ(count), right_pg, P2SZ(right_cnt))) {
1756 /* The interval intersects with the right interval. */
1757 return false;
1758 } else if (page + P2SZ(count) == right_pg) {
1759 /*
1760 * The interval can be added by moving the base of the
1761 * right interval down and increasing its size
1762 * accordingly.
1763 */
1764 leaf->key[0] = page;
1765 leaf->value[0] += count;
1766 goto success;
1767 } else {
1768 /*
1769 * The interval doesn't adjoin with the right interval.
1770 * It must be added individually.
1771 */
1772 btree_insert(&area->used_space, page, (void *) count,
1773 leaf);
1774 goto success;
1775 }
1776 }
1777
1778 node = btree_leaf_node_right_neighbour(&area->used_space, leaf);
1779 if (node) {
1780 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1781 uintptr_t right_pg = node->key[0];
1782 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1783 size_t right_cnt = (size_t) node->value[0];
1784
1785 /*
1786 * Examine the possibility that the interval fits
1787 * somewhere between the leftmost interval of
1788 * the right neighbour and the last interval of the leaf.
1789 */
1790
1791 if (page < left_pg) {
1792 /* Do nothing. */
1793 } else if (overlaps(page, P2SZ(count), left_pg,
1794 P2SZ(left_cnt))) {
1795 /* The interval intersects with the left interval. */
1796 return false;
1797 } else if (overlaps(page, P2SZ(count), right_pg,
1798 P2SZ(right_cnt))) {
1799 /* The interval intersects with the right interval. */
1800 return false;
1801 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1802 (page + P2SZ(count) == right_pg)) {
1803 /*
1804 * The interval can be added by merging the two already
1805 * present intervals.
1806 */
1807 leaf->value[leaf->keys - 1] += count + right_cnt;
1808 btree_remove(&area->used_space, right_pg, node);
1809 goto success;
1810 } else if (page == left_pg + P2SZ(left_cnt)) {
1811 /*
1812 * The interval can be added by simply growing the left
1813 * interval.
1814 */
1815 leaf->value[leaf->keys - 1] += count;
1816 goto success;
1817 } else if (page + P2SZ(count) == right_pg) {
1818 /*
1819 * The interval can be added by simply moving the base of
1820 * the right interval down and increasing its size
1821 * accordingly.
1822 */
1823 node->value[0] += count;
1824 node->key[0] = page;
1825 goto success;
1826 } else {
1827 /*
1828 * The interval is between both neighbouring intervals,
1829 * but cannot be merged with any of them.
1830 */
1831 btree_insert(&area->used_space, page, (void *) count,
1832 leaf);
1833 goto success;
1834 }
1835 } else if (page >= leaf->key[leaf->keys - 1]) {
1836 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1837 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1838
1839 /*
1840 * Investigate the border case in which the right neighbour
1841 * does not exist but the interval fits from the right.
1842 */
1843
1844 if (overlaps(page, P2SZ(count), left_pg, P2SZ(left_cnt))) {
1845 /* The interval intersects with the left interval. */
1846 return false;
1847 } else if (left_pg + P2SZ(left_cnt) == page) {
1848 /*
1849 * The interval can be added by growing the left
1850 * interval.
1851 */
1852 leaf->value[leaf->keys - 1] += count;
1853 goto success;
1854 } else {
1855 /*
1856 * The interval doesn't adjoin with the left interval.
1857 * It must be added individually.
1858 */
1859 btree_insert(&area->used_space, page, (void *) count,
1860 leaf);
1861 goto success;
1862 }
1863 }
1864
1865 /*
1866 * Note that if the algorithm made it thus far, the interval can fit
1867 * only between two other intervals of the leaf. The two border cases
1868 * were already resolved.
1869 */
1870 btree_key_t i;
1871 for (i = 1; i < leaf->keys; i++) {
1872 if (page < leaf->key[i]) {
1873 uintptr_t left_pg = leaf->key[i - 1];
1874 uintptr_t right_pg = leaf->key[i];
1875 size_t left_cnt = (size_t) leaf->value[i - 1];
1876 size_t right_cnt = (size_t) leaf->value[i];
1877
1878 /*
1879 * The interval fits between left_pg and right_pg.
1880 */
1881
1882 if (overlaps(page, P2SZ(count), left_pg,
1883 P2SZ(left_cnt))) {
1884 /*
1885 * The interval intersects with the left
1886 * interval.
1887 */
1888 return false;
1889 } else if (overlaps(page, P2SZ(count), right_pg,
1890 P2SZ(right_cnt))) {
1891 /*
1892 * The interval intersects with the right
1893 * interval.
1894 */
1895 return false;
1896 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1897 (page + P2SZ(count) == right_pg)) {
1898 /*
1899 * The interval can be added by merging the two
1900 * already present intervals.
1901 */
1902 leaf->value[i - 1] += count + right_cnt;
1903 btree_remove(&area->used_space, right_pg, leaf);
1904 goto success;
1905 } else if (page == left_pg + P2SZ(left_cnt)) {
1906 /*
1907 * The interval can be added by simply growing
1908 * the left interval.
1909 */
1910 leaf->value[i - 1] += count;
1911 goto success;
1912 } else if (page + P2SZ(count) == right_pg) {
1913 /*
1914 * The interval can be added by simply moving
1915 * base of the right interval down and
1916 * increasing its size accordingly.
1917 */
1918 leaf->value[i] += count;
1919 leaf->key[i] = page;
1920 goto success;
1921 } else {
1922 /*
1923 * The interval is between both neighbouring
1924 * intervals, but cannot be merged with any of
1925 * them.
1926 */
1927 btree_insert(&area->used_space, page,
1928 (void *) count, leaf);
1929 goto success;
1930 }
1931 }
1932 }
1933
1934 panic("Inconsistency detected while adding %zu pages of used "
1935 "space at %p.", count, (void *) page);
1936
1937success:
1938 area->resident += count;
1939 return true;
1940}
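
/*
 * Illustrative sketch, not part of the original file: how a backend page
 * fault handler typically pairs page_mapping_insert() with
 * used_space_insert() so that the area's resident count stays accurate.
 * Frame allocation and the actual backend logic are elided; the helper name
 * is made up for the example.
 */
static void example_backend_map(as_area_t *area, uintptr_t page,
    uintptr_t frame)
{
    ASSERT(mutex_locked(&area->lock));
    ASSERT(page_table_locked(area->as));

    page_mapping_insert(area->as, page, frame, as_area_get_flags(area));
    if (!used_space_insert(area, page, 1))
        panic("Cannot insert used space.");
}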
1941
1942/** Mark portion of address space area as unused.
1943 *
1944 * The address space area must be already locked.
1945 *
1946 * @param area Address space area.
1947 * @param page First page to be marked.
1948 * @param count Number of pages to be marked.
1949 *
1950 * @return False on failure or true on success.
1951 *
1952 */
1953bool used_space_remove(as_area_t *area, uintptr_t page, size_t count)
1954{
1955 ASSERT(mutex_locked(&area->lock));
1956 ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
1957 ASSERT(count);
1958
1959 btree_node_t *leaf;
1960 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1961 if (pages) {
1962 /*
1963 * We are lucky, page is the beginning of some interval.
1964 */
1965 if (count > pages) {
1966 return false;
1967 } else if (count == pages) {
1968 btree_remove(&area->used_space, page, leaf);
1969 goto success;
1970 } else {
1971 /*
1972 * Find the respective interval.
1973 * Decrease its size and relocate its start address.
1974 */
1975 btree_key_t i;
1976 for (i = 0; i < leaf->keys; i++) {
1977 if (leaf->key[i] == page) {
1978 leaf->key[i] += P2SZ(count);
1979 leaf->value[i] -= count;
1980 goto success;
1981 }
1982 }
1983
1984 goto error;
1985 }
1986 }
1987
1988 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space,
1989 leaf);
1990 if ((node) && (page < leaf->key[0])) {
1991 uintptr_t left_pg = node->key[node->keys - 1];
1992 size_t left_cnt = (size_t) node->value[node->keys - 1];
1993
1994 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
1995 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
1996 /*
1997 * The interval is contained in the rightmost
1998 * interval of the left neighbour and can be
1999 * removed by updating the size of the bigger
2000 * interval.
2001 */
2002 node->value[node->keys - 1] -= count;
2003 goto success;
2004 } else if (page + P2SZ(count) <
2005 left_pg + P2SZ(left_cnt)) {
2006 size_t new_cnt;
2007
2008 /*
2009 * The interval is contained in the rightmost
2010 * interval of the left neighbour but its
2011 * removal requires both updating the size of
2012 * the original interval and also inserting a
2013 * new interval.
2014 */
2015 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2016 (page + P2SZ(count))) >> PAGE_WIDTH;
2017 node->value[node->keys - 1] -= count + new_cnt;
2018 btree_insert(&area->used_space, page +
2019 P2SZ(count), (void *) new_cnt, leaf);
2020 goto success;
2021 }
2022 }
2023
2024 return false;
2025 } else if (page < leaf->key[0])
2026 return false;
2027
2028 if (page > leaf->key[leaf->keys - 1]) {
2029 uintptr_t left_pg = leaf->key[leaf->keys - 1];
2030 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
2031
2032 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2033 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2034 /*
2035 * The interval is contained in the rightmost
2036 * interval of the leaf and can be removed by
2037 * updating the size of the bigger interval.
2038 */
2039 leaf->value[leaf->keys - 1] -= count;
2040 goto success;
2041 } else if (page + P2SZ(count) < left_pg +
2042 P2SZ(left_cnt)) {
2043 size_t new_cnt;
2044
2045 /*
2046 * The interval is contained in the rightmost
2047 * interval of the leaf but its removal
2048 * requires both updating the size of the
2049 * original interval and also inserting a new
2050 * interval.
2051 */
2052 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2053 (page + P2SZ(count))) >> PAGE_WIDTH;
2054 leaf->value[leaf->keys - 1] -= count + new_cnt;
2055 btree_insert(&area->used_space, page +
2056 P2SZ(count), (void *) new_cnt, leaf);
2057 goto success;
2058 }
2059 }
2060
2061 return false;
2062 }
2063
2064 /*
2065 * The border cases have been already resolved.
2066 * Now the interval can be only between intervals of the leaf.
2067 */
2068 btree_key_t i;
2069 for (i = 1; i < leaf->keys - 1; i++) {
2070 if (page < leaf->key[i]) {
2071 uintptr_t left_pg = leaf->key[i - 1];
2072 size_t left_cnt = (size_t) leaf->value[i - 1];
2073
2074 /*
2075 * Now the interval is between intervals corresponding
2076 * to (i - 1) and i.
2077 */
2078 if (overlaps(left_pg, P2SZ(left_cnt), page,
2079 P2SZ(count))) {
2080 if (page + P2SZ(count) ==
2081 left_pg + P2SZ(left_cnt)) {
2082 /*
2083 * The interval is contained in the
2084 * interval (i - 1) of the leaf and can
2085 * be removed by updating the size of
2086 * the bigger interval.
2087 */
2088 leaf->value[i - 1] -= count;
2089 goto success;
2090 } else if (page + P2SZ(count) <
2091 left_pg + P2SZ(left_cnt)) {
2092 size_t new_cnt;
2093
2094 /*
2095 * The interval is contained in the
2096 * interval (i - 1) of the leaf but its
2097 * removal requires both updating the
2098 * size of the original interval and
2099 * also inserting a new interval.
2100 */
2101 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2102 (page + P2SZ(count))) >>
2103 PAGE_WIDTH;
2104 leaf->value[i - 1] -= count + new_cnt;
2105 btree_insert(&area->used_space, page +
2106 P2SZ(count), (void *) new_cnt,
2107 leaf);
2108 goto success;
2109 }
2110 }
2111
2112 return false;
2113 }
2114 }
2115
2116error:
2117 panic("Inconsistency detected while removing %zu pages of used "
2118 "space from %p.", count, (void *) page);
2119
2120success:
2121 area->resident -= count;
2122 return true;
2123}
2124
2125/*
2126 * Address space related syscalls.
2127 */
2128
2129sysarg_t sys_as_area_create(uintptr_t base, size_t size, unsigned int flags,
2130 uintptr_t bound)
2131{
2132 uintptr_t virt = base;
2133 as_area_t *area = as_area_create(AS, flags | AS_AREA_CACHEABLE, size,
2134 AS_AREA_ATTR_NONE, &anon_backend, NULL, &virt, bound);
2135 if (area == NULL)
2136 return (sysarg_t) -1;
2137
2138 return (sysarg_t) virt;
2139}
2140
2141sysarg_t sys_as_area_resize(uintptr_t address, size_t size, unsigned int flags)
2142{
2143 return (sysarg_t) as_area_resize(AS, address, size, 0);
2144}
2145
2146sysarg_t sys_as_area_change_flags(uintptr_t address, unsigned int flags)
2147{
2148 return (sysarg_t) as_area_change_flags(AS, flags, address);
2149}
2150
2151sysarg_t sys_as_area_destroy(uintptr_t address)
2152{
2153 return (sysarg_t) as_area_destroy(AS, address);
2154}
2155
2156/** Get list of address space areas.
2157 *
2158 * @param as Address space.
2159 * @param obuf Place to save pointer to returned buffer.
2160 * @param osize Place to save size of returned buffer.
2161 *
2162 */
2163void as_get_area_info(as_t *as, as_area_info_t **obuf, size_t *osize)
2164{
2165 mutex_lock(&as->lock);
2166
2167 /* First pass, count number of areas. */
2168
2169 size_t area_cnt = 0;
2170
2171 list_foreach(as->as_area_btree.leaf_list, cur) {
2172 btree_node_t *node =
2173 list_get_instance(cur, btree_node_t, leaf_link);
2174 area_cnt += node->keys;
2175 }
2176
2177 size_t isize = area_cnt * sizeof(as_area_info_t);
2178 as_area_info_t *info = malloc(isize, 0);
2179
2180 /* Second pass, record data. */
2181
2182 size_t area_idx = 0;
2183
2184 list_foreach(as->as_area_btree.leaf_list, cur) {
2185 btree_node_t *node =
2186 list_get_instance(cur, btree_node_t, leaf_link);
2187 btree_key_t i;
2188
2189 for (i = 0; i < node->keys; i++) {
2190 as_area_t *area = node->value[i];
2191
2192 ASSERT(area_idx < area_cnt);
2193 mutex_lock(&area->lock);
2194
2195 info[area_idx].start_addr = area->base;
2196 info[area_idx].size = P2SZ(area->pages);
2197 info[area_idx].flags = area->flags;
2198 ++area_idx;
2199
2200 mutex_unlock(&area->lock);
2201 }
2202 }
2203
2204 mutex_unlock(&as->lock);
2205
2206 *obuf = info;
2207 *osize = isize;
2208}
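
/*
 * Illustrative sketch, not part of the original file: a consumer of
 * as_get_area_info(). The returned buffer is heap-allocated, so the caller
 * frees it once the snapshot has been processed.
 */
static void example_dump_area_info(as_t *as)
{
    as_area_info_t *info;
    size_t isize;

    as_get_area_info(as, &info, &isize);

    size_t count = isize / sizeof(as_area_info_t);
    for (size_t i = 0; i < count; i++)
        printf("area at %p, %zu bytes, flags %#x\n",
            (void *) info[i].start_addr, (size_t) info[i].size,
            (unsigned int) info[i].flags);

    free(info);
}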
2209
2210/** Print out information about address space.
2211 *
2212 * @param as Address space.
2213 *
2214 */
2215void as_print(as_t *as)
2216{
2217 mutex_lock(&as->lock);
2218
2219 /* Print out info about address space areas */
2220 list_foreach(as->as_area_btree.leaf_list, cur) {
2221 btree_node_t *node
2222 = list_get_instance(cur, btree_node_t, leaf_link);
2223 btree_key_t i;
2224
2225 for (i = 0; i < node->keys; i++) {
2226 as_area_t *area = node->value[i];
2227
2228 mutex_lock(&area->lock);
2229 printf("as_area: %p, base=%p, pages=%zu"
2230 " (%p - %p)\n", area, (void *) area->base,
2231 area->pages, (void *) area->base,
2232 (void *) (area->base + P2SZ(area->pages)));
2233 mutex_unlock(&area->lock);
2234 }
2235 }
2236
2237 mutex_unlock(&as->lock);
2238}
2239
2240/** @}
2241 */