source: mainline/kernel/generic/src/mm/as.c@ c387838

Last change on this file since c387838 was 94795812, checked in by Jakub Jermar <jakub@…>, 13 years ago

Check for dangerous unsigned integer overflows in check_area_conflicts().

  • Rename overflows_add() to overflows_into_positive().
  • Introduce overflows() for simple sum overflow tests.
File size: 55.6 KB
1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup genericmm
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Address space related functions.
36 *
37 * This file contains address space manipulation functions.
38 * Roughly speaking, this is a higher-level client of
39 * Virtual Address Translation (VAT) subsystem.
40 *
41 * Functionality provided by this file allows one to
42 * create address spaces and create, resize and share
43 * address space areas.
44 *
45 * @see page.c
46 *
47 */
48
49#include <mm/as.h>
50#include <arch/mm/as.h>
51#include <mm/page.h>
52#include <mm/frame.h>
53#include <mm/slab.h>
54#include <mm/tlb.h>
55#include <arch/mm/page.h>
56#include <genarch/mm/page_pt.h>
57#include <genarch/mm/page_ht.h>
58#include <mm/asid.h>
59#include <arch/mm/asid.h>
60#include <preemption.h>
61#include <synch/spinlock.h>
62#include <synch/mutex.h>
63#include <adt/list.h>
64#include <adt/btree.h>
65#include <proc/task.h>
66#include <proc/thread.h>
67#include <arch/asm.h>
68#include <panic.h>
69#include <debug.h>
70#include <print.h>
71#include <memstr.h>
72#include <macros.h>
73#include <bitops.h>
74#include <arch.h>
75#include <errno.h>
76#include <config.h>
77#include <align.h>
78#include <typedefs.h>
79#include <syscall/copy.h>
80#include <arch/interrupt.h>
81
82/**
83 * Each architecture decides what functions will be used to carry out
84 * address space operations such as creating or locking page tables.
85 */
86as_operations_t *as_operations = NULL;
87
88/** Slab for as_t objects.
89 *
90 */
91static slab_cache_t *as_slab;
92
93/** ASID subsystem lock.
94 *
95 * This lock protects:
96 * - inactive_as_with_asid_list
97 * - as->asid for each as of the as_t type
98 * - asids_allocated counter
99 *
100 */
101SPINLOCK_INITIALIZE(asidlock);
102
103/**
104 * Inactive address spaces (on all processors)
105 * that have valid ASID.
106 */
107LIST_INITIALIZE(inactive_as_with_asid_list);
108
109/** Kernel address space. */
110as_t *AS_KERNEL = NULL;
111
112NO_TRACE static int as_constructor(void *obj, unsigned int flags)
113{
114 as_t *as = (as_t *) obj;
115
116 link_initialize(&as->inactive_as_with_asid_link);
117 mutex_initialize(&as->lock, MUTEX_PASSIVE);
118
119 return as_constructor_arch(as, flags);
120}
121
122NO_TRACE static size_t as_destructor(void *obj)
123{
124 return as_destructor_arch((as_t *) obj);
125}
126
127/** Initialize address space subsystem. */
128void as_init(void)
129{
130 as_arch_init();
131
132 as_slab = slab_cache_create("as_t", sizeof(as_t), 0,
133 as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);
134
135 AS_KERNEL = as_create(FLAG_AS_KERNEL);
136 if (!AS_KERNEL)
137 panic("Cannot create kernel address space.");
138
139 /*
140 * Make sure the kernel address space
141 * reference count never drops to zero.
142 */
143 as_hold(AS_KERNEL);
144}
145
146/** Create address space.
147 *
148 * @param flags Flags that influence the way in which the address
149 * space is created.
150 *
151 */
152as_t *as_create(unsigned int flags)
153{
154 as_t *as = (as_t *) slab_alloc(as_slab, 0);
155 (void) as_create_arch(as, 0);
156
157 btree_create(&as->as_area_btree);
158
159 if (flags & FLAG_AS_KERNEL)
160 as->asid = ASID_KERNEL;
161 else
162 as->asid = ASID_INVALID;
163
164 atomic_set(&as->refcount, 0);
165 as->cpu_refcount = 0;
166
167#ifdef AS_PAGE_TABLE
168 as->genarch.page_table = page_table_create(flags);
169#else
170 page_table_create(flags);
171#endif
172
173 return as;
174}
175
176/** Destroy address space.
177 *
178 * When there are no tasks referencing this address space (i.e. its refcount is
179 * zero), the address space can be destroyed.
180 *
181 * We know that we don't hold any spinlock.
182 *
183 * @param as Address space to be destroyed.
184 *
185 */
186void as_destroy(as_t *as)
187{
188 DEADLOCK_PROBE_INIT(p_asidlock);
189
190 ASSERT(as != AS);
191 ASSERT(atomic_get(&as->refcount) == 0);
192
193 /*
194 * Since there is no reference to this address space, it is safe not to
195 * lock its mutex.
196 */
197
198 /*
199 * We need to avoid deadlock between TLB shootdown and asidlock.
200 * We therefore try to take asid conditionally and if we don't succeed,
201 * we enable interrupts and try again. This is done while preemption is
202 * disabled to prevent nested context switches. We also depend on the
203 * fact that so far no spinlocks are held.
204 */
205 preemption_disable();
206 ipl_t ipl = interrupts_read();
207
208retry:
209 interrupts_disable();
210 if (!spinlock_trylock(&asidlock)) {
211 interrupts_enable();
212 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
213 goto retry;
214 }
215
216 /* Interrupts disabled, enable preemption */
217 preemption_enable();
218
219 if ((as->asid != ASID_INVALID) && (as != AS_KERNEL)) {
220 if (as->cpu_refcount == 0)
221 list_remove(&as->inactive_as_with_asid_link);
222
223 asid_put(as->asid);
224 }
225
226 spinlock_unlock(&asidlock);
227 interrupts_restore(ipl);
228
229
230 /*
231 * Destroy address space areas of the address space.
232 * The B+tree must be walked carefully because it is
233 * also being destroyed.
234 */
235 bool cond = true;
236 while (cond) {
237 ASSERT(!list_empty(&as->as_area_btree.leaf_list));
238
239 btree_node_t *node =
240 list_get_instance(list_first(&as->as_area_btree.leaf_list),
241 btree_node_t, leaf_link);
242
243 if ((cond = node->keys))
244 as_area_destroy(as, node->key[0]);
245 }
246
247 btree_destroy(&as->as_area_btree);
248
249#ifdef AS_PAGE_TABLE
250 page_table_destroy(as->genarch.page_table);
251#else
252 page_table_destroy(NULL);
253#endif
254
255 slab_free(as_slab, as);
256}
257
258/** Hold a reference to an address space.
259 *
260 * Holding a reference to an address space prevents destruction
261 * of that address space.
262 *
263 * @param as Address space to be held.
264 *
265 */
266NO_TRACE void as_hold(as_t *as)
267{
268 atomic_inc(&as->refcount);
269}
270
271/** Release a reference to an address space.
272 *
273 * The last one to release a reference to an address space
274 * destroys the address space.
275 *
276 * @param as Address space to be released.
277 *
278 */
279NO_TRACE void as_release(as_t *as)
280{
281 if (atomic_predec(&as->refcount) == 0)
282 as_destroy(as);
283}
284
285/** Check area conflicts with other areas.
286 *
287 * @param as Address space.
288 * @param addr Starting virtual address of the area being tested.
289 * @param count Number of pages in the area being tested.
290 * @param guarded True if the area being tested is protected by guard pages.
291 * @param avoid Do not touch this area.
292 *
293 * @return True if there is no conflict, false otherwise.
294 *
295 */
296NO_TRACE static bool check_area_conflicts(as_t *as, uintptr_t addr,
297 size_t count, bool guarded, as_area_t *avoid)
298{
299 ASSERT((addr % PAGE_SIZE) == 0);
300 ASSERT(mutex_locked(&as->lock));
301
302 /*
303 * If the addition of the supposed area address and size overflows,
304 * report conflict.
305 */
306 if (overflows_into_positive(addr, P2SZ(count)))
307 return false;
308
309 /*
310 * We don't want any area to have conflicts with NULL page.
311 */
312 if (overlaps(addr, P2SZ(count), (uintptr_t) NULL, PAGE_SIZE))
313 return false;
314
315 /*
316 * The leaf node is found in O(log n), where n is proportional to
317 * the number of address space areas belonging to as.
318 * The check for conflicts is then attempted on the rightmost
319 * record in the left neighbour, the leftmost record in the right
320 * neighbour and all records in the leaf node itself.
321 */
322 btree_node_t *leaf;
323 as_area_t *area =
324 (as_area_t *) btree_search(&as->as_area_btree, addr, &leaf);
325 if (area) {
326 if (area != avoid)
327 return false;
328 }
329
330 /* First, check the two border cases. */
331 btree_node_t *node =
332 btree_leaf_node_left_neighbour(&as->as_area_btree, leaf);
333 if (node) {
334 area = (as_area_t *) node->value[node->keys - 1];
335
336 if (area != avoid) {
337 mutex_lock(&area->lock);
338
339 /*
340 * If at least one of the two areas is protected
341 * by the AS_AREA_GUARD flag, then we must be sure
342 * that they are separated by at least one unmapped
343 * page.
344 */
345 int const gp = (guarded ||
346 (area->flags & AS_AREA_GUARD)) ? 1 : 0;
347
348 /*
349 * The area comes from the left neighbour node, which
350 * means that there already are some areas in the leaf
351 * node, which in turn means that adding gp is safe and
352 * will not cause an integer overflow.
353 */
354 if (overlaps(addr, P2SZ(count), area->base,
355 P2SZ(area->pages + gp))) {
356 mutex_unlock(&area->lock);
357 return false;
358 }
359
360 mutex_unlock(&area->lock);
361 }
362 }
363
364 node = btree_leaf_node_right_neighbour(&as->as_area_btree, leaf);
365 if (node) {
366 area = (as_area_t *) node->value[0];
367
368 if (area != avoid) {
369 int gp;
370
371 mutex_lock(&area->lock);
372
373 gp = (guarded || (area->flags & AS_AREA_GUARD)) ? 1 : 0;
374 if (gp && overflows(addr, P2SZ(count))) {
375 /*
376 * Guard page not needed if the supposed area
377 * is adjacent to the end of the address space.
378 * We already know that the following test is
379 * going to fail...
380 */
381 gp--;
382 }
383
384 if (overlaps(addr, P2SZ(count + gp), area->base,
385 P2SZ(area->pages))) {
386 mutex_unlock(&area->lock);
387 return false;
388 }
389
390 mutex_unlock(&area->lock);
391 }
392 }
393
394 /* Second, check the leaf node. */
395 btree_key_t i;
396 for (i = 0; i < leaf->keys; i++) {
397 area = (as_area_t *) leaf->value[i];
398 int agp;
399 int gp;
400
401 if (area == avoid)
402 continue;
403
404 mutex_lock(&area->lock);
405
406 gp = (guarded || (area->flags & AS_AREA_GUARD)) ? 1 : 0;
407 agp = gp;
408
409 /*
410 * Sanitize the two possible unsigned integer overflows.
411 */
412 if (gp && overflows(addr, P2SZ(count)))
413 gp--;
414 if (agp && overflows(area->base, P2SZ(area->pages)))
415 agp--;
416
417 if (overlaps(addr, P2SZ(count + gp), area->base,
418 P2SZ(area->pages + agp))) {
419 mutex_unlock(&area->lock);
420 return false;
421 }
422
423 mutex_unlock(&area->lock);
424 }
425
426 /*
427 * So far, the area does not conflict with other areas.
428 * Check that it does not conflict with the kernel address space.
429 */
430 if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
431 return !overlaps(addr, P2SZ(count), KERNEL_ADDRESS_SPACE_START,
432 KERNEL_ADDRESS_SPACE_END - KERNEL_ADDRESS_SPACE_START);
433 }
434
435 return true;
436}
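
/*
 * Worked example (illustrative; assumes 4 KiB pages): suppose an existing
 * guarded area occupies four pages starting at 0x40000000, i.e.
 * [0x40000000, 0x40004000). Because one of the two areas carries
 * AS_AREA_GUARD, the overlap test above inflates the existing area by one
 * guard page and treats it as [0x40000000, 0x40005000). A candidate area
 * starting at 0x40004000 therefore conflicts; the first page-aligned base
 * that passes the check is 0x40005000.
 */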
437
438/** Return pointer to unmapped address space area
439 *
440 * The address space must be already locked when calling
441 * this function.
442 *
443 * @param as Address space.
444 * @param bound Lowest address bound.
445 * @param size Requested size of the allocation.
446 * @param guarded True if the allocation must be protected by guard pages.
447 *
448 * @return Address of the beginning of unmapped address space area.
449 * @return -1 if no suitable address space area was found.
450 *
451 */
452NO_TRACE static uintptr_t as_get_unmapped_area(as_t *as, uintptr_t bound,
453 size_t size, bool guarded)
454{
455 ASSERT(mutex_locked(&as->lock));
456
457 if (size == 0)
458 return (uintptr_t) -1;
459
460 /*
461 * Make sure we allocate from page-aligned
462 * address. Check for possible overflow in
463 * each step.
464 */
465
466 size_t pages = SIZE2FRAMES(size);
467
468 /*
469 * Find the lowest unmapped address aligned on the size
470 * boundary, not smaller than bound and of the required size.
471 */
472
473 /* First check the bound address itself */
474 uintptr_t addr = ALIGN_UP(bound, PAGE_SIZE);
475 if (addr >= bound) {
476 if (guarded) {
477 /* Leave an unmapped page between the lower
478 * bound and the area's start address.
479 */
480 addr += P2SZ(1);
481 }
482
483 if (check_area_conflicts(as, addr, pages, guarded, NULL))
484 return addr;
485 }
486
487 /* Next, check the addresses behind each existing area. */
488 list_foreach(as->as_area_btree.leaf_list, cur) {
489 btree_node_t *node =
490 list_get_instance(cur, btree_node_t, leaf_link);
491
492 for (btree_key_t i = 0; i < node->keys; i++) {
493 as_area_t *area = (as_area_t *) node->value[i];
494
495 mutex_lock(&area->lock);
496
497 addr =
498 ALIGN_UP(area->base + P2SZ(area->pages), PAGE_SIZE);
499
500 if (guarded || area->flags & AS_AREA_GUARD) {
501 /* We must leave an unmapped page
502 * between the two areas.
503 */
504 addr += P2SZ(1);
505 }
506
507 bool avail =
508 ((addr >= bound) && (addr >= area->base) &&
509 (check_area_conflicts(as, addr, pages, guarded, area)));
510
511 mutex_unlock(&area->lock);
512
513 if (avail)
514 return addr;
515 }
516 }
517
518 /* No suitable address space area found */
519 return (uintptr_t) -1;
520}
521
522/** Create address space area of common attributes.
523 *
524 * The created address space area is added to the target address space.
525 *
526 * @param as Target address space.
527 * @param flags Flags of the area memory.
528 * @param size Size of area.
529 * @param attrs Attributes of the area.
530 * @param backend Address space area backend. NULL if no backend is used.
531 * @param backend_data NULL or a pointer to an array holding two void *.
532 * @param base Starting virtual address of the area.
533 * If set to -1, a suitable mappable area is found.
534 * @param bound Lowest address bound if base is set to -1.
535 * Otherwise ignored.
536 *
537 * @return Address space area on success or NULL on failure.
538 *
539 */
540as_area_t *as_area_create(as_t *as, unsigned int flags, size_t size,
541 unsigned int attrs, mem_backend_t *backend,
542 mem_backend_data_t *backend_data, uintptr_t *base, uintptr_t bound)
543{
544 if ((*base != (uintptr_t) -1) && ((*base % PAGE_SIZE) != 0))
545 return NULL;
546
547 if (size == 0)
548 return NULL;
549
550 size_t pages = SIZE2FRAMES(size);
551
552 /* Writeable executable areas are not supported. */
553 if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
554 return NULL;
555
556 bool const guarded = flags & AS_AREA_GUARD;
557
558 mutex_lock(&as->lock);
559
560 if (*base == (uintptr_t) -1) {
561 *base = as_get_unmapped_area(as, bound, size, guarded);
562 if (*base == (uintptr_t) -1) {
563 mutex_unlock(&as->lock);
564 return NULL;
565 }
566 }
567
	if (overflows_into_positive(*base, size)) {
		mutex_unlock(&as->lock);
		return NULL;
	}
570
571 if (!check_area_conflicts(as, *base, pages, guarded, NULL)) {
572 mutex_unlock(&as->lock);
573 return NULL;
574 }
575
576 as_area_t *area = (as_area_t *) malloc(sizeof(as_area_t), 0);
577
578 mutex_initialize(&area->lock, MUTEX_PASSIVE);
579
580 area->as = as;
581 area->flags = flags;
582 area->attributes = attrs;
583 area->pages = pages;
584 area->resident = 0;
585 area->base = *base;
586 area->sh_info = NULL;
587 area->backend = backend;
588
589 if (backend_data)
590 area->backend_data = *backend_data;
591 else
592 memsetb(&area->backend_data, sizeof(area->backend_data), 0);
593
594 if (area->backend && area->backend->create) {
595 if (!area->backend->create(area)) {
596 free(area);
597 mutex_unlock(&as->lock);
598 return NULL;
599 }
600 }
601
602 btree_create(&area->used_space);
603 btree_insert(&as->as_area_btree, *base, (void *) area,
604 NULL);
605
606 mutex_unlock(&as->lock);
607
608 return area;
609}
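
/*
 * Usage sketch (hypothetical in-kernel caller, mirroring
 * sys_as_area_create() below): create a two-page anonymous read/write
 * area and let the kernel pick a suitable base:
 *
 *	uintptr_t virt = (uintptr_t) -1;
 *	as_area_t *area = as_area_create(AS,
 *	    AS_AREA_READ | AS_AREA_WRITE | AS_AREA_CACHEABLE,
 *	    2 * PAGE_SIZE, AS_AREA_ATTR_NONE, &anon_backend, NULL,
 *	    &virt, 0);
 *	if (area)
 *		printf("new area at %p\n", (void *) virt);
 */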
610
611/** Find address space area and lock it.
612 *
613 * @param as Address space.
614 * @param va Virtual address.
615 *
616 * @return Locked address space area containing va on success or
617 * NULL on failure.
618 *
619 */
620NO_TRACE static as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
621{
622 ASSERT(mutex_locked(&as->lock));
623
624 btree_node_t *leaf;
625 as_area_t *area = (as_area_t *) btree_search(&as->as_area_btree, va,
626 &leaf);
627 if (area) {
628 /* va is the base address of an address space area */
629 mutex_lock(&area->lock);
630 return area;
631 }
632
633 /*
634 * Search the leaf node and the rightmost record of its left neighbour
635 * to find out whether this is a miss or va belongs to an address
636 * space area found there.
637 */
638
639 /* First, search the leaf node itself. */
640 btree_key_t i;
641
642 for (i = 0; i < leaf->keys; i++) {
643 area = (as_area_t *) leaf->value[i];
644
645 mutex_lock(&area->lock);
646
647 if ((area->base <= va) &&
648 (va <= area->base + (P2SZ(area->pages) - 1)))
649 return area;
650
651 mutex_unlock(&area->lock);
652 }
653
654 /*
655 * Second, locate the left neighbour and test its last record.
656 * Because of its position in the B+tree, it must have base < va.
657 */
658 btree_node_t *lnode = btree_leaf_node_left_neighbour(&as->as_area_btree,
659 leaf);
660 if (lnode) {
661 area = (as_area_t *) lnode->value[lnode->keys - 1];
662
663 mutex_lock(&area->lock);
664
665 if (va <= area->base + (P2SZ(area->pages) - 1))
666 return area;
667
668 mutex_unlock(&area->lock);
669 }
670
671 return NULL;
672}
673
674/** Find address space area and change it.
675 *
676 * @param as Address space.
677 * @param address Virtual address belonging to the area to be changed.
678 * Must be page-aligned.
679 * @param size New size of the virtual memory block starting at
680 * address.
681 * @param flags Flags influencing the remap operation. Currently unused.
682 *
683 * @return Zero on success or a value from @ref errno.h otherwise.
684 *
685 */
686int as_area_resize(as_t *as, uintptr_t address, size_t size, unsigned int flags)
687{
688 mutex_lock(&as->lock);
689
690 /*
691 * Locate the area.
692 */
693 as_area_t *area = find_area_and_lock(as, address);
694 if (!area) {
695 mutex_unlock(&as->lock);
696 return ENOENT;
697 }
698
699 if (area->backend == &phys_backend) {
700 /*
701 * Remapping of address space areas associated
702 * with memory mapped devices is not supported.
703 */
704 mutex_unlock(&area->lock);
705 mutex_unlock(&as->lock);
706 return ENOTSUP;
707 }
708
709 if (area->sh_info) {
710 /*
711 * Remapping of shared address space areas
712 * is not supported.
713 */
714 mutex_unlock(&area->lock);
715 mutex_unlock(&as->lock);
716 return ENOTSUP;
717 }
718
719 size_t pages = SIZE2FRAMES((address - area->base) + size);
720 if (!pages) {
721 /*
722 * Zero size address space areas are not allowed.
723 */
724 mutex_unlock(&area->lock);
725 mutex_unlock(&as->lock);
726 return EPERM;
727 }
728
729 if (pages < area->pages) {
730 uintptr_t start_free = area->base + P2SZ(pages);
731
732 /*
733 * Shrinking the area.
734 * No need to check for overlaps.
735 */
736
737 page_table_lock(as, false);
738
739 /*
740 * Remove frames belonging to used space starting from
741 * the highest addresses downwards until an overlap with
742 * the resized address space area is found. Note that this
743 * is also the right way to remove part of the used_space
744 * B+tree leaf list.
745 */
746 bool cond = true;
747 while (cond) {
748 ASSERT(!list_empty(&area->used_space.leaf_list));
749
750 btree_node_t *node =
751 list_get_instance(list_last(&area->used_space.leaf_list),
752 btree_node_t, leaf_link);
753
754 if ((cond = (bool) node->keys)) {
755 uintptr_t ptr = node->key[node->keys - 1];
756 size_t size =
757 (size_t) node->value[node->keys - 1];
758 size_t i = 0;
759
760 if (overlaps(ptr, P2SZ(size), area->base,
761 P2SZ(pages))) {
762
763 if (ptr + P2SZ(size) <= start_free) {
764 /*
765 * The whole interval fits
766 * completely in the resized
767 * address space area.
768 */
769 break;
770 }
771
772 /*
773 * Part of the interval overlaps with the resized
775 * address space area.
776 */
777
778 /* We are almost done */
779 cond = false;
780 i = (start_free - ptr) >> PAGE_WIDTH;
781 if (!used_space_remove(area, start_free,
782 size - i))
783 panic("Cannot remove used space.");
784 } else {
785 /*
786 * The interval of used space can be
787 * completely removed.
788 */
789 if (!used_space_remove(area, ptr, size))
790 panic("Cannot remove used space.");
791 }
792
793 /*
794 * Start TLB shootdown sequence.
795 *
796 * The sequence is rather short and can be
797 * repeated multiple times. The reason is that
798 * we don't want to have used_space_remove()
799 * inside the sequence as it may use a blocking
800 * memory allocation for its B+tree. Blocking
801 * while holding the tlblock spinlock is
802 * forbidden and would hit a kernel assertion.
803 */
804
805 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES,
806 as->asid, area->base + P2SZ(pages),
807 area->pages - pages);
808
809 for (; i < size; i++) {
810 pte_t *pte = page_mapping_find(as,
811 ptr + P2SZ(i), false);
812
813 ASSERT(pte);
814 ASSERT(PTE_VALID(pte));
815 ASSERT(PTE_PRESENT(pte));
816
817 if ((area->backend) &&
818 (area->backend->frame_free)) {
819 area->backend->frame_free(area,
820 ptr + P2SZ(i),
821 PTE_GET_FRAME(pte));
822 }
823
824 page_mapping_remove(as, ptr + P2SZ(i));
825 }
826
827 /*
828 * Finish TLB shootdown sequence.
829 */
830
831 tlb_invalidate_pages(as->asid,
832 area->base + P2SZ(pages),
833 area->pages - pages);
834
835 /*
836 * Invalidate software translation caches
837 * (e.g. TSB on sparc64, PHT on ppc32).
838 */
839 as_invalidate_translation_cache(as,
840 area->base + P2SZ(pages),
841 area->pages - pages);
842 tlb_shootdown_finalize(ipl);
843 }
844 }
845 page_table_unlock(as, false);
846 } else {
847 /*
848 * Growing the area.
849 */
850
		if (overflows_into_positive(address, P2SZ(pages))) {
			mutex_unlock(&area->lock);
			mutex_unlock(&as->lock);
			return EINVAL;
		}
853
854 /*
855 * Check for overlaps with other address space areas.
856 */
857 bool const guarded = area->flags & AS_AREA_GUARD;
858 if (!check_area_conflicts(as, address, pages, guarded, area)) {
859 mutex_unlock(&area->lock);
860 mutex_unlock(&as->lock);
861 return EADDRNOTAVAIL;
862 }
863 }
864
865 if (area->backend && area->backend->resize) {
866 if (!area->backend->resize(area, pages)) {
867 mutex_unlock(&area->lock);
868 mutex_unlock(&as->lock);
869 return ENOMEM;
870 }
871 }
872
873 area->pages = pages;
874
875 mutex_unlock(&area->lock);
876 mutex_unlock(&as->lock);
877
878 return 0;
879}
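
/*
 * Usage sketch (hypothetical, continuing the as_area_create() example
 * above): grow the anonymous area to four pages. The address may be any
 * page-aligned address inside the area; here its base is used:
 *
 *	int rc = as_area_resize(AS, virt, 4 * PAGE_SIZE, 0);
 *	if (rc != 0)
 *		printf("resize failed: %d\n", rc);
 */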
880
881/** Remove reference to address space area share info.
882 *
883 * If the reference count drops to 0, the sh_info is deallocated.
884 *
885 * @param sh_info Pointer to address space area share info.
886 *
887 */
888NO_TRACE static void sh_info_remove_reference(share_info_t *sh_info)
889{
890 bool dealloc = false;
891
892 mutex_lock(&sh_info->lock);
893 ASSERT(sh_info->refcount);
894
895 if (--sh_info->refcount == 0) {
896 dealloc = true;
897
898 /*
899 * Now walk carefully the pagemap B+tree and free/remove
900 * reference from all frames found there.
901 */
902 list_foreach(sh_info->pagemap.leaf_list, cur) {
903 btree_node_t *node
904 = list_get_instance(cur, btree_node_t, leaf_link);
905 btree_key_t i;
906
907 for (i = 0; i < node->keys; i++)
908 frame_free((uintptr_t) node->value[i]);
909 }
910
911 }
912 mutex_unlock(&sh_info->lock);
913
914 if (dealloc) {
915 btree_destroy(&sh_info->pagemap);
916 free(sh_info);
917 }
918}
919
920/** Destroy address space area.
921 *
922 * @param as Address space.
923 * @param address Address within the area to be deleted.
924 *
925 * @return Zero on success or a value from @ref errno.h on failure.
926 *
927 */
928int as_area_destroy(as_t *as, uintptr_t address)
929{
930 mutex_lock(&as->lock);
931
932 as_area_t *area = find_area_and_lock(as, address);
933 if (!area) {
934 mutex_unlock(&as->lock);
935 return ENOENT;
936 }
937
938 if (area->backend && area->backend->destroy)
939 area->backend->destroy(area);
940
941 uintptr_t base = area->base;
942
943 page_table_lock(as, false);
944
945 /*
946 * Start TLB shootdown sequence.
947 */
948 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
949 area->pages);
950
951 /*
952 * Visit only the pages mapped by used_space B+tree.
953 */
954 list_foreach(area->used_space.leaf_list, cur) {
955 btree_node_t *node;
956 btree_key_t i;
957
958 node = list_get_instance(cur, btree_node_t, leaf_link);
959 for (i = 0; i < node->keys; i++) {
960 uintptr_t ptr = node->key[i];
961 size_t size;
962
963 for (size = 0; size < (size_t) node->value[i]; size++) {
964 pte_t *pte = page_mapping_find(as,
965 ptr + P2SZ(size), false);
966
967 ASSERT(pte);
968 ASSERT(PTE_VALID(pte));
969 ASSERT(PTE_PRESENT(pte));
970
971 if ((area->backend) &&
972 (area->backend->frame_free)) {
973 area->backend->frame_free(area,
974 ptr + P2SZ(size),
975 PTE_GET_FRAME(pte));
976 }
977
978 page_mapping_remove(as, ptr + P2SZ(size));
979 }
980 }
981 }
982
983 /*
984 * Finish TLB shootdown sequence.
985 */
986
987 tlb_invalidate_pages(as->asid, area->base, area->pages);
988
989 /*
990 * Invalidate potential software translation caches
991 * (e.g. TSB on sparc64, PHT on ppc32).
992 */
993 as_invalidate_translation_cache(as, area->base, area->pages);
994 tlb_shootdown_finalize(ipl);
995
996 page_table_unlock(as, false);
997
998 btree_destroy(&area->used_space);
999
1000 area->attributes |= AS_AREA_ATTR_PARTIAL;
1001
1002 if (area->sh_info)
1003 sh_info_remove_reference(area->sh_info);
1004
1005 mutex_unlock(&area->lock);
1006
1007 /*
1008 * Remove the empty area from address space.
1009 */
1010 btree_remove(&as->as_area_btree, base, NULL);
1011
1012 free(area);
1013
1014 mutex_unlock(&as->lock);
1015 return 0;
1016}
1017
1018/** Share address space area with another or the same address space.
1019 *
1020 * Address space area mapping is shared with a new address space area.
1021 * If the source address space area has not been shared so far,
1022 * a new sh_info is created. The new address space area simply gets the
1023 * sh_info of the source area. The process of duplicating the
1024 * mapping is done through the backend share function.
1025 *
1026 * @param src_as Pointer to source address space.
1027 * @param src_base Base address of the source address space area.
1028 * @param acc_size Expected size of the source area.
1029 * @param dst_as Pointer to destination address space.
1030 * @param dst_flags_mask Destination address space area flags mask.
1031 * @param dst_base Target base address. If set to -1,
1032 * a suitable mappable area is found.
1033 * @param bound Lowest address bound if dst_base is set to -1.
1034 * Otherwise ignored.
1035 *
1036 * @return Zero on success.
1037 * @return ENOENT if there is no such task or such address space.
1038 * @return EPERM if there was a problem in accepting the area.
1039 * @return ENOMEM if there was a problem in allocating destination
1040 * address space area.
1041 * @return ENOTSUP if the address space area backend does not support
1042 * sharing.
1043 *
1044 */
1045int as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
1046 as_t *dst_as, unsigned int dst_flags_mask, uintptr_t *dst_base,
1047 uintptr_t bound)
1048{
1049 mutex_lock(&src_as->lock);
1050 as_area_t *src_area = find_area_and_lock(src_as, src_base);
1051 if (!src_area) {
1052 /*
1053 * Could not find the source address space area.
1054 */
1055 mutex_unlock(&src_as->lock);
1056 return ENOENT;
1057 }
1058
1059 if ((!src_area->backend) || (!src_area->backend->share)) {
1060 /*
1061 * There is no backend or the backend does not
1062 * know how to share the area.
1063 */
1064 mutex_unlock(&src_area->lock);
1065 mutex_unlock(&src_as->lock);
1066 return ENOTSUP;
1067 }
1068
1069 size_t src_size = P2SZ(src_area->pages);
1070 unsigned int src_flags = src_area->flags;
1071 mem_backend_t *src_backend = src_area->backend;
1072 mem_backend_data_t src_backend_data = src_area->backend_data;
1073
1074 /* Share the cacheable flag from the original mapping */
1075 if (src_flags & AS_AREA_CACHEABLE)
1076 dst_flags_mask |= AS_AREA_CACHEABLE;
1077
1078 if ((src_size != acc_size) ||
1079 ((src_flags & dst_flags_mask) != dst_flags_mask)) {
1080 mutex_unlock(&src_area->lock);
1081 mutex_unlock(&src_as->lock);
1082 return EPERM;
1083 }
1084
1085 /*
1086 * Now we are committed to sharing the area.
1087 * First, prepare the area for sharing.
1088 * Then it will be safe to unlock it.
1089 */
1090 share_info_t *sh_info = src_area->sh_info;
1091 if (!sh_info) {
1092 sh_info = (share_info_t *) malloc(sizeof(share_info_t), 0);
1093 mutex_initialize(&sh_info->lock, MUTEX_PASSIVE);
1094 sh_info->refcount = 2;
1095 btree_create(&sh_info->pagemap);
1096 src_area->sh_info = sh_info;
1097
1098 /*
1099 * Call the backend to setup sharing.
1100 */
1101 src_area->backend->share(src_area);
1102 } else {
1103 mutex_lock(&sh_info->lock);
1104 sh_info->refcount++;
1105 mutex_unlock(&sh_info->lock);
1106 }
1107
1108 mutex_unlock(&src_area->lock);
1109 mutex_unlock(&src_as->lock);
1110
1111 /*
1112 * Create copy of the source address space area.
1113 * The destination area is created with AS_AREA_ATTR_PARTIAL
1114 * attribute set which prevents race condition with
1115 * preliminary as_page_fault() calls.
1116 * The flags of the source area are masked against dst_flags_mask
1117 * to support sharing in less privileged mode.
1118 */
1119 as_area_t *dst_area = as_area_create(dst_as, dst_flags_mask,
1120 src_size, AS_AREA_ATTR_PARTIAL, src_backend,
1121 &src_backend_data, dst_base, bound);
1122 if (!dst_area) {
1123 /*
1124 * Destination address space area could not be created.
1125 */
1126 sh_info_remove_reference(sh_info);
1127
1128 return ENOMEM;
1129 }
1130
1131 /*
1132 * Now the destination address space area has been
1133 * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
1134 * attribute and set the sh_info.
1135 */
1136 mutex_lock(&dst_as->lock);
1137 mutex_lock(&dst_area->lock);
1138 dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
1139 dst_area->sh_info = sh_info;
1140 mutex_unlock(&dst_area->lock);
1141 mutex_unlock(&dst_as->lock);
1142
1143 return 0;
1144}
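
/*
 * Usage sketch (hypothetical caller): share a source area read-only into
 * another address space and let the kernel choose the destination base.
 * The acc_size argument must match the exact byte size of the source
 * area, otherwise EPERM is returned:
 *
 *	uintptr_t dst = (uintptr_t) -1;
 *	int rc = as_area_share(src_as, src_base, src_area_size,
 *	    dst_as, AS_AREA_READ, &dst, 0);
 */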
1145
1146/** Check access mode for address space area.
1147 *
1148 * @param area Address space area.
1149 * @param access Access mode.
1150 *
1151 * @return False if access violates area's permissions, true
1152 * otherwise.
1153 *
1154 */
1155NO_TRACE bool as_area_check_access(as_area_t *area, pf_access_t access)
1156{
1157 ASSERT(mutex_locked(&area->lock));
1158
1159 int flagmap[] = {
1160 [PF_ACCESS_READ] = AS_AREA_READ,
1161 [PF_ACCESS_WRITE] = AS_AREA_WRITE,
1162 [PF_ACCESS_EXEC] = AS_AREA_EXEC
1163 };
1164
1165 if (!(area->flags & flagmap[access]))
1166 return false;
1167
1168 return true;
1169}
1170
1171/** Convert address space area flags to page flags.
1172 *
1173 * @param aflags Flags of some address space area.
1174 *
1175 * @return Flags to be passed to page_mapping_insert().
1176 *
1177 */
1178NO_TRACE static unsigned int area_flags_to_page_flags(unsigned int aflags)
1179{
1180 unsigned int flags = PAGE_USER | PAGE_PRESENT;
1181
1182 if (aflags & AS_AREA_READ)
1183 flags |= PAGE_READ;
1184
1185 if (aflags & AS_AREA_WRITE)
1186 flags |= PAGE_WRITE;
1187
1188 if (aflags & AS_AREA_EXEC)
1189 flags |= PAGE_EXEC;
1190
1191 if (aflags & AS_AREA_CACHEABLE)
1192 flags |= PAGE_CACHEABLE;
1193
1194 return flags;
1195}
1196
1197/** Change address space area flags.
1198 *
1199 * The idea is to have the same data, but with a different access mode.
1200 * This is needed e.g. for writing code into memory and then executing it.
1201 * In order for this to work properly, this may copy the data
1202 * into private anonymous memory (unless it's already there).
1203 *
1204 * @param as Address space.
1205 * @param flags Flags of the area memory.
1206 * @param address Address within the area to be changed.
1207 *
1208 * @return Zero on success or a value from @ref errno.h on failure.
1209 *
1210 */
1211int as_area_change_flags(as_t *as, unsigned int flags, uintptr_t address)
1212{
1213 /* Flags for the new memory mapping */
1214 unsigned int page_flags = area_flags_to_page_flags(flags);
1215
1216 mutex_lock(&as->lock);
1217
1218 as_area_t *area = find_area_and_lock(as, address);
1219 if (!area) {
1220 mutex_unlock(&as->lock);
1221 return ENOENT;
1222 }
1223
1224 if ((area->sh_info) || (area->backend != &anon_backend)) {
1225 /* Copying shared areas not supported yet */
1226 /* Copying non-anonymous memory not supported yet */
1227 mutex_unlock(&area->lock);
1228 mutex_unlock(&as->lock);
1229 return ENOTSUP;
1230 }
1231
1232 /*
1233 * Compute total number of used pages in the used_space B+tree
1234 */
1235 size_t used_pages = 0;
1236
1237 list_foreach(area->used_space.leaf_list, cur) {
1238 btree_node_t *node
1239 = list_get_instance(cur, btree_node_t, leaf_link);
1240 btree_key_t i;
1241
1242 for (i = 0; i < node->keys; i++)
1243 used_pages += (size_t) node->value[i];
1244 }
1245
1246 /* An array for storing frame numbers */
1247 uintptr_t *old_frame = malloc(used_pages * sizeof(uintptr_t), 0);
1248
1249 page_table_lock(as, false);
1250
1251 /*
1252 * Start TLB shootdown sequence.
1253 */
1254 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
1255 area->pages);
1256
1257 /*
1258 * Remove used pages from page tables and remember their frame
1259 * numbers.
1260 */
1261 size_t frame_idx = 0;
1262
1263 list_foreach(area->used_space.leaf_list, cur) {
1264 btree_node_t *node = list_get_instance(cur, btree_node_t,
1265 leaf_link);
1266 btree_key_t i;
1267
1268 for (i = 0; i < node->keys; i++) {
1269 uintptr_t ptr = node->key[i];
1270 size_t size;
1271
1272 for (size = 0; size < (size_t) node->value[i]; size++) {
1273 pte_t *pte = page_mapping_find(as,
1274 ptr + P2SZ(size), false);
1275
1276 ASSERT(pte);
1277 ASSERT(PTE_VALID(pte));
1278 ASSERT(PTE_PRESENT(pte));
1279
1280 old_frame[frame_idx++] = PTE_GET_FRAME(pte);
1281
1282 /* Remove old mapping */
1283 page_mapping_remove(as, ptr + P2SZ(size));
1284 }
1285 }
1286 }
1287
1288 /*
1289 * Finish TLB shootdown sequence.
1290 */
1291
1292 tlb_invalidate_pages(as->asid, area->base, area->pages);
1293
1294 /*
1295 * Invalidate potential software translation caches
1296 * (e.g. TSB on sparc64, PHT on ppc32).
1297 */
1298 as_invalidate_translation_cache(as, area->base, area->pages);
1299 tlb_shootdown_finalize(ipl);
1300
1301 page_table_unlock(as, false);
1302
1303 /*
1304 * Set the new flags.
1305 */
1306 area->flags = flags;
1307
1308 /*
1309 * Map pages back in with new flags. This step is kept separate
1310 * so that the memory area cannot be accessed with both the old and
1311 * the new flags at once.
1312 */
1313 frame_idx = 0;
1314
1315 list_foreach(area->used_space.leaf_list, cur) {
1316 btree_node_t *node
1317 = list_get_instance(cur, btree_node_t, leaf_link);
1318 btree_key_t i;
1319
1320 for (i = 0; i < node->keys; i++) {
1321 uintptr_t ptr = node->key[i];
1322 size_t size;
1323
1324 for (size = 0; size < (size_t) node->value[i]; size++) {
1325 page_table_lock(as, false);
1326
1327 /* Insert the new mapping */
1328 page_mapping_insert(as, ptr + P2SZ(size),
1329 old_frame[frame_idx++], page_flags);
1330
1331 page_table_unlock(as, false);
1332 }
1333 }
1334 }
1335
1336 free(old_frame);
1337
1338 mutex_unlock(&area->lock);
1339 mutex_unlock(&as->lock);
1340
1341 return 0;
1342}
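
/*
 * Usage sketch (hypothetical): a typical use is flipping a freshly
 * written code buffer from writable to executable, since
 * as_area_create() refuses areas that are both writable and executable:
 *
 *	(void) as_area_change_flags(AS,
 *	    AS_AREA_READ | AS_AREA_EXEC | AS_AREA_CACHEABLE, virt);
 */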
1343
1344/** Handle page fault within the current address space.
1345 *
1346 * This is the high-level page fault handler. It decides whether the page fault
1347 * can be resolved by any backend and if so, it invokes the backend to resolve
1348 * the page fault.
1349 *
1350 * Interrupts are assumed disabled.
1351 *
1352 * @param page Faulting page.
1353 * @param access Access mode that caused the page fault (i.e.
1354 * read/write/exec).
1355 * @param istate Pointer to the interrupted state.
1356 *
1357 * @return AS_PF_FAULT on page fault.
1358 * @return AS_PF_OK on success.
1359 * @return AS_PF_DEFER if the fault was caused by copy_to_uspace()
1360 * or copy_from_uspace().
1361 *
1362 */
1363int as_page_fault(uintptr_t page, pf_access_t access, istate_t *istate)
1364{
1365 if (!THREAD)
1366 return AS_PF_FAULT;
1367
1368 if (!AS)
1369 return AS_PF_FAULT;
1370
1371 mutex_lock(&AS->lock);
1372 as_area_t *area = find_area_and_lock(AS, page);
1373 if (!area) {
1374 /*
1375 * No area contained mapping for 'page'.
1376 * Signal page fault to low-level handler.
1377 */
1378 mutex_unlock(&AS->lock);
1379 goto page_fault;
1380 }
1381
1382 if (area->attributes & AS_AREA_ATTR_PARTIAL) {
1383 /*
1384 * The address space area is not fully initialized.
1385 * Avoid possible race by returning error.
1386 */
1387 mutex_unlock(&area->lock);
1388 mutex_unlock(&AS->lock);
1389 goto page_fault;
1390 }
1391
1392 if ((!area->backend) || (!area->backend->page_fault)) {
1393 /*
1394 * The address space area is not backed by any backend
1395 * or the backend cannot handle page faults.
1396 */
1397 mutex_unlock(&area->lock);
1398 mutex_unlock(&AS->lock);
1399 goto page_fault;
1400 }
1401
1402 page_table_lock(AS, false);
1403
1404 /*
1405 * To avoid race condition between two page faults on the same address,
1406 * we need to make sure the mapping has not been already inserted.
1407 */
1408 pte_t *pte;
1409 if ((pte = page_mapping_find(AS, page, false))) {
1410 if (PTE_PRESENT(pte)) {
1411 if (((access == PF_ACCESS_READ) && PTE_READABLE(pte)) ||
1412 (access == PF_ACCESS_WRITE && PTE_WRITABLE(pte)) ||
1413 (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(pte))) {
1414 page_table_unlock(AS, false);
1415 mutex_unlock(&area->lock);
1416 mutex_unlock(&AS->lock);
1417 return AS_PF_OK;
1418 }
1419 }
1420 }
1421
1422 /*
1423 * Resort to the backend page fault handler.
1424 */
1425 if (area->backend->page_fault(area, page, access) != AS_PF_OK) {
1426 page_table_unlock(AS, false);
1427 mutex_unlock(&area->lock);
1428 mutex_unlock(&AS->lock);
1429 goto page_fault;
1430 }
1431
1432 page_table_unlock(AS, false);
1433 mutex_unlock(&area->lock);
1434 mutex_unlock(&AS->lock);
1435 return AS_PF_OK;
1436
1437page_fault:
1438 if (THREAD->in_copy_from_uspace) {
1439 THREAD->in_copy_from_uspace = false;
1440 istate_set_retaddr(istate,
1441 (uintptr_t) &memcpy_from_uspace_failover_address);
1442 } else if (THREAD->in_copy_to_uspace) {
1443 THREAD->in_copy_to_uspace = false;
1444 istate_set_retaddr(istate,
1445 (uintptr_t) &memcpy_to_uspace_failover_address);
1446 } else {
1447 return AS_PF_FAULT;
1448 }
1449
1450 return AS_PF_DEFER;
1451}
1452
1453/** Switch address spaces.
1454 *
1455 * Note that this function cannot sleep as it is essentially a part of
1456 * scheduling. Sleeping here would lead to deadlock on wakeup. Another
1457 * thing which is forbidden in this context is locking the address space.
1458 *
1459 * When this function is entered, no spinlocks may be held.
1460 *
1461 * @param old Old address space or NULL.
1462 * @param new New address space.
1463 *
1464 */
1465void as_switch(as_t *old_as, as_t *new_as)
1466{
1467 DEADLOCK_PROBE_INIT(p_asidlock);
1468 preemption_disable();
1469
1470retry:
1471 (void) interrupts_disable();
1472 if (!spinlock_trylock(&asidlock)) {
1473 /*
1474 * Avoid deadlock with TLB shootdown.
1475 * We can enable interrupts here because
1476 * preemption is disabled. We should not be
1477 * holding any other lock.
1478 */
1479 (void) interrupts_enable();
1480 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
1481 goto retry;
1482 }
1483 preemption_enable();
1484
1485 /*
1486 * First, take care of the old address space.
1487 */
1488 if (old_as) {
1489 ASSERT(old_as->cpu_refcount);
1490
1491 if ((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) {
1492 /*
1493 * The old address space is no longer active on
1494 * any processor. It can be appended to the
1495 * list of inactive address spaces with assigned
1496 * ASID.
1497 */
1498 ASSERT(old_as->asid != ASID_INVALID);
1499
1500 list_append(&old_as->inactive_as_with_asid_link,
1501 &inactive_as_with_asid_list);
1502 }
1503
1504 /*
1505 * Perform architecture-specific tasks when the address space
1506 * is being removed from the CPU.
1507 */
1508 as_deinstall_arch(old_as);
1509 }
1510
1511 /*
1512 * Second, prepare the new address space.
1513 */
1514 if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) {
1515 if (new_as->asid != ASID_INVALID)
1516 list_remove(&new_as->inactive_as_with_asid_link);
1517 else
1518 new_as->asid = asid_get();
1519 }
1520
1521#ifdef AS_PAGE_TABLE
1522 SET_PTL0_ADDRESS(new_as->genarch.page_table);
1523#endif
1524
1525 /*
1526 * Perform architecture-specific steps.
1527 * (e.g. write ASID to hardware register etc.)
1528 */
1529 as_install_arch(new_as);
1530
1531 spinlock_unlock(&asidlock);
1532
1533 AS = new_as;
1534}
1535
1536/** Compute flags for virtual address translation subsystem.
1537 *
1538 * @param area Address space area.
1539 *
1540 * @return Flags to be used in page_mapping_insert().
1541 *
1542 */
1543NO_TRACE unsigned int as_area_get_flags(as_area_t *area)
1544{
1545 ASSERT(mutex_locked(&area->lock));
1546
1547 return area_flags_to_page_flags(area->flags);
1548}
1549
1550/** Create page table.
1551 *
1552 * Depending on the architecture, create either an address-space-private or a global page
1553 * table.
1554 *
1555 * @param flags Flags saying whether the page table is for the kernel
1556 * address space.
1557 *
1558 * @return First entry of the page table.
1559 *
1560 */
1561NO_TRACE pte_t *page_table_create(unsigned int flags)
1562{
1563 ASSERT(as_operations);
1564 ASSERT(as_operations->page_table_create);
1565
1566 return as_operations->page_table_create(flags);
1567}
1568
1569/** Destroy page table.
1570 *
1571 * Destroy page table in architecture specific way.
1572 *
1573 * @param page_table Physical address of PTL0.
1574 *
1575 */
1576NO_TRACE void page_table_destroy(pte_t *page_table)
1577{
1578 ASSERT(as_operations);
1579 ASSERT(as_operations->page_table_destroy);
1580
1581 as_operations->page_table_destroy(page_table);
1582}
1583
1584/** Lock page table.
1585 *
1586 * This function should be called before any page_mapping_insert(),
1587 * page_mapping_remove() and page_mapping_find().
1588 *
1589 * Locking order is such that address space areas must be locked
1590 * prior to this call. Address space can be locked prior to this
1591 * call in which case the lock argument is false.
1592 *
1593 * @param as Address space.
1594 * @param lock If false, do not attempt to lock as->lock.
1595 *
1596 */
1597NO_TRACE void page_table_lock(as_t *as, bool lock)
1598{
1599 ASSERT(as_operations);
1600 ASSERT(as_operations->page_table_lock);
1601
1602 as_operations->page_table_lock(as, lock);
1603}
1604
1605/** Unlock page table.
1606 *
1607 * @param as Address space.
1608 * @param unlock If false, do not attempt to unlock as->lock.
1609 *
1610 */
1611NO_TRACE void page_table_unlock(as_t *as, bool unlock)
1612{
1613 ASSERT(as_operations);
1614 ASSERT(as_operations->page_table_unlock);
1615
1616 as_operations->page_table_unlock(as, unlock);
1617}
1618
1619/** Test whether page tables are locked.
1620 *
1621 * @param as Address space where the page tables belong.
1622 *
1623 * @return True if the page tables belonging to the address space
1624 * are locked, otherwise false.
1625 */
1626NO_TRACE bool page_table_locked(as_t *as)
1627{
1628 ASSERT(as_operations);
1629 ASSERT(as_operations->page_table_locked);
1630
1631 return as_operations->page_table_locked(as);
1632}
1633
1634/** Return size of the address space area with given base.
1635 *
1636 * @param base Arbitrary address inside the address space area.
1637 *
1638 * @return Size of the address space area in bytes or zero if it
1639 * does not exist.
1640 *
1641 */
1642size_t as_area_get_size(uintptr_t base)
1643{
1644 size_t size;
1645
1646 page_table_lock(AS, true);
1647 as_area_t *src_area = find_area_and_lock(AS, base);
1648
1649 if (src_area) {
1650 size = P2SZ(src_area->pages);
1651 mutex_unlock(&src_area->lock);
1652 } else
1653 size = 0;
1654
1655 page_table_unlock(AS, true);
1656 return size;
1657}
1658
1659/** Mark portion of address space area as used.
1660 *
1661 * The address space area must be already locked.
1662 *
1663 * @param area Address space area.
1664 * @param page First page to be marked.
1665 * @param count Number of pages to be marked.
1666 *
1667 * @return False on failure or true on success.
1668 *
1669 */
1670bool used_space_insert(as_area_t *area, uintptr_t page, size_t count)
1671{
1672 ASSERT(mutex_locked(&area->lock));
1673 ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
1674 ASSERT(count);
1675
1676 btree_node_t *leaf;
1677 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1678 if (pages) {
1679 /*
1680 * We hit the beginning of some used space.
1681 */
1682 return false;
1683 }
1684
1685 if (!leaf->keys) {
1686 btree_insert(&area->used_space, page, (void *) count, leaf);
1687 goto success;
1688 }
1689
1690 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space, leaf);
1691 if (node) {
1692 uintptr_t left_pg = node->key[node->keys - 1];
1693 uintptr_t right_pg = leaf->key[0];
1694 size_t left_cnt = (size_t) node->value[node->keys - 1];
1695 size_t right_cnt = (size_t) leaf->value[0];
1696
1697 /*
1698 * Examine the possibility that the interval fits
1699 * somewhere between the rightmost interval of
1700 * the left neighbour and the first interval of the leaf.
1701 */
1702
1703 if (page >= right_pg) {
1704 /* Do nothing. */
1705 } else if (overlaps(page, P2SZ(count), left_pg,
1706 P2SZ(left_cnt))) {
1707 /* The interval intersects with the left interval. */
1708 return false;
1709 } else if (overlaps(page, P2SZ(count), right_pg,
1710 P2SZ(right_cnt))) {
1711 /* The interval intersects with the right interval. */
1712 return false;
1713 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1714 (page + P2SZ(count) == right_pg)) {
1715 /*
1716 * The interval can be added by merging the two already
1717 * present intervals.
1718 */
1719 node->value[node->keys - 1] += count + right_cnt;
1720 btree_remove(&area->used_space, right_pg, leaf);
1721 goto success;
1722 } else if (page == left_pg + P2SZ(left_cnt)) {
1723 /*
1724 * The interval can be added by simply growing the left
1725 * interval.
1726 */
1727 node->value[node->keys - 1] += count;
1728 goto success;
1729 } else if (page + P2SZ(count) == right_pg) {
1730 /*
1731 * The interval can be added by simply moving the base of
1732 * the right interval down and increasing its size
1733 * accordingly.
1734 */
1735 leaf->value[0] += count;
1736 leaf->key[0] = page;
1737 goto success;
1738 } else {
1739 /*
1740 * The interval is between both neighbouring intervals,
1741 * but cannot be merged with any of them.
1742 */
1743 btree_insert(&area->used_space, page, (void *) count,
1744 leaf);
1745 goto success;
1746 }
1747 } else if (page < leaf->key[0]) {
1748 uintptr_t right_pg = leaf->key[0];
1749 size_t right_cnt = (size_t) leaf->value[0];
1750
1751 /*
1752 * Investigate the border case in which the left neighbour does
1753 * not exist but the interval fits from the left.
1754 */
1755
1756 if (overlaps(page, P2SZ(count), right_pg, P2SZ(right_cnt))) {
1757 /* The interval intersects with the right interval. */
1758 return false;
1759 } else if (page + P2SZ(count) == right_pg) {
1760 /*
1761 * The interval can be added by moving the base of the
1762 * right interval down and increasing its size
1763 * accordingly.
1764 */
1765 leaf->key[0] = page;
1766 leaf->value[0] += count;
1767 goto success;
1768 } else {
1769 /*
1770 * The interval doesn't adjoin with the right interval.
1771 * It must be added individually.
1772 */
1773 btree_insert(&area->used_space, page, (void *) count,
1774 leaf);
1775 goto success;
1776 }
1777 }
1778
1779 node = btree_leaf_node_right_neighbour(&area->used_space, leaf);
1780 if (node) {
1781 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1782 uintptr_t right_pg = node->key[0];
1783 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1784 size_t right_cnt = (size_t) node->value[0];
1785
1786 /*
1787 * Examine the possibility that the interval fits
1788 * somewhere between the leftmost interval of
1789 * the right neighbour and the last interval of the leaf.
1790 */
1791
1792 if (page < left_pg) {
1793 /* Do nothing. */
1794 } else if (overlaps(page, P2SZ(count), left_pg,
1795 P2SZ(left_cnt))) {
1796 /* The interval intersects with the left interval. */
1797 return false;
1798 } else if (overlaps(page, P2SZ(count), right_pg,
1799 P2SZ(right_cnt))) {
1800 /* The interval intersects with the right interval. */
1801 return false;
1802 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1803 (page + P2SZ(count) == right_pg)) {
1804 /*
1805 * The interval can be added by merging the two already
1806 * present intervals.
1807 */
1808 leaf->value[leaf->keys - 1] += count + right_cnt;
1809 btree_remove(&area->used_space, right_pg, node);
1810 goto success;
1811 } else if (page == left_pg + P2SZ(left_cnt)) {
1812 /*
1813 * The interval can be added by simply growing the left
1814 * interval.
1815 */
1816 leaf->value[leaf->keys - 1] += count;
1817 goto success;
1818 } else if (page + P2SZ(count) == right_pg) {
1819 /*
1820 * The interval can be added by simply moving the base of
1821 * the right interval down and increasing its size
1822 * accordingly.
1823 */
1824 node->value[0] += count;
1825 node->key[0] = page;
1826 goto success;
1827 } else {
1828 /*
1829 * The interval is between both neighbouring intervals,
1830 * but cannot be merged with any of them.
1831 */
1832 btree_insert(&area->used_space, page, (void *) count,
1833 leaf);
1834 goto success;
1835 }
1836 } else if (page >= leaf->key[leaf->keys - 1]) {
1837 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1838 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1839
1840 /*
1841 * Investigate the border case in which the right neighbour
1842 * does not exist but the interval fits from the right.
1843 */
1844
1845 if (overlaps(page, P2SZ(count), left_pg, P2SZ(left_cnt))) {
1846 /* The interval intersects with the left interval. */
1847 return false;
1848 } else if (left_pg + P2SZ(left_cnt) == page) {
1849 /*
1850 * The interval can be added by growing the left
1851 * interval.
1852 */
1853 leaf->value[leaf->keys - 1] += count;
1854 goto success;
1855 } else {
1856 /*
1857 * The interval doesn't adjoin with the left interval.
1858 * It must be added individually.
1859 */
1860 btree_insert(&area->used_space, page, (void *) count,
1861 leaf);
1862 goto success;
1863 }
1864 }
1865
1866 /*
1867 * Note that if the algorithm made it thus far, the interval can fit
1868 * only between two other intervals of the leaf. The two border cases
1869 * were already resolved.
1870 */
1871 btree_key_t i;
1872 for (i = 1; i < leaf->keys; i++) {
1873 if (page < leaf->key[i]) {
1874 uintptr_t left_pg = leaf->key[i - 1];
1875 uintptr_t right_pg = leaf->key[i];
1876 size_t left_cnt = (size_t) leaf->value[i - 1];
1877 size_t right_cnt = (size_t) leaf->value[i];
1878
1879 /*
1880 * The interval fits between left_pg and right_pg.
1881 */
1882
1883 if (overlaps(page, P2SZ(count), left_pg,
1884 P2SZ(left_cnt))) {
1885 /*
1886 * The interval intersects with the left
1887 * interval.
1888 */
1889 return false;
1890 } else if (overlaps(page, P2SZ(count), right_pg,
1891 P2SZ(right_cnt))) {
1892 /*
1893 * The interval intersects with the right
1894 * interval.
1895 */
1896 return false;
1897 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1898 (page + P2SZ(count) == right_pg)) {
1899 /*
1900 * The interval can be added by merging the two
1901 * already present intervals.
1902 */
1903 leaf->value[i - 1] += count + right_cnt;
1904 btree_remove(&area->used_space, right_pg, leaf);
1905 goto success;
1906 } else if (page == left_pg + P2SZ(left_cnt)) {
1907 /*
1908 * The interval can be added by simply growing
1909 * the left interval.
1910 */
1911 leaf->value[i - 1] += count;
1912 goto success;
1913 } else if (page + P2SZ(count) == right_pg) {
1914 /*
1915 * The interval can be added by simply moving
1916 * the base of the right interval down and
1917 * increasing its size accordingly.
1918 */
1919 leaf->value[i] += count;
1920 leaf->key[i] = page;
1921 goto success;
1922 } else {
1923 /*
1924 * The interval is between both neighbouring
1925 * intervals, but cannot be merged with any of
1926 * them.
1927 */
1928 btree_insert(&area->used_space, page,
1929 (void *) count, leaf);
1930 goto success;
1931 }
1932 }
1933 }
1934
1935 panic("Inconsistency detected while adding %zu pages of used "
1936 "space at %p.", count, (void *) page);
1937
1938success:
1939 area->resident += count;
1940 return true;
1941}
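
/*
 * Worked example (illustrative; assumes 4 KiB pages): if used_space
 * already records an interval of 3 pages at 0x1000 and an interval of
 * 2 pages at 0x5000, then used_space_insert(area, 0x4000, 1) hits the
 * merge case above: the single new page bridges the gap and the two
 * records collapse into one interval of 6 pages starting at 0x1000.
 */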
1942
1943/** Mark portion of address space area as unused.
1944 *
1945 * The address space area must be already locked.
1946 *
1947 * @param area Address space area.
1948 * @param page First page to be marked.
1949 * @param count Number of pages to be marked.
1950 *
1951 * @return False on failure or true on success.
1952 *
1953 */
1954bool used_space_remove(as_area_t *area, uintptr_t page, size_t count)
1955{
1956 ASSERT(mutex_locked(&area->lock));
1957 ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
1958 ASSERT(count);
1959
1960 btree_node_t *leaf;
1961 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1962 if (pages) {
1963 /*
1964 * We are lucky, page is the beginning of some interval.
1965 */
1966 if (count > pages) {
1967 return false;
1968 } else if (count == pages) {
1969 btree_remove(&area->used_space, page, leaf);
1970 goto success;
1971 } else {
1972 /*
1973 * Find the respective interval.
1974 * Decrease its size and relocate its start address.
1975 */
1976 btree_key_t i;
1977 for (i = 0; i < leaf->keys; i++) {
1978 if (leaf->key[i] == page) {
1979 leaf->key[i] += P2SZ(count);
1980 leaf->value[i] -= count;
1981 goto success;
1982 }
1983 }
1984
1985 goto error;
1986 }
1987 }
1988
1989 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space,
1990 leaf);
1991 if ((node) && (page < leaf->key[0])) {
1992 uintptr_t left_pg = node->key[node->keys - 1];
1993 size_t left_cnt = (size_t) node->value[node->keys - 1];
1994
1995 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
1996 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
1997 /*
1998 * The interval is contained in the rightmost
1999 * interval of the left neighbour and can be
2000 * removed by updating the size of the bigger
2001 * interval.
2002 */
2003 node->value[node->keys - 1] -= count;
2004 goto success;
2005 } else if (page + P2SZ(count) <
2006 left_pg + P2SZ(left_cnt)) {
2007 size_t new_cnt;
2008
2009 /*
2010 * The interval is contained in the rightmost
2011 * interval of the left neighbour but its
2012 * removal requires both updating the size of
2013 * the original interval and also inserting a
2014 * new interval.
2015 */
2016 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2017 (page + P2SZ(count))) >> PAGE_WIDTH;
2018 node->value[node->keys - 1] -= count + new_cnt;
2019 btree_insert(&area->used_space, page +
2020 P2SZ(count), (void *) new_cnt, leaf);
2021 goto success;
2022 }
2023 }
2024
2025 return false;
2026 } else if (page < leaf->key[0])
2027 return false;
2028
2029 if (page > leaf->key[leaf->keys - 1]) {
2030 uintptr_t left_pg = leaf->key[leaf->keys - 1];
2031 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
2032
2033 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2034 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2035 /*
2036 * The interval is contained in the rightmost
2037 * interval of the leaf and can be removed by
2038 * updating the size of the bigger interval.
2039 */
2040 leaf->value[leaf->keys - 1] -= count;
2041 goto success;
2042 } else if (page + P2SZ(count) < left_pg +
2043 P2SZ(left_cnt)) {
2044 size_t new_cnt;
2045
2046 /*
2047 * The interval is contained in the rightmost
2048 * interval of the leaf but its removal
2049 * requires both updating the size of the
2050 * original interval and also inserting a new
2051 * interval.
2052 */
2053 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2054 (page + P2SZ(count))) >> PAGE_WIDTH;
2055 leaf->value[leaf->keys - 1] -= count + new_cnt;
2056 btree_insert(&area->used_space, page +
2057 P2SZ(count), (void *) new_cnt, leaf);
2058 goto success;
2059 }
2060 }
2061
2062 return false;
2063 }
2064
2065 /*
2066 * The border cases have been already resolved.
2067 * Now the interval can be only between intervals of the leaf.
2068 */
2069 btree_key_t i;
2070 for (i = 1; i < leaf->keys - 1; i++) {
2071 if (page < leaf->key[i]) {
2072 uintptr_t left_pg = leaf->key[i - 1];
2073 size_t left_cnt = (size_t) leaf->value[i - 1];
2074
2075 /*
2076 * Now the interval is between intervals corresponding
2077 * to (i - 1) and i.
2078 */
2079 if (overlaps(left_pg, P2SZ(left_cnt), page,
2080 P2SZ(count))) {
2081 if (page + P2SZ(count) ==
2082 left_pg + P2SZ(left_cnt)) {
2083 /*
2084 * The interval is contained in the
2085 * interval (i - 1) of the leaf and can
2086 * be removed by updating the size of
2087 * the bigger interval.
2088 */
2089 leaf->value[i - 1] -= count;
2090 goto success;
2091 } else if (page + P2SZ(count) <
2092 left_pg + P2SZ(left_cnt)) {
2093 size_t new_cnt;
2094
2095 /*
2096 * The interval is contained in the
2097 * interval (i - 1) of the leaf but its
2098 * removal requires both updating the
2099 * size of the original interval and
2100 * also inserting a new interval.
2101 */
2102 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2103 (page + P2SZ(count))) >>
2104 PAGE_WIDTH;
2105 leaf->value[i - 1] -= count + new_cnt;
2106 btree_insert(&area->used_space, page +
2107 P2SZ(count), (void *) new_cnt,
2108 leaf);
2109 goto success;
2110 }
2111 }
2112
2113 return false;
2114 }
2115 }
2116
2117error:
2118 panic("Inconsistency detected while removing %zu pages of used "
2119 "space from %p.", count, (void *) page);
2120
2121success:
2122 area->resident -= count;
2123 return true;
2124}
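
/*
 * Worked example (illustrative; assumes 4 KiB pages): with a recorded
 * interval of 6 pages at 0x1000, used_space_remove(area, 0x3000, 2)
 * punches a hole in the middle: the original record shrinks to 2 pages
 * at 0x1000 and a new 2-page interval is inserted at 0x5000.
 */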
2125
2126/*
2127 * Address space related syscalls.
2128 */
2129
2130sysarg_t sys_as_area_create(uintptr_t base, size_t size, unsigned int flags,
2131 uintptr_t bound)
2132{
2133 uintptr_t virt = base;
2134 as_area_t *area = as_area_create(AS, flags | AS_AREA_CACHEABLE, size,
2135 AS_AREA_ATTR_NONE, &anon_backend, NULL, &virt, bound);
2136 if (area == NULL)
2137 return (sysarg_t) -1;
2138
2139 return (sysarg_t) virt;
2140}
2141
2142sysarg_t sys_as_area_resize(uintptr_t address, size_t size, unsigned int flags)
2143{
2144 return (sysarg_t) as_area_resize(AS, address, size, 0);
2145}
2146
2147sysarg_t sys_as_area_change_flags(uintptr_t address, unsigned int flags)
2148{
2149 return (sysarg_t) as_area_change_flags(AS, flags, address);
2150}
2151
2152sysarg_t sys_as_area_destroy(uintptr_t address)
2153{
2154 return (sysarg_t) as_area_destroy(AS, address);
2155}
2156
2157/** Get list of address space areas.
2158 *
2159 * @param as Address space.
2160 * @param obuf Place to save pointer to returned buffer.
2161 * @param osize Place to save size of returned buffer.
2162 *
2163 */
2164void as_get_area_info(as_t *as, as_area_info_t **obuf, size_t *osize)
2165{
2166 mutex_lock(&as->lock);
2167
2168 /* First pass, count number of areas. */
2169
2170 size_t area_cnt = 0;
2171
2172 list_foreach(as->as_area_btree.leaf_list, cur) {
2173 btree_node_t *node =
2174 list_get_instance(cur, btree_node_t, leaf_link);
2175 area_cnt += node->keys;
2176 }
2177
2178 size_t isize = area_cnt * sizeof(as_area_info_t);
2179 as_area_info_t *info = malloc(isize, 0);
2180
2181 /* Second pass, record data. */
2182
2183 size_t area_idx = 0;
2184
2185 list_foreach(as->as_area_btree.leaf_list, cur) {
2186 btree_node_t *node =
2187 list_get_instance(cur, btree_node_t, leaf_link);
2188 btree_key_t i;
2189
2190 for (i = 0; i < node->keys; i++) {
2191 as_area_t *area = node->value[i];
2192
2193 ASSERT(area_idx < area_cnt);
2194 mutex_lock(&area->lock);
2195
2196 info[area_idx].start_addr = area->base;
2197 info[area_idx].size = P2SZ(area->pages);
2198 info[area_idx].flags = area->flags;
2199 ++area_idx;
2200
2201 mutex_unlock(&area->lock);
2202 }
2203 }
2204
2205 mutex_unlock(&as->lock);
2206
2207 *obuf = info;
2208 *osize = isize;
2209}
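
/*
 * Usage sketch (hypothetical caller): dump the areas of the current
 * address space and release the returned buffer afterwards:
 *
 *	as_area_info_t *info;
 *	size_t size;
 *	as_get_area_info(AS, &info, &size);
 *	for (size_t i = 0; i < size / sizeof(as_area_info_t); i++)
 *		printf("%p: %zu bytes\n", (void *) info[i].start_addr,
 *		    info[i].size);
 *	free(info);
 */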
2210
2211/** Print out information about address space.
2212 *
2213 * @param as Address space.
2214 *
2215 */
2216void as_print(as_t *as)
2217{
2218 mutex_lock(&as->lock);
2219
2220 /* Print out info about address space areas */
2221 list_foreach(as->as_area_btree.leaf_list, cur) {
2222 btree_node_t *node
2223 = list_get_instance(cur, btree_node_t, leaf_link);
2224 btree_key_t i;
2225
2226 for (i = 0; i < node->keys; i++) {
2227 as_area_t *area = node->value[i];
2228
2229 mutex_lock(&area->lock);
2230 printf("as_area: %p, base=%p, pages=%zu"
2231 " (%p - %p)\n", area, (void *) area->base,
2232 area->pages, (void *) area->base,
2233 (void *) (area->base + P2SZ(area->pages)));
2234 mutex_unlock(&area->lock);
2235 }
2236 }
2237
2238 mutex_unlock(&as->lock);
2239}
2240
2241/** @}
2242 */