source: mainline/kernel/generic/src/mm/as.c@ feeac0d

Last change on this file since feeac0d was feeac0d, checked in by Jiri Svoboda <jiri@…>, 12 years ago

Simplify use of list_foreach.

1/*
2 * Copyright (c) 2010 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup genericmm
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Address space related functions.
36 *
37 * This file contains address space manipulation functions.
38 * Roughly speaking, this is a higher-level client of
39 * the Virtual Address Translation (VAT) subsystem.
40 *
41 * Functionality provided by this file allows one to
42 * create address spaces and create, resize and share
43 * address space areas.
44 *
45 * @see page.c
46 *
47 */
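/*
 * Illustrative sketch only: one way a kernel client might use the functions
 * below to set up an address space with a single anonymous, readable and
 * writable area. The flag combination and the area size are assumptions made
 * for the example; sys_as_area_create() at the end of this file is the real
 * syscall-level entry point.
 *
 *	as_t *as = as_create(0);
 *	uintptr_t base = (uintptr_t) -1;   // let the kernel pick the address
 *	as_area_t *area = as_area_create(as, AS_AREA_READ | AS_AREA_WRITE |
 *	    AS_AREA_CACHEABLE, 4 * PAGE_SIZE, AS_AREA_ATTR_NONE,
 *	    &anon_backend, NULL, &base, 0);
 */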
48
49#include <mm/as.h>
50#include <arch/mm/as.h>
51#include <mm/page.h>
52#include <mm/frame.h>
53#include <mm/slab.h>
54#include <mm/tlb.h>
55#include <arch/mm/page.h>
56#include <genarch/mm/page_pt.h>
57#include <genarch/mm/page_ht.h>
58#include <mm/asid.h>
59#include <arch/mm/asid.h>
60#include <preemption.h>
61#include <synch/spinlock.h>
62#include <synch/mutex.h>
63#include <adt/list.h>
64#include <adt/btree.h>
65#include <proc/task.h>
66#include <proc/thread.h>
67#include <arch/asm.h>
68#include <panic.h>
69#include <debug.h>
70#include <print.h>
71#include <memstr.h>
72#include <macros.h>
73#include <bitops.h>
74#include <arch.h>
75#include <errno.h>
76#include <config.h>
77#include <align.h>
78#include <typedefs.h>
79#include <syscall/copy.h>
80#include <arch/interrupt.h>
81#include <interrupt.h>
82
83/**
84 * Each architecture decides what functions will be used to carry out
85 * address space operations such as creating or locking page tables.
86 */
87as_operations_t *as_operations = NULL;
88
89/** Slab for as_t objects.
90 *
91 */
92static slab_cache_t *as_slab;
93
94/** ASID subsystem lock.
95 *
96 * This lock protects:
97 * - inactive_as_with_asid_list
98 * - as->asid for each as of the as_t type
99 * - asids_allocated counter
100 *
101 */
102SPINLOCK_INITIALIZE(asidlock);
103
104/**
105 * Inactive address spaces (on all processors)
106 * that have valid ASID.
107 */
108LIST_INITIALIZE(inactive_as_with_asid_list);
109
110/** Kernel address space. */
111as_t *AS_KERNEL = NULL;
112
113NO_TRACE static int as_constructor(void *obj, unsigned int flags)
114{
115 as_t *as = (as_t *) obj;
116
117 link_initialize(&as->inactive_as_with_asid_link);
118 mutex_initialize(&as->lock, MUTEX_PASSIVE);
119
120 return as_constructor_arch(as, flags);
121}
122
123NO_TRACE static size_t as_destructor(void *obj)
124{
125 return as_destructor_arch((as_t *) obj);
126}
127
128/** Initialize address space subsystem. */
129void as_init(void)
130{
131 as_arch_init();
132
133 as_slab = slab_cache_create("as_t", sizeof(as_t), 0,
134 as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);
135
136 AS_KERNEL = as_create(FLAG_AS_KERNEL);
137 if (!AS_KERNEL)
138 panic("Cannot create kernel address space.");
139
140 /*
141 * Make sure the kernel address space
142 * reference count never drops to zero.
143 */
144 as_hold(AS_KERNEL);
145}
146
147/** Create address space.
148 *
149 * @param flags Flags that influence the way in which the address
150 * space is created.
151 *
152 */
153as_t *as_create(unsigned int flags)
154{
155 as_t *as = (as_t *) slab_alloc(as_slab, 0);
156 (void) as_create_arch(as, 0);
157
158 btree_create(&as->as_area_btree);
159
160 if (flags & FLAG_AS_KERNEL)
161 as->asid = ASID_KERNEL;
162 else
163 as->asid = ASID_INVALID;
164
165 atomic_set(&as->refcount, 0);
166 as->cpu_refcount = 0;
167
168#ifdef AS_PAGE_TABLE
169 as->genarch.page_table = page_table_create(flags);
170#else
171 page_table_create(flags);
172#endif
173
174 return as;
175}
176
177/** Destroy address space.
178 *
179 * When there are no tasks referencing this address space (i.e. its refcount is
180 * zero), the address space can be destroyed.
181 *
182 * We know that we don't hold any spinlock.
183 *
184 * @param as Address space to be destroyed.
185 *
186 */
187void as_destroy(as_t *as)
188{
189 DEADLOCK_PROBE_INIT(p_asidlock);
190
191 ASSERT(as != AS);
192 ASSERT(atomic_get(&as->refcount) == 0);
193
194 /*
195 * Since there is no reference to this address space, it is safe not to
196 * lock its mutex.
197 */
198
199 /*
200 * We need to avoid deadlock between TLB shootdown and asidlock.
201 * We therefore try to take asidlock conditionally and if we don't succeed,
202 * we enable interrupts and try again. This is done while preemption is
203 * disabled to prevent nested context switches. We also depend on the
204 * fact that so far no spinlocks are held.
205 */
206 preemption_disable();
207 ipl_t ipl = interrupts_read();
208
209retry:
210 interrupts_disable();
211 if (!spinlock_trylock(&asidlock)) {
212 interrupts_enable();
213 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
214 goto retry;
215 }
216
217 /* Interrupts disabled, enable preemption */
218 preemption_enable();
219
220 if ((as->asid != ASID_INVALID) && (as != AS_KERNEL)) {
221 if (as->cpu_refcount == 0)
222 list_remove(&as->inactive_as_with_asid_link);
223
224 asid_put(as->asid);
225 }
226
227 spinlock_unlock(&asidlock);
228 interrupts_restore(ipl);
229
230
231 /*
232 * Destroy address space areas of the address space.
233 * The B+tree must be walked carefully because it is
234 * also being destroyed.
235 */
236 bool cond = true;
237 while (cond) {
238 ASSERT(!list_empty(&as->as_area_btree.leaf_list));
239
240 btree_node_t *node =
241 list_get_instance(list_first(&as->as_area_btree.leaf_list),
242 btree_node_t, leaf_link);
243
244 if ((cond = node->keys))
245 as_area_destroy(as, node->key[0]);
246 }
247
248 btree_destroy(&as->as_area_btree);
249
250#ifdef AS_PAGE_TABLE
251 page_table_destroy(as->genarch.page_table);
252#else
253 page_table_destroy(NULL);
254#endif
255
256 slab_free(as_slab, as);
257}
258
259/** Hold a reference to an address space.
260 *
261 * Holding a reference to an address space prevents destruction
262 * of that address space.
263 *
264 * @param as Address space to be held.
265 *
266 */
267NO_TRACE void as_hold(as_t *as)
268{
269 atomic_inc(&as->refcount);
270}
271
272/** Release a reference to an address space.
273 *
274 * The last one to release a reference to an address space
275 * destroys the address space.
276 *
277 * @param as Address space to be released.
278 *
279 */
280NO_TRACE void as_release(as_t *as)
281{
282 if (atomic_predec(&as->refcount) == 0)
283 as_destroy(as);
284}
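/*
 * Hedged usage sketch: a subsystem that stores a pointer to an address space
 * is expected to pair the two calls above (the holder structure below is
 * hypothetical):
 *
 *	as_hold(as);		// keep as_destroy() at bay while referenced
 *	holder->as = as;
 *	...
 *	as_release(as);		// may call as_destroy() on the last reference
 */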
285
286/** Check area conflicts with other areas.
287 *
288 * @param as Address space.
289 * @param addr Starting virtual address of the area being tested.
290 * @param count Number of pages in the area being tested.
291 * @param guarded True if the area being tested is protected by guard pages.
292 * @param avoid Do not touch this area.
293 *
294 * @return True if there is no conflict, false otherwise.
295 *
296 */
297NO_TRACE static bool check_area_conflicts(as_t *as, uintptr_t addr,
298 size_t count, bool guarded, as_area_t *avoid)
299{
300 ASSERT((addr % PAGE_SIZE) == 0);
301 ASSERT(mutex_locked(&as->lock));
302
303 /*
304 * If the addition of the supposed area address and size overflows,
305 * report conflict.
306 */
307 if (overflows_into_positive(addr, P2SZ(count)))
308 return false;
309
310 /*
311 * We don't want any area to have conflicts with NULL page.
312 */
313 if (overlaps(addr, P2SZ(count), (uintptr_t) NULL, PAGE_SIZE))
314 return false;
315
316 /*
317 * The leaf node is found in O(log n), where n is proportional to
318 * the number of address space areas belonging to as.
319 * The check for conflicts is then attempted on the rightmost
320 * record in the left neighbour, the leftmost record in the right
321 * neighbour and all records in the leaf node itself.
322 */
323 btree_node_t *leaf;
324 as_area_t *area =
325 (as_area_t *) btree_search(&as->as_area_btree, addr, &leaf);
326 if (area) {
327 if (area != avoid)
328 return false;
329 }
330
331 /* First, check the two border cases. */
332 btree_node_t *node =
333 btree_leaf_node_left_neighbour(&as->as_area_btree, leaf);
334 if (node) {
335 area = (as_area_t *) node->value[node->keys - 1];
336
337 if (area != avoid) {
338 mutex_lock(&area->lock);
339
340 /*
341 * If at least one of the two areas is protected
342 * by the AS_AREA_GUARD flag then we must be sure
343 * that they are separated by at least one unmapped
344 * page.
345 */
346 int const gp = (guarded ||
347 (area->flags & AS_AREA_GUARD)) ? 1 : 0;
348
349 /*
350 * The area comes from the left neighbour node, which
351 * means that there already are some areas in the leaf
352 * node, which in turn means that adding gp is safe and
353 * will not cause an integer overflow.
354 */
355 if (overlaps(addr, P2SZ(count), area->base,
356 P2SZ(area->pages + gp))) {
357 mutex_unlock(&area->lock);
358 return false;
359 }
360
361 mutex_unlock(&area->lock);
362 }
363 }
364
365 node = btree_leaf_node_right_neighbour(&as->as_area_btree, leaf);
366 if (node) {
367 area = (as_area_t *) node->value[0];
368
369 if (area != avoid) {
370 int gp;
371
372 mutex_lock(&area->lock);
373
374 gp = (guarded || (area->flags & AS_AREA_GUARD)) ? 1 : 0;
375 if (gp && overflows(addr, P2SZ(count))) {
376 /*
377 * Guard page not needed if the supposed area
378 * is adjacent to the end of the address space.
379 * We already know that the following test is
380 * going to fail...
381 */
382 gp--;
383 }
384
385 if (overlaps(addr, P2SZ(count + gp), area->base,
386 P2SZ(area->pages))) {
387 mutex_unlock(&area->lock);
388 return false;
389 }
390
391 mutex_unlock(&area->lock);
392 }
393 }
394
395 /* Second, check the leaf node. */
396 btree_key_t i;
397 for (i = 0; i < leaf->keys; i++) {
398 area = (as_area_t *) leaf->value[i];
399 int agp;
400 int gp;
401
402 if (area == avoid)
403 continue;
404
405 mutex_lock(&area->lock);
406
407 gp = (guarded || (area->flags & AS_AREA_GUARD)) ? 1 : 0;
408 agp = gp;
409
410 /*
411 * Sanitize the two possible unsigned integer overflows.
412 */
413 if (gp && overflows(addr, P2SZ(count)))
414 gp--;
415 if (agp && overflows(area->base, P2SZ(area->pages)))
416 agp--;
417
418 if (overlaps(addr, P2SZ(count + gp), area->base,
419 P2SZ(area->pages + agp))) {
420 mutex_unlock(&area->lock);
421 return false;
422 }
423
424 mutex_unlock(&area->lock);
425 }
426
427 /*
428 * So far, the area does not conflict with other areas.
429 * Check if it is contained in the user address space.
430 */
431 if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
432 return iswithin(USER_ADDRESS_SPACE_START,
433 (USER_ADDRESS_SPACE_END - USER_ADDRESS_SPACE_START) + 1,
434 addr, P2SZ(count));
435 }
436
437 return true;
438}
439
440/** Return address of an unmapped address space area.
441 *
442 * The address space must be already locked when calling
443 * this function.
444 *
445 * @param as Address space.
446 * @param bound Lowest address bound.
447 * @param size Requested size of the allocation.
448 * @param guarded True if the allocation must be protected by guard pages.
449 *
450 * @return Address of the beginning of unmapped address space area.
451 * @return -1 if no suitable address space area was found.
452 *
453 */
454NO_TRACE static uintptr_t as_get_unmapped_area(as_t *as, uintptr_t bound,
455 size_t size, bool guarded)
456{
457 ASSERT(mutex_locked(&as->lock));
458
459 if (size == 0)
460 return (uintptr_t) -1;
461
462 /*
463 * Make sure we allocate from page-aligned
464 * address. Check for possible overflow in
465 * each step.
466 */
467
468 size_t pages = SIZE2FRAMES(size);
469
470 /*
471 * Find the lowest unmapped address aligned on the size
472 * boundary, not smaller than bound and of the required size.
473 */
474
475 /* First check the bound address itself */
476 uintptr_t addr = ALIGN_UP(bound, PAGE_SIZE);
477 if (addr >= bound) {
478 if (guarded) {
479 /* Leave an unmapped page between the lower
480 * bound and the area's start address.
481 */
482 addr += P2SZ(1);
483 }
484
485 if (check_area_conflicts(as, addr, pages, guarded, NULL))
486 return addr;
487 }
488
489 /* Then check the addresses behind each area */
490 list_foreach(as->as_area_btree.leaf_list, leaf_link, btree_node_t, node) {
491
492 for (btree_key_t i = 0; i < node->keys; i++) {
493 as_area_t *area = (as_area_t *) node->value[i];
494
495 mutex_lock(&area->lock);
496
497 addr =
498 ALIGN_UP(area->base + P2SZ(area->pages), PAGE_SIZE);
499
500 if (guarded || area->flags & AS_AREA_GUARD) {
501 /* We must leave an unmapped page
502 * between the two areas.
503 */
504 addr += P2SZ(1);
505 }
506
507 bool avail =
508 ((addr >= bound) && (addr >= area->base) &&
509 (check_area_conflicts(as, addr, pages, guarded, area)));
510
511 mutex_unlock(&area->lock);
512
513 if (avail)
514 return addr;
515 }
516 }
517
518 /* No suitable address space area found */
519 return (uintptr_t) -1;
520}
521
522/** Create address space area of common attributes.
523 *
524 * The created address space area is added to the target address space.
525 *
526 * @param as Target address space.
527 * @param flags Flags of the area memory.
528 * @param size Size of area.
529 * @param attrs Attributes of the area.
530 * @param backend Address space area backend. NULL if no backend is used.
531 * @param backend_data NULL or a pointer to an array holding two void *.
532 * @param base Starting virtual address of the area.
533 * If set to -1, a suitable mappable area is found.
534 * @param bound Lowest address bound if base is set to -1.
535 * Otherwise ignored.
536 *
537 * @return Address space area on success or NULL on failure.
538 *
539 */
540as_area_t *as_area_create(as_t *as, unsigned int flags, size_t size,
541 unsigned int attrs, mem_backend_t *backend,
542 mem_backend_data_t *backend_data, uintptr_t *base, uintptr_t bound)
543{
544 if ((*base != (uintptr_t) -1) && !IS_ALIGNED(*base, PAGE_SIZE))
545 return NULL;
546
547 if (size == 0)
548 return NULL;
549
550 size_t pages = SIZE2FRAMES(size);
551
552 /* Writeable executable areas are not supported. */
553 if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
554 return NULL;
555
556 bool const guarded = flags & AS_AREA_GUARD;
557
558 mutex_lock(&as->lock);
559
560 if (*base == (uintptr_t) -1) {
561 *base = as_get_unmapped_area(as, bound, size, guarded);
562 if (*base == (uintptr_t) -1) {
563 mutex_unlock(&as->lock);
564 return NULL;
565 }
566 }
567
568 if (overflows_into_positive(*base, size)) {
 mutex_unlock(&as->lock);
569 return NULL;
 }
570
571 if (!check_area_conflicts(as, *base, pages, guarded, NULL)) {
572 mutex_unlock(&as->lock);
573 return NULL;
574 }
575
576 as_area_t *area = (as_area_t *) malloc(sizeof(as_area_t), 0);
577
578 mutex_initialize(&area->lock, MUTEX_PASSIVE);
579
580 area->as = as;
581 area->flags = flags;
582 area->attributes = attrs;
583 area->pages = pages;
584 area->resident = 0;
585 area->base = *base;
586 area->sh_info = NULL;
587 area->backend = backend;
588
589 if (backend_data)
590 area->backend_data = *backend_data;
591 else
592 memsetb(&area->backend_data, sizeof(area->backend_data), 0);
593
594 if (area->backend && area->backend->create) {
595 if (!area->backend->create(area)) {
596 free(area);
597 mutex_unlock(&as->lock);
598 return NULL;
599 }
600 }
601
602 btree_create(&area->used_space);
603 btree_insert(&as->as_area_btree, *base, (void *) area,
604 NULL);
605
606 mutex_unlock(&as->lock);
607
608 return area;
609}
610
611/** Find address space area and lock it.
612 *
613 * @param as Address space.
614 * @param va Virtual address.
615 *
616 * @return Locked address space area containing va on success or
617 * NULL on failure.
618 *
619 */
620NO_TRACE static as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
621{
622 ASSERT(mutex_locked(&as->lock));
623
624 btree_node_t *leaf;
625 as_area_t *area = (as_area_t *) btree_search(&as->as_area_btree, va,
626 &leaf);
627 if (area) {
628 /* va is the base address of an address space area */
629 mutex_lock(&area->lock);
630 return area;
631 }
632
633 /*
634 * Search the leaf node and the rightmost record of its left neighbour
635 * to find out whether this is a miss or va belongs to an address
636 * space area found there.
637 */
638
639 /* First, search the leaf node itself. */
640 btree_key_t i;
641
642 for (i = 0; i < leaf->keys; i++) {
643 area = (as_area_t *) leaf->value[i];
644
645 mutex_lock(&area->lock);
646
647 if ((area->base <= va) &&
648 (va <= area->base + (P2SZ(area->pages) - 1)))
649 return area;
650
651 mutex_unlock(&area->lock);
652 }
653
654 /*
655 * Second, locate the left neighbour and test its last record.
656 * Because of its position in the B+tree, it must have base < va.
657 */
658 btree_node_t *lnode = btree_leaf_node_left_neighbour(&as->as_area_btree,
659 leaf);
660 if (lnode) {
661 area = (as_area_t *) lnode->value[lnode->keys - 1];
662
663 mutex_lock(&area->lock);
664
665 if (va <= area->base + (P2SZ(area->pages) - 1))
666 return area;
667
668 mutex_unlock(&area->lock);
669 }
670
671 return NULL;
672}
673
674/** Find address space area and change it.
675 *
676 * @param as Address space.
677 * @param address Virtual address belonging to the area to be changed.
678 * Must be page-aligned.
679 * @param size New size of the virtual memory block starting at
680 * address.
681 * @param flags Flags influencing the remap operation. Currently unused.
682 *
683 * @return Zero on success or a value from @ref errno.h otherwise.
684 *
685 */
686int as_area_resize(as_t *as, uintptr_t address, size_t size, unsigned int flags)
687{
688 if (!IS_ALIGNED(address, PAGE_SIZE))
689 return EINVAL;
690
691 mutex_lock(&as->lock);
692
693 /*
694 * Locate the area.
695 */
696 as_area_t *area = find_area_and_lock(as, address);
697 if (!area) {
698 mutex_unlock(&as->lock);
699 return ENOENT;
700 }
701
702 if (!area->backend->is_resizable(area)) {
703 /*
704 * The backend does not support resizing for this area.
705 */
706 mutex_unlock(&area->lock);
707 mutex_unlock(&as->lock);
708 return ENOTSUP;
709 }
710
711 if (area->sh_info) {
712 /*
713 * Remapping of shared address space areas
714 * is not supported.
715 */
716 mutex_unlock(&area->lock);
717 mutex_unlock(&as->lock);
718 return ENOTSUP;
719 }
720
721 size_t pages = SIZE2FRAMES((address - area->base) + size);
722 if (!pages) {
723 /*
724 * Zero size address space areas are not allowed.
725 */
726 mutex_unlock(&area->lock);
727 mutex_unlock(&as->lock);
728 return EPERM;
729 }
730
731 if (pages < area->pages) {
732 uintptr_t start_free = area->base + P2SZ(pages);
733
734 /*
735 * Shrinking the area.
736 * No need to check for overlaps.
737 */
738
739 page_table_lock(as, false);
740
741 /*
742 * Remove frames belonging to used space starting from
743 * the highest addresses downwards until an overlap with
744 * the resized address space area is found. Note that this
745 * is also the right way to remove part of the used_space
746 * B+tree leaf list.
747 */
748 bool cond = true;
749 while (cond) {
750 ASSERT(!list_empty(&area->used_space.leaf_list));
751
752 btree_node_t *node =
753 list_get_instance(list_last(&area->used_space.leaf_list),
754 btree_node_t, leaf_link);
755
756 if ((cond = (bool) node->keys)) {
757 uintptr_t ptr = node->key[node->keys - 1];
758 size_t size =
759 (size_t) node->value[node->keys - 1];
760 size_t i = 0;
761
762 if (overlaps(ptr, P2SZ(size), area->base,
763 P2SZ(pages))) {
764
765 if (ptr + P2SZ(size) <= start_free) {
766 /*
767 * The whole interval fits
768 * completely in the resized
769 * address space area.
770 */
771 break;
772 }
773
774 /*
775 * Part of the interval corresponding to this
776 * used space record overlaps with the resized
777 * address space area.
778 */
779
780 /* We are almost done */
781 cond = false;
782 i = (start_free - ptr) >> PAGE_WIDTH;
783 if (!used_space_remove(area, start_free,
784 size - i))
785 panic("Cannot remove used space.");
786 } else {
787 /*
788 * The interval of used space can be
789 * completely removed.
790 */
791 if (!used_space_remove(area, ptr, size))
792 panic("Cannot remove used space.");
793 }
794
795 /*
796 * Start TLB shootdown sequence.
797 *
798 * The sequence is rather short and can be
799 * repeated multiple times. The reason is that
800 * we don't want to have used_space_remove()
801 * inside the sequence as it may use a blocking
802 * memory allocation for its B+tree. Blocking
803 * while holding the tlblock spinlock is
804 * forbidden and would hit a kernel assertion.
805 */
806
807 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES,
808 as->asid, area->base + P2SZ(pages),
809 area->pages - pages);
810
811 for (; i < size; i++) {
812 pte_t *pte = page_mapping_find(as,
813 ptr + P2SZ(i), false);
814
815 ASSERT(pte);
816 ASSERT(PTE_VALID(pte));
817 ASSERT(PTE_PRESENT(pte));
818
819 if ((area->backend) &&
820 (area->backend->frame_free)) {
821 area->backend->frame_free(area,
822 ptr + P2SZ(i),
823 PTE_GET_FRAME(pte));
824 }
825
826 page_mapping_remove(as, ptr + P2SZ(i));
827 }
828
829 /*
830 * Finish TLB shootdown sequence.
831 */
832
833 tlb_invalidate_pages(as->asid,
834 area->base + P2SZ(pages),
835 area->pages - pages);
836
837 /*
838 * Invalidate software translation caches
839 * (e.g. TSB on sparc64, PHT on ppc32).
840 */
841 as_invalidate_translation_cache(as,
842 area->base + P2SZ(pages),
843 area->pages - pages);
844 tlb_shootdown_finalize(ipl);
845 }
846 }
847 page_table_unlock(as, false);
848 } else {
849 /*
850 * Growing the area.
851 */
852
853 if (overflows_into_positive(address, P2SZ(pages))) {
 mutex_unlock(&area->lock);
 mutex_unlock(&as->lock);
854 return EINVAL;
 }
855
856 /*
857 * Check for overlaps with other address space areas.
858 */
859 bool const guarded = area->flags & AS_AREA_GUARD;
860 if (!check_area_conflicts(as, address, pages, guarded, area)) {
861 mutex_unlock(&area->lock);
862 mutex_unlock(&as->lock);
863 return EADDRNOTAVAIL;
864 }
865 }
866
867 if (area->backend && area->backend->resize) {
868 if (!area->backend->resize(area, pages)) {
869 mutex_unlock(&area->lock);
870 mutex_unlock(&as->lock);
871 return ENOMEM;
872 }
873 }
874
875 area->pages = pages;
876
877 mutex_unlock(&area->lock);
878 mutex_unlock(&as->lock);
879
880 return 0;
881}
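/*
 * Illustrative sketch only: growing an existing area by one page. The base
 * address and old size are placeholders; the flags argument is currently
 * unused by as_area_resize().
 *
 *	int rc = as_area_resize(AS, area_base, old_size + PAGE_SIZE, 0);
 *	if (rc != 0) {
 *		// e.g. ENOTSUP for non-resizable or shared areas,
 *		// EADDRNOTAVAIL if the grown area would overlap another one
 *	}
 */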
882
883/** Remove reference to address space area share info.
884 *
885 * If the reference count drops to 0, the sh_info is deallocated.
886 *
887 * @param sh_info Pointer to address space area share info.
888 *
889 */
890NO_TRACE static void sh_info_remove_reference(share_info_t *sh_info)
891{
892 bool dealloc = false;
893
894 mutex_lock(&sh_info->lock);
895 ASSERT(sh_info->refcount);
896
897 if (--sh_info->refcount == 0) {
898 dealloc = true;
899
900 /*
901 * Now walk carefully the pagemap B+tree and free/remove
902 * reference from all frames found there.
903 */
904 list_foreach(sh_info->pagemap.leaf_list, leaf_link,
905 btree_node_t, node) {
906 btree_key_t i;
907
908 for (i = 0; i < node->keys; i++)
909 frame_free((uintptr_t) node->value[i]);
910 }
911
912 }
913 mutex_unlock(&sh_info->lock);
914
915 if (dealloc) {
916 btree_destroy(&sh_info->pagemap);
917 free(sh_info);
918 }
919}
920
921/** Destroy address space area.
922 *
923 * @param as Address space.
924 * @param address Address within the area to be deleted.
925 *
926 * @return Zero on success or a value from @ref errno.h on failure.
927 *
928 */
929int as_area_destroy(as_t *as, uintptr_t address)
930{
931 mutex_lock(&as->lock);
932
933 as_area_t *area = find_area_and_lock(as, address);
934 if (!area) {
935 mutex_unlock(&as->lock);
936 return ENOENT;
937 }
938
939 if (area->backend && area->backend->destroy)
940 area->backend->destroy(area);
941
942 uintptr_t base = area->base;
943
944 page_table_lock(as, false);
945
946 /*
947 * Start TLB shootdown sequence.
948 */
949 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
950 area->pages);
951
952 /*
953 * Visit only the pages mapped by used_space B+tree.
954 */
955 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
956 node) {
957 btree_key_t i;
958
959 for (i = 0; i < node->keys; i++) {
960 uintptr_t ptr = node->key[i];
961 size_t size;
962
963 for (size = 0; size < (size_t) node->value[i]; size++) {
964 pte_t *pte = page_mapping_find(as,
965 ptr + P2SZ(size), false);
966
967 ASSERT(pte);
968 ASSERT(PTE_VALID(pte));
969 ASSERT(PTE_PRESENT(pte));
970
971 if ((area->backend) &&
972 (area->backend->frame_free)) {
973 area->backend->frame_free(area,
974 ptr + P2SZ(size),
975 PTE_GET_FRAME(pte));
976 }
977
978 page_mapping_remove(as, ptr + P2SZ(size));
979 }
980 }
981 }
982
983 /*
984 * Finish TLB shootdown sequence.
985 */
986
987 tlb_invalidate_pages(as->asid, area->base, area->pages);
988
989 /*
990 * Invalidate potential software translation caches
991 * (e.g. TSB on sparc64, PHT on ppc32).
992 */
993 as_invalidate_translation_cache(as, area->base, area->pages);
994 tlb_shootdown_finalize(ipl);
995
996 page_table_unlock(as, false);
997
998 btree_destroy(&area->used_space);
999
1000 area->attributes |= AS_AREA_ATTR_PARTIAL;
1001
1002 if (area->sh_info)
1003 sh_info_remove_reference(area->sh_info);
1004
1005 mutex_unlock(&area->lock);
1006
1007 /*
1008 * Remove the empty area from address space.
1009 */
1010 btree_remove(&as->as_area_btree, base, NULL);
1011
1012 free(area);
1013
1014 mutex_unlock(&as->lock);
1015 return 0;
1016}
1017
1018/** Share address space area with another or the same address space.
1019 *
1020 * Address space area mapping is shared with a new address space area.
1021 * If the source address space area has not been shared so far,
1022 * a new sh_info is created. The new address space area simply gets the
1023 * sh_info of the source area. The process of duplicating the
1024 * mapping is done through the backend share function.
1025 *
1026 * @param src_as Pointer to source address space.
1027 * @param src_base Base address of the source address space area.
1028 * @param acc_size Expected size of the source area.
1029 * @param dst_as Pointer to destination address space.
1030 * @param dst_flags_mask Destination address space area flags mask.
1031 * @param dst_base Target base address. If set to -1,
1032 * a suitable mappable area is found.
1033 * @param bound Lowest address bound if dst_base is set to -1.
1034 * Otherwise ignored.
1035 *
1036 * @return Zero on success.
1037 * @return ENOENT if the source address space area was not found.
1038 * @return EPERM if there was a problem in accepting the area.
1039 * @return ENOMEM if there was a problem in allocating destination
1040 * address space area.
1041 * @return ENOTSUP if the address space area backend does not support
1042 * sharing.
1043 *
1044 */
1045int as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
1046 as_t *dst_as, unsigned int dst_flags_mask, uintptr_t *dst_base,
1047 uintptr_t bound)
1048{
1049 mutex_lock(&src_as->lock);
1050 as_area_t *src_area = find_area_and_lock(src_as, src_base);
1051 if (!src_area) {
1052 /*
1053 * Could not find the source address space area.
1054 */
1055 mutex_unlock(&src_as->lock);
1056 return ENOENT;
1057 }
1058
1059 if (!src_area->backend->is_shareable(src_area)) {
1060 /*
1061 * The backend does not permit sharing of this area.
1062 */
1063 mutex_unlock(&src_area->lock);
1064 mutex_unlock(&src_as->lock);
1065 return ENOTSUP;
1066 }
1067
1068 size_t src_size = P2SZ(src_area->pages);
1069 unsigned int src_flags = src_area->flags;
1070 mem_backend_t *src_backend = src_area->backend;
1071 mem_backend_data_t src_backend_data = src_area->backend_data;
1072
1073 /* Share the cacheable flag from the original mapping */
1074 if (src_flags & AS_AREA_CACHEABLE)
1075 dst_flags_mask |= AS_AREA_CACHEABLE;
1076
1077 if ((src_size != acc_size) ||
1078 ((src_flags & dst_flags_mask) != dst_flags_mask)) {
1079 mutex_unlock(&src_area->lock);
1080 mutex_unlock(&src_as->lock);
1081 return EPERM;
1082 }
1083
1084 /*
1085 * Now we are committed to sharing the area.
1086 * First, prepare the area for sharing.
1087 * Then it will be safe to unlock it.
1088 */
1089 share_info_t *sh_info = src_area->sh_info;
1090 if (!sh_info) {
1091 sh_info = (share_info_t *) malloc(sizeof(share_info_t), 0);
1092 mutex_initialize(&sh_info->lock, MUTEX_PASSIVE);
1093 sh_info->refcount = 2;
1094 btree_create(&sh_info->pagemap);
1095 src_area->sh_info = sh_info;
1096
1097 /*
1098 * Call the backend to setup sharing.
1099 */
1100 src_area->backend->share(src_area);
1101 } else {
1102 mutex_lock(&sh_info->lock);
1103 sh_info->refcount++;
1104 mutex_unlock(&sh_info->lock);
1105 }
1106
1107 mutex_unlock(&src_area->lock);
1108 mutex_unlock(&src_as->lock);
1109
1110 /*
1111 * Create copy of the source address space area.
1112 * The destination area is created with AS_AREA_ATTR_PARTIAL
1113 * attribute set which prevents race condition with
1114 * preliminary as_page_fault() calls.
1115 * The flags of the source area are masked against dst_flags_mask
1116 * to support sharing in less privileged mode.
1117 */
1118 as_area_t *dst_area = as_area_create(dst_as, dst_flags_mask,
1119 src_size, AS_AREA_ATTR_PARTIAL, src_backend,
1120 &src_backend_data, dst_base, bound);
1121 if (!dst_area) {
1122 /*
1123 * Destination address space area could not be created.
1124 */
1125 sh_info_remove_reference(sh_info);
1126
1127 return ENOMEM;
1128 }
1129
1130 /*
1131 * Now the destination address space area has been
1132 * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
1133 * attribute and set the sh_info.
1134 */
1135 mutex_lock(&dst_as->lock);
1136 mutex_lock(&dst_area->lock);
1137 dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
1138 dst_area->sh_info = sh_info;
1139 mutex_unlock(&dst_area->lock);
1140 mutex_unlock(&dst_as->lock);
1141
1142 return 0;
1143}
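/*
 * Illustrative sketch only: sharing an area from src_as into dst_as at a
 * kernel-chosen address. Variable names are hypothetical; acc_size must
 * match the source area size exactly, otherwise EPERM is returned.
 *
 *	uintptr_t dst_base = (uintptr_t) -1;
 *	int rc = as_area_share(src_as, src_base, src_size,
 *	    dst_as, AS_AREA_READ | AS_AREA_WRITE, &dst_base, 0);
 */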
1144
1145/** Check access mode for address space area.
1146 *
1147 * @param area Address space area.
1148 * @param access Access mode.
1149 *
1150 * @return False if access violates area's permissions, true
1151 * otherwise.
1152 *
1153 */
1154NO_TRACE bool as_area_check_access(as_area_t *area, pf_access_t access)
1155{
1156 ASSERT(mutex_locked(&area->lock));
1157
1158 int flagmap[] = {
1159 [PF_ACCESS_READ] = AS_AREA_READ,
1160 [PF_ACCESS_WRITE] = AS_AREA_WRITE,
1161 [PF_ACCESS_EXEC] = AS_AREA_EXEC
1162 };
1163
1164 if (!(area->flags & flagmap[access]))
1165 return false;
1166
1167 return true;
1168}
1169
1170/** Convert address space area flags to page flags.
1171 *
1172 * @param aflags Flags of some address space area.
1173 *
1174 * @return Flags to be passed to page_mapping_insert().
1175 *
1176 */
1177NO_TRACE static unsigned int area_flags_to_page_flags(unsigned int aflags)
1178{
1179 unsigned int flags = PAGE_USER | PAGE_PRESENT;
1180
1181 if (aflags & AS_AREA_READ)
1182 flags |= PAGE_READ;
1183
1184 if (aflags & AS_AREA_WRITE)
1185 flags |= PAGE_WRITE;
1186
1187 if (aflags & AS_AREA_EXEC)
1188 flags |= PAGE_EXEC;
1189
1190 if (aflags & AS_AREA_CACHEABLE)
1191 flags |= PAGE_CACHEABLE;
1192
1193 return flags;
1194}
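/*
 * Worked example, derived directly from the mapping above: an area created
 * with AS_AREA_READ | AS_AREA_WRITE | AS_AREA_CACHEABLE yields
 * PAGE_USER | PAGE_PRESENT | PAGE_READ | PAGE_WRITE | PAGE_CACHEABLE,
 * which is the value later passed to page_mapping_insert().
 */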
1195
1196/** Change address space area flags.
1197 *
1198 * The idea is to have the same data, but with a different access mode.
1199 * This is needed e.g. for writing code into memory and then executing it.
1200 * In order for this to work properly, this may copy the data
1201 * into private anonymous memory (unless it's already there).
1202 *
1203 * @param as Address space.
1204 * @param flags Flags of the area memory.
1205 * @param address Address within the area to be changed.
1206 *
1207 * @return Zero on success or a value from @ref errno.h on failure.
1208 *
1209 */
1210int as_area_change_flags(as_t *as, unsigned int flags, uintptr_t address)
1211{
1212 /* Flags for the new memory mapping */
1213 unsigned int page_flags = area_flags_to_page_flags(flags);
1214
1215 mutex_lock(&as->lock);
1216
1217 as_area_t *area = find_area_and_lock(as, address);
1218 if (!area) {
1219 mutex_unlock(&as->lock);
1220 return ENOENT;
1221 }
1222
1223 if ((area->sh_info) || (area->backend != &anon_backend)) {
1224 /* Copying shared areas not supported yet */
1225 /* Copying non-anonymous memory not supported yet */
1226 mutex_unlock(&area->lock);
1227 mutex_unlock(&as->lock);
1228 return ENOTSUP;
1229 }
1230
1231 /*
1232 * Compute total number of used pages in the used_space B+tree
1233 */
1234 size_t used_pages = 0;
1235
1236 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1237 node) {
1238 btree_key_t i;
1239
1240 for (i = 0; i < node->keys; i++)
1241 used_pages += (size_t) node->value[i];
1242 }
1243
1244 /* An array for storing frame numbers */
1245 uintptr_t *old_frame = malloc(used_pages * sizeof(uintptr_t), 0);
1246
1247 page_table_lock(as, false);
1248
1249 /*
1250 * Start TLB shootdown sequence.
1251 */
1252 ipl_t ipl = tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base,
1253 area->pages);
1254
1255 /*
1256 * Remove used pages from page tables and remember their frame
1257 * numbers.
1258 */
1259 size_t frame_idx = 0;
1260
1261 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1262 node) {
1263 btree_key_t i;
1264
1265 for (i = 0; i < node->keys; i++) {
1266 uintptr_t ptr = node->key[i];
1267 size_t size;
1268
1269 for (size = 0; size < (size_t) node->value[i]; size++) {
1270 pte_t *pte = page_mapping_find(as,
1271 ptr + P2SZ(size), false);
1272
1273 ASSERT(pte);
1274 ASSERT(PTE_VALID(pte));
1275 ASSERT(PTE_PRESENT(pte));
1276
1277 old_frame[frame_idx++] = PTE_GET_FRAME(pte);
1278
1279 /* Remove old mapping */
1280 page_mapping_remove(as, ptr + P2SZ(size));
1281 }
1282 }
1283 }
1284
1285 /*
1286 * Finish TLB shootdown sequence.
1287 */
1288
1289 tlb_invalidate_pages(as->asid, area->base, area->pages);
1290
1291 /*
1292 * Invalidate potential software translation caches
1293 * (e.g. TSB on sparc64, PHT on ppc32).
1294 */
1295 as_invalidate_translation_cache(as, area->base, area->pages);
1296 tlb_shootdown_finalize(ipl);
1297
1298 page_table_unlock(as, false);
1299
1300 /*
1301 * Set the new flags.
1302 */
1303 area->flags = flags;
1304
1305 /*
1306 * Map pages back in with new flags. This step is kept separate
1307 * so that the memory area cannot be accessed with both the old and
1308 * the new flags at once.
1309 */
1310 frame_idx = 0;
1311
1312 list_foreach(area->used_space.leaf_list, leaf_link, btree_node_t,
1313 node) {
1314 btree_key_t i;
1315
1316 for (i = 0; i < node->keys; i++) {
1317 uintptr_t ptr = node->key[i];
1318 size_t size;
1319
1320 for (size = 0; size < (size_t) node->value[i]; size++) {
1321 page_table_lock(as, false);
1322
1323 /* Insert the new mapping */
1324 page_mapping_insert(as, ptr + P2SZ(size),
1325 old_frame[frame_idx++], page_flags);
1326
1327 page_table_unlock(as, false);
1328 }
1329 }
1330 }
1331
1332 free(old_frame);
1333
1334 mutex_unlock(&area->lock);
1335 mutex_unlock(&as->lock);
1336
1337 return 0;
1338}
1339
1340/** Handle page fault within the current address space.
1341 *
1342 * This is the high-level page fault handler. It decides whether the page fault
1343 * can be resolved by any backend and if so, it invokes the backend to resolve
1344 * the page fault.
1345 *
1346 * Interrupts are assumed disabled.
1347 *
1348 * @param address Faulting address.
1349 * @param access Access mode that caused the page fault (i.e.
1350 * read/write/exec).
1351 * @param istate Pointer to the interrupted state.
1352 *
1353 * @return AS_PF_FAULT on page fault.
1354 * @return AS_PF_OK on success.
1355 * @return AS_PF_DEFER if the fault was caused by copy_to_uspace()
1356 * or copy_from_uspace().
1357 *
1358 */
1359int as_page_fault(uintptr_t address, pf_access_t access, istate_t *istate)
1360{
1361 uintptr_t page = ALIGN_DOWN(address, PAGE_SIZE);
1362 int rc = AS_PF_FAULT;
1363
1364 if (!THREAD)
1365 goto page_fault;
1366
1367 if (!AS)
1368 goto page_fault;
1369
1370 mutex_lock(&AS->lock);
1371 as_area_t *area = find_area_and_lock(AS, page);
1372 if (!area) {
1373 /*
1374 * No area contained mapping for 'page'.
1375 * Signal page fault to low-level handler.
1376 */
1377 mutex_unlock(&AS->lock);
1378 goto page_fault;
1379 }
1380
1381 if (area->attributes & AS_AREA_ATTR_PARTIAL) {
1382 /*
1383 * The address space area is not fully initialized.
1384 * Avoid possible race by returning error.
1385 */
1386 mutex_unlock(&area->lock);
1387 mutex_unlock(&AS->lock);
1388 goto page_fault;
1389 }
1390
1391 if ((!area->backend) || (!area->backend->page_fault)) {
1392 /*
1393 * The address space area is not backed by any backend
1394 * or the backend cannot handle page faults.
1395 */
1396 mutex_unlock(&area->lock);
1397 mutex_unlock(&AS->lock);
1398 goto page_fault;
1399 }
1400
1401 page_table_lock(AS, false);
1402
1403 /*
1404 * To avoid race condition between two page faults on the same address,
1405 * we need to make sure the mapping has not been already inserted.
1406 */
1407 pte_t *pte;
1408 if ((pte = page_mapping_find(AS, page, false))) {
1409 if (PTE_PRESENT(pte)) {
1410 if (((access == PF_ACCESS_READ) && PTE_READABLE(pte)) ||
1411 (access == PF_ACCESS_WRITE && PTE_WRITABLE(pte)) ||
1412 (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(pte))) {
1413 page_table_unlock(AS, false);
1414 mutex_unlock(&area->lock);
1415 mutex_unlock(&AS->lock);
1416 return AS_PF_OK;
1417 }
1418 }
1419 }
1420
1421 /*
1422 * Resort to the backend page fault handler.
1423 */
1424 rc = area->backend->page_fault(area, page, access);
1425 if (rc != AS_PF_OK) {
1426 page_table_unlock(AS, false);
1427 mutex_unlock(&area->lock);
1428 mutex_unlock(&AS->lock);
1429 goto page_fault;
1430 }
1431
1432 page_table_unlock(AS, false);
1433 mutex_unlock(&area->lock);
1434 mutex_unlock(&AS->lock);
1435 return AS_PF_OK;
1436
1437page_fault:
1438 if (THREAD->in_copy_from_uspace) {
1439 THREAD->in_copy_from_uspace = false;
1440 istate_set_retaddr(istate,
1441 (uintptr_t) &memcpy_from_uspace_failover_address);
1442 } else if (THREAD->in_copy_to_uspace) {
1443 THREAD->in_copy_to_uspace = false;
1444 istate_set_retaddr(istate,
1445 (uintptr_t) &memcpy_to_uspace_failover_address);
1446 } else if (rc == AS_PF_SILENT) {
1447 printf("Killing task %" PRIu64 " due to a "
1448 "failed late reservation request.\n", TASK->taskid);
1449 task_kill_self(true);
1450 } else {
1451 fault_if_from_uspace(istate, "Page fault: %p.", (void *) address);
1452 panic_memtrap(istate, access, address, NULL);
1453 }
1454
1455 return AS_PF_DEFER;
1456}
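/*
 * Hedged sketch of a caller: a low-level, architecture-specific fault
 * handler might dispatch to as_page_fault() roughly like this (the handler
 * name and the way badvaddr is obtained are assumptions, not actual code):
 *
 *	void arch_data_abort(unsigned int n, istate_t *istate)
 *	{
 *		uintptr_t badvaddr = read_fault_address_register();
 *		// AS_PF_OK: resolved; AS_PF_DEFER: return address redirected
 *		// to the copy_*_uspace failover; otherwise the faulting task
 *		// was killed or the kernel panicked inside as_page_fault().
 *		(void) as_page_fault(badvaddr, PF_ACCESS_READ, istate);
 *	}
 */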
1457
1458/** Switch address spaces.
1459 *
1460 * Note that this function cannot sleep as it is essentially a part of
1461 * scheduling. Sleeping here would lead to deadlock on wakeup. Another
1462 * thing which is forbidden in this context is locking the address space.
1463 *
1464 * When this function is entered, no spinlocks may be held.
1465 *
1466 * @param old_as Old address space or NULL.
1467 * @param new_as New address space.
1468 *
1469 */
1470void as_switch(as_t *old_as, as_t *new_as)
1471{
1472 DEADLOCK_PROBE_INIT(p_asidlock);
1473 preemption_disable();
1474
1475retry:
1476 (void) interrupts_disable();
1477 if (!spinlock_trylock(&asidlock)) {
1478 /*
1479 * Avoid deadlock with TLB shootdown.
1480 * We can enable interrupts here because
1481 * preemption is disabled. We should not be
1482 * holding any other lock.
1483 */
1484 (void) interrupts_enable();
1485 DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
1486 goto retry;
1487 }
1488 preemption_enable();
1489
1490 /*
1491 * First, take care of the old address space.
1492 */
1493 if (old_as) {
1494 ASSERT(old_as->cpu_refcount);
1495
1496 if ((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) {
1497 /*
1498 * The old address space is no longer active on
1499 * any processor. It can be appended to the
1500 * list of inactive address spaces with assigned
1501 * ASID.
1502 */
1503 ASSERT(old_as->asid != ASID_INVALID);
1504
1505 list_append(&old_as->inactive_as_with_asid_link,
1506 &inactive_as_with_asid_list);
1507 }
1508
1509 /*
1510 * Perform architecture-specific tasks when the address space
1511 * is being removed from the CPU.
1512 */
1513 as_deinstall_arch(old_as);
1514 }
1515
1516 /*
1517 * Second, prepare the new address space.
1518 */
1519 if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) {
1520 if (new_as->asid != ASID_INVALID)
1521 list_remove(&new_as->inactive_as_with_asid_link);
1522 else
1523 new_as->asid = asid_get();
1524 }
1525
1526#ifdef AS_PAGE_TABLE
1527 SET_PTL0_ADDRESS(new_as->genarch.page_table);
1528#endif
1529
1530 /*
1531 * Perform architecture-specific steps.
1532 * (e.g. write ASID to hardware register etc.)
1533 */
1534 as_install_arch(new_as);
1535
1536 spinlock_unlock(&asidlock);
1537
1538 AS = new_as;
1539}
1540
1541/** Compute flags for virtual address translation subsystem.
1542 *
1543 * @param area Address space area.
1544 *
1545 * @return Flags to be used in page_mapping_insert().
1546 *
1547 */
1548NO_TRACE unsigned int as_area_get_flags(as_area_t *area)
1549{
1550 ASSERT(mutex_locked(&area->lock));
1551
1552 return area_flags_to_page_flags(area->flags);
1553}
1554
1555/** Create page table.
1556 *
1557 * Depending on the architecture, create either an address space private or a global page
1558 * table.
1559 *
1560 * @param flags Flags saying whether the page table is for the kernel
1561 * address space.
1562 *
1563 * @return First entry of the page table.
1564 *
1565 */
1566NO_TRACE pte_t *page_table_create(unsigned int flags)
1567{
1568 ASSERT(as_operations);
1569 ASSERT(as_operations->page_table_create);
1570
1571 return as_operations->page_table_create(flags);
1572}
1573
1574/** Destroy page table.
1575 *
1576 * Destroy page table in architecture specific way.
1577 *
1578 * @param page_table Physical address of PTL0.
1579 *
1580 */
1581NO_TRACE void page_table_destroy(pte_t *page_table)
1582{
1583 ASSERT(as_operations);
1584 ASSERT(as_operations->page_table_destroy);
1585
1586 as_operations->page_table_destroy(page_table);
1587}
1588
1589/** Lock page table.
1590 *
1591 * This function should be called before any page_mapping_insert(),
1592 * page_mapping_remove() and page_mapping_find().
1593 *
1594 * Locking order is such that address space areas must be locked
1595 * prior to this call. Address space can be locked prior to this
1596 * call in which case the lock argument is false.
1597 *
1598 * @param as Address space.
1599 * @param lock If false, do not attempt to lock as->lock.
1600 *
1601 */
1602NO_TRACE void page_table_lock(as_t *as, bool lock)
1603{
1604 ASSERT(as_operations);
1605 ASSERT(as_operations->page_table_lock);
1606
1607 as_operations->page_table_lock(as, lock);
1608}
1609
1610/** Unlock page table.
1611 *
1612 * @param as Address space.
1613 * @param unlock If false, do not attempt to unlock as->lock.
1614 *
1615 */
1616NO_TRACE void page_table_unlock(as_t *as, bool unlock)
1617{
1618 ASSERT(as_operations);
1619 ASSERT(as_operations->page_table_unlock);
1620
1621 as_operations->page_table_unlock(as, unlock);
1622}
1623
1624/** Test whether page tables are locked.
1625 *
1626 * @param as Address space where the page tables belong.
1627 *
1628 * @return True if the page tables belonging to the address space
1629 * are locked, otherwise false.
1630 */
1631NO_TRACE bool page_table_locked(as_t *as)
1632{
1633 ASSERT(as_operations);
1634 ASSERT(as_operations->page_table_locked);
1635
1636 return as_operations->page_table_locked(as);
1637}
1638
1639/** Return size of the address space area containing the given address.
1640 *
1641 * @param base Arbitrary address inside the address space area.
1642 *
1643 * @return Size of the address space area in bytes or zero if it
1644 * does not exist.
1645 *
1646 */
1647size_t as_area_get_size(uintptr_t base)
1648{
1649 size_t size;
1650
1651 page_table_lock(AS, true);
1652 as_area_t *src_area = find_area_and_lock(AS, base);
1653
1654 if (src_area) {
1655 size = P2SZ(src_area->pages);
1656 mutex_unlock(&src_area->lock);
1657 } else
1658 size = 0;
1659
1660 page_table_unlock(AS, true);
1661 return size;
1662}
1663
1664/** Mark portion of address space area as used.
1665 *
1666 * The address space area must be already locked.
1667 *
1668 * @param area Address space area.
1669 * @param page First page to be marked.
1670 * @param count Number of pages to be marked.
1671 *
1672 * @return False on failure or true on success.
1673 *
1674 */
1675bool used_space_insert(as_area_t *area, uintptr_t page, size_t count)
1676{
1677 ASSERT(mutex_locked(&area->lock));
1678 ASSERT(IS_ALIGNED(page, PAGE_SIZE));
1679 ASSERT(count);
1680
1681 btree_node_t *leaf;
1682 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1683 if (pages) {
1684 /*
1685 * We hit the beginning of some used space.
1686 */
1687 return false;
1688 }
1689
1690 if (!leaf->keys) {
1691 btree_insert(&area->used_space, page, (void *) count, leaf);
1692 goto success;
1693 }
1694
1695 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space, leaf);
1696 if (node) {
1697 uintptr_t left_pg = node->key[node->keys - 1];
1698 uintptr_t right_pg = leaf->key[0];
1699 size_t left_cnt = (size_t) node->value[node->keys - 1];
1700 size_t right_cnt = (size_t) leaf->value[0];
1701
1702 /*
1703 * Examine the possibility that the interval fits
1704 * somewhere between the rightmost interval of
1705 * the left neighbour and the first interval of the leaf.
1706 */
1707
1708 if (page >= right_pg) {
1709 /* Do nothing. */
1710 } else if (overlaps(page, P2SZ(count), left_pg,
1711 P2SZ(left_cnt))) {
1712 /* The interval intersects with the left interval. */
1713 return false;
1714 } else if (overlaps(page, P2SZ(count), right_pg,
1715 P2SZ(right_cnt))) {
1716 /* The interval intersects with the right interval. */
1717 return false;
1718 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1719 (page + P2SZ(count) == right_pg)) {
1720 /*
1721 * The interval can be added by merging the two already
1722 * present intervals.
1723 */
1724 node->value[node->keys - 1] += count + right_cnt;
1725 btree_remove(&area->used_space, right_pg, leaf);
1726 goto success;
1727 } else if (page == left_pg + P2SZ(left_cnt)) {
1728 /*
1729 * The interval can be added by simply growing the left
1730 * interval.
1731 */
1732 node->value[node->keys - 1] += count;
1733 goto success;
1734 } else if (page + P2SZ(count) == right_pg) {
1735 /*
1736 * The interval can be added by simply moving the base of
1737 * the right interval down and increasing its size
1738 * accordingly.
1739 */
1740 leaf->value[0] += count;
1741 leaf->key[0] = page;
1742 goto success;
1743 } else {
1744 /*
1745 * The interval is between both neighbouring intervals,
1746 * but cannot be merged with any of them.
1747 */
1748 btree_insert(&area->used_space, page, (void *) count,
1749 leaf);
1750 goto success;
1751 }
1752 } else if (page < leaf->key[0]) {
1753 uintptr_t right_pg = leaf->key[0];
1754 size_t right_cnt = (size_t) leaf->value[0];
1755
1756 /*
1757 * Investigate the border case in which the left neighbour does
1758 * not exist but the interval fits from the left.
1759 */
1760
1761 if (overlaps(page, P2SZ(count), right_pg, P2SZ(right_cnt))) {
1762 /* The interval intersects with the right interval. */
1763 return false;
1764 } else if (page + P2SZ(count) == right_pg) {
1765 /*
1766 * The interval can be added by moving the base of the
1767 * right interval down and increasing its size
1768 * accordingly.
1769 */
1770 leaf->key[0] = page;
1771 leaf->value[0] += count;
1772 goto success;
1773 } else {
1774 /*
1775 * The interval doesn't adjoin the right interval.
1776 * It must be added individually.
1777 */
1778 btree_insert(&area->used_space, page, (void *) count,
1779 leaf);
1780 goto success;
1781 }
1782 }
1783
1784 node = btree_leaf_node_right_neighbour(&area->used_space, leaf);
1785 if (node) {
1786 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1787 uintptr_t right_pg = node->key[0];
1788 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1789 size_t right_cnt = (size_t) node->value[0];
1790
1791 /*
1792 * Examine the possibility that the interval fits
1793 * somewhere between the leftmost interval of
1794 * the right neighbour and the last interval of the leaf.
1795 */
1796
1797 if (page < left_pg) {
1798 /* Do nothing. */
1799 } else if (overlaps(page, P2SZ(count), left_pg,
1800 P2SZ(left_cnt))) {
1801 /* The interval intersects with the left interval. */
1802 return false;
1803 } else if (overlaps(page, P2SZ(count), right_pg,
1804 P2SZ(right_cnt))) {
1805 /* The interval intersects with the right interval. */
1806 return false;
1807 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1808 (page + P2SZ(count) == right_pg)) {
1809 /*
1810 * The interval can be added by merging the two already
1811 * present intervals.
1812 */
1813 leaf->value[leaf->keys - 1] += count + right_cnt;
1814 btree_remove(&area->used_space, right_pg, node);
1815 goto success;
1816 } else if (page == left_pg + P2SZ(left_cnt)) {
1817 /*
1818 * The interval can be added by simply growing the left
1819 * interval.
1820 */
1821 leaf->value[leaf->keys - 1] += count;
1822 goto success;
1823 } else if (page + P2SZ(count) == right_pg) {
1824 /*
1825 * The interval can be added by simply moving the base of
1826 * the right interval down and increasing its size
1827 * accordingly.
1828 */
1829 node->value[0] += count;
1830 node->key[0] = page;
1831 goto success;
1832 } else {
1833 /*
1834 * The interval is between both neighbouring intervals,
1835 * but cannot be merged with any of them.
1836 */
1837 btree_insert(&area->used_space, page, (void *) count,
1838 leaf);
1839 goto success;
1840 }
1841 } else if (page >= leaf->key[leaf->keys - 1]) {
1842 uintptr_t left_pg = leaf->key[leaf->keys - 1];
1843 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
1844
1845 /*
1846 * Investigate the border case in which the right neighbour
1847 * does not exist but the interval fits from the right.
1848 */
1849
1850 if (overlaps(page, P2SZ(count), left_pg, P2SZ(left_cnt))) {
1851 /* The interval intersects with the left interval. */
1852 return false;
1853 } else if (left_pg + P2SZ(left_cnt) == page) {
1854 /*
1855 * The interval can be added by growing the left
1856 * interval.
1857 */
1858 leaf->value[leaf->keys - 1] += count;
1859 goto success;
1860 } else {
1861 /*
1862 * The interval doesn't adjoin the left interval.
1863 * It must be added individually.
1864 */
1865 btree_insert(&area->used_space, page, (void *) count,
1866 leaf);
1867 goto success;
1868 }
1869 }
1870
1871 /*
1872 * Note that if the algorithm made it thus far, the interval can fit
1873 * only between two other intervals of the leaf. The two border cases
1874 * were already resolved.
1875 */
1876 btree_key_t i;
1877 for (i = 1; i < leaf->keys; i++) {
1878 if (page < leaf->key[i]) {
1879 uintptr_t left_pg = leaf->key[i - 1];
1880 uintptr_t right_pg = leaf->key[i];
1881 size_t left_cnt = (size_t) leaf->value[i - 1];
1882 size_t right_cnt = (size_t) leaf->value[i];
1883
1884 /*
1885 * The interval fits between left_pg and right_pg.
1886 */
1887
1888 if (overlaps(page, P2SZ(count), left_pg,
1889 P2SZ(left_cnt))) {
1890 /*
1891 * The interval intersects with the left
1892 * interval.
1893 */
1894 return false;
1895 } else if (overlaps(page, P2SZ(count), right_pg,
1896 P2SZ(right_cnt))) {
1897 /*
1898 * The interval intersects with the right
1899 * interval.
1900 */
1901 return false;
1902 } else if ((page == left_pg + P2SZ(left_cnt)) &&
1903 (page + P2SZ(count) == right_pg)) {
1904 /*
1905 * The interval can be added by merging the two
1906 * already present intervals.
1907 */
1908 leaf->value[i - 1] += count + right_cnt;
1909 btree_remove(&area->used_space, right_pg, leaf);
1910 goto success;
1911 } else if (page == left_pg + P2SZ(left_cnt)) {
1912 /*
1913 * The interval can be added by simply growing
1914 * the left interval.
1915 */
1916 leaf->value[i - 1] += count;
1917 goto success;
1918 } else if (page + P2SZ(count) == right_pg) {
1919 /*
1920 * The interval can be added by simply moving the
1921 * base of the right interval down and
1922 * increasing its size accordingly.
1923 */
1924 leaf->value[i] += count;
1925 leaf->key[i] = page;
1926 goto success;
1927 } else {
1928 /*
1929 * The interval is between both neighbouring
1930 * intervals, but cannot be merged with any of
1931 * them.
1932 */
1933 btree_insert(&area->used_space, page,
1934 (void *) count, leaf);
1935 goto success;
1936 }
1937 }
1938 }
1939
1940 panic("Inconsistency detected while adding %zu pages of used "
1941 "space at %p.", count, (void *) page);
1942
1943success:
1944 area->resident += count;
1945 return true;
1946}
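/*
 * Worked example of the merging logic above (hypothetical addresses,
 * assuming 4 KiB pages): with intervals [0x1000, 2 pages] and
 * [0x4000, 1 page] already recorded, used_space_insert(area, 0x3000, 1)
 * fills the gap exactly, so the two neighbouring intervals are merged into
 * a single [0x1000, 4 pages] record and the 0x4000 key is removed from the
 * used_space B+tree.
 */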
1947
1948/** Mark portion of address space area as unused.
1949 *
1950 * The address space area must be already locked.
1951 *
1952 * @param area Address space area.
1953 * @param page First page to be marked.
1954 * @param count Number of pages to be marked.
1955 *
1956 * @return False on failure or true on success.
1957 *
1958 */
1959bool used_space_remove(as_area_t *area, uintptr_t page, size_t count)
1960{
1961 ASSERT(mutex_locked(&area->lock));
1962 ASSERT(IS_ALIGNED(page, PAGE_SIZE));
1963 ASSERT(count);
1964
1965 btree_node_t *leaf;
1966 size_t pages = (size_t) btree_search(&area->used_space, page, &leaf);
1967 if (pages) {
1968 /*
1969 * We are lucky, page is the beginning of some interval.
1970 */
1971 if (count > pages) {
1972 return false;
1973 } else if (count == pages) {
1974 btree_remove(&area->used_space, page, leaf);
1975 goto success;
1976 } else {
1977 /*
1978 * Find the respective interval.
1979 * Decrease its size and relocate its start address.
1980 */
1981 btree_key_t i;
1982 for (i = 0; i < leaf->keys; i++) {
1983 if (leaf->key[i] == page) {
1984 leaf->key[i] += P2SZ(count);
1985 leaf->value[i] -= count;
1986 goto success;
1987 }
1988 }
1989
1990 goto error;
1991 }
1992 }
1993
1994 btree_node_t *node = btree_leaf_node_left_neighbour(&area->used_space,
1995 leaf);
1996 if ((node) && (page < leaf->key[0])) {
1997 uintptr_t left_pg = node->key[node->keys - 1];
1998 size_t left_cnt = (size_t) node->value[node->keys - 1];
1999
2000 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2001 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2002 /*
2003 * The interval is contained in the rightmost
2004 * interval of the left neighbour and can be
2005 * removed by updating the size of the bigger
2006 * interval.
2007 */
2008 node->value[node->keys - 1] -= count;
2009 goto success;
2010 } else if (page + P2SZ(count) <
2011 left_pg + P2SZ(left_cnt)) {
2012 size_t new_cnt;
2013
2014 /*
2015 * The interval is contained in the rightmost
2016 * interval of the left neighbour but its
2017 * removal requires both updating the size of
2018 * the original interval and also inserting a
2019 * new interval.
2020 */
2021 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2022 (page + P2SZ(count))) >> PAGE_WIDTH;
2023 node->value[node->keys - 1] -= count + new_cnt;
2024 btree_insert(&area->used_space, page +
2025 P2SZ(count), (void *) new_cnt, leaf);
2026 goto success;
2027 }
2028 }
2029
2030 return false;
2031 } else if (page < leaf->key[0])
2032 return false;
2033
2034 if (page > leaf->key[leaf->keys - 1]) {
2035 uintptr_t left_pg = leaf->key[leaf->keys - 1];
2036 size_t left_cnt = (size_t) leaf->value[leaf->keys - 1];
2037
2038 if (overlaps(left_pg, P2SZ(left_cnt), page, P2SZ(count))) {
2039 if (page + P2SZ(count) == left_pg + P2SZ(left_cnt)) {
2040 /*
2041 * The interval is contained in the rightmost
2042 * interval of the leaf and can be removed by
2043 * updating the size of the bigger interval.
2044 */
2045 leaf->value[leaf->keys - 1] -= count;
2046 goto success;
2047 } else if (page + P2SZ(count) < left_pg +
2048 P2SZ(left_cnt)) {
2049 size_t new_cnt;
2050
2051 /*
2052 * The interval is contained in the rightmost
2053 * interval of the leaf but its removal
2054 * requires both updating the size of the
2055 * original interval and also inserting a new
2056 * interval.
2057 */
2058 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2059 (page + P2SZ(count))) >> PAGE_WIDTH;
2060 leaf->value[leaf->keys - 1] -= count + new_cnt;
2061 btree_insert(&area->used_space, page +
2062 P2SZ(count), (void *) new_cnt, leaf);
2063 goto success;
2064 }
2065 }
2066
2067 return false;
2068 }
2069
2070 /*
2071 * The border cases have been already resolved.
2072 * Now the interval can be only between intervals of the leaf.
2073 */
2074 btree_key_t i;
2075 for (i = 1; i < leaf->keys - 1; i++) {
2076 if (page < leaf->key[i]) {
2077 uintptr_t left_pg = leaf->key[i - 1];
2078 size_t left_cnt = (size_t) leaf->value[i - 1];
2079
2080 /*
2081 * Now the interval is between intervals corresponding
2082 * to (i - 1) and i.
2083 */
2084 if (overlaps(left_pg, P2SZ(left_cnt), page,
2085 P2SZ(count))) {
2086 if (page + P2SZ(count) ==
2087 left_pg + P2SZ(left_cnt)) {
2088 /*
2089 * The interval is contained in the
2090 * interval (i - 1) of the leaf and can
2091 * be removed by updating the size of
2092 * the bigger interval.
2093 */
2094 leaf->value[i - 1] -= count;
2095 goto success;
2096 } else if (page + P2SZ(count) <
2097 left_pg + P2SZ(left_cnt)) {
2098 size_t new_cnt;
2099
2100 /*
2101 * The interval is contained in the
2102 * interval (i - 1) of the leaf but its
2103 * removal requires both updating the
2104 * size of the original interval and
2105 * also inserting a new interval.
2106 */
2107 new_cnt = ((left_pg + P2SZ(left_cnt)) -
2108 (page + P2SZ(count))) >>
2109 PAGE_WIDTH;
2110 leaf->value[i - 1] -= count + new_cnt;
2111 btree_insert(&area->used_space, page +
2112 P2SZ(count), (void *) new_cnt,
2113 leaf);
2114 goto success;
2115 }
2116 }
2117
2118 return false;
2119 }
2120 }
2121
2122error:
2123 panic("Inconsistency detected while removing %zu pages of used "
2124 "space from %p.", count, (void *) page);
2125
2126success:
2127 area->resident -= count;
2128 return true;
2129}
2130
2131/*
2132 * Address space related syscalls.
2133 */
2134
2135sysarg_t sys_as_area_create(uintptr_t base, size_t size, unsigned int flags,
2136 uintptr_t bound)
2137{
2138 uintptr_t virt = base;
2139 as_area_t *area = as_area_create(AS, flags, size,
2140 AS_AREA_ATTR_NONE, &anon_backend, NULL, &virt, bound);
2141 if (area == NULL)
2142 return (sysarg_t) -1;
2143
2144 return (sysarg_t) virt;
2145}
2146
2147sysarg_t sys_as_area_resize(uintptr_t address, size_t size, unsigned int flags)
2148{
2149 return (sysarg_t) as_area_resize(AS, address, size, 0);
2150}
2151
2152sysarg_t sys_as_area_change_flags(uintptr_t address, unsigned int flags)
2153{
2154 return (sysarg_t) as_area_change_flags(AS, flags, address);
2155}
2156
2157sysarg_t sys_as_area_destroy(uintptr_t address)
2158{
2159 return (sysarg_t) as_area_destroy(AS, address);
2160}
2161
2162/** Get list of address space areas.
2163 *
2164 * @param as Address space.
2165 * @param obuf Place to save pointer to returned buffer.
2166 * @param osize Place to save size of returned buffer.
2167 *
2168 */
2169void as_get_area_info(as_t *as, as_area_info_t **obuf, size_t *osize)
2170{
2171 mutex_lock(&as->lock);
2172
2173 /* First pass, count number of areas. */
2174
2175 size_t area_cnt = 0;
2176
2177 list_foreach(as->as_area_btree.leaf_list, leaf_link, btree_node_t,
2178 node) {
2179 area_cnt += node->keys;
2180 }
2181
2182 size_t isize = area_cnt * sizeof(as_area_info_t);
2183 as_area_info_t *info = malloc(isize, 0);
2184
2185 /* Second pass, record data. */
2186
2187 size_t area_idx = 0;
2188
2189 list_foreach(as->as_area_btree.leaf_list, leaf_link, btree_node_t,
2190 node) {
2191 btree_key_t i;
2192
2193 for (i = 0; i < node->keys; i++) {
2194 as_area_t *area = node->value[i];
2195
2196 ASSERT(area_idx < area_cnt);
2197 mutex_lock(&area->lock);
2198
2199 info[area_idx].start_addr = area->base;
2200 info[area_idx].size = P2SZ(area->pages);
2201 info[area_idx].flags = area->flags;
2202 ++area_idx;
2203
2204 mutex_unlock(&area->lock);
2205 }
2206 }
2207
2208 mutex_unlock(&as->lock);
2209
2210 *obuf = info;
2211 *osize = isize;
2212}
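/*
 * Illustrative sketch only: consuming the snapshot produced by
 * as_get_area_info(). The buffer is allocated here and it is up to the
 * caller to free it.
 *
 *	as_area_info_t *info;
 *	size_t isize;
 *	as_get_area_info(AS, &info, &isize);
 *	for (size_t i = 0; i < isize / sizeof(as_area_info_t); i++)
 *		printf("area %p, %zu bytes\n",
 *		    (void *) info[i].start_addr, info[i].size);
 *	free(info);
 */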
2213
2214/** Print out information about address space.
2215 *
2216 * @param as Address space.
2217 *
2218 */
2219void as_print(as_t *as)
2220{
2221 mutex_lock(&as->lock);
2222
2223 /* Print out info about address space areas */
2224 list_foreach(as->as_area_btree.leaf_list, leaf_link, btree_node_t,
2225 node) {
2226 btree_key_t i;
2227
2228 for (i = 0; i < node->keys; i++) {
2229 as_area_t *area = node->value[i];
2230
2231 mutex_lock(&area->lock);
2232 printf("as_area: %p, base=%p, pages=%zu"
2233 " (%p - %p)\n", area, (void *) area->base,
2234 area->pages, (void *) area->base,
2235 (void *) (area->base + P2SZ(area->pages)));
2236 mutex_unlock(&area->lock);
2237 }
2238 }
2239
2240 mutex_unlock(&as->lock);
2241}
2242
2243/** @}
2244 */