/* $NetBSD: uvm_physseg.c,v 1.20 2024/01/13 09:44:42 tnn Exp $ */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vm_page.h 7.3 (Berkeley) 4/21/91
 * from: Id: uvm_page.h,v 1.1.2.6 1998/02/04 02:31:42 chuck Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Consolidated API from uvm_page.c and others.
 * Consolidated and designed by Cherry G. Mathew
 * rbtree(3) backing implementation by:
 * Santhosh N. Raju
 */

#ifdef _KERNEL_OPT
#include "opt_uvm.h"
#endif

#include <sys/param.h>
#include <sys/types.h>
#include <sys/extent.h>
#include <sys/kmem.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_param.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_physseg.h>

/*
 * uvm_physseg: describes one segment of physical memory
 */
struct uvm_physseg {
	/* used during RB tree lookup for PHYS_TO_VM_PAGE().
	 */
#if defined(UVM_HOTPLUG)
	struct rb_node rb_node;		/* tree information */
#endif
	paddr_t start;			/* PF# of first page in segment */
	paddr_t end;			/* (PF# of last page in segment) + 1 */
	struct vm_page *pgs;		/* vm_page structures (from start) */

	/* less performance sensitive fields. */
	paddr_t avail_start;		/* PF# of first free page in segment */
	paddr_t avail_end;		/* (PF# of last free page in segment) +1 */
	struct extent *ext;		/* extent(9) structure to manage pgs[] */
	int free_list;			/* which free list they belong on */
	u_long start_hint;		/* start looking for free pages here */
#ifdef __HAVE_PMAP_PHYSSEG
	struct pmap_physseg pmseg;	/* pmap specific (MD) data */
#endif
};

/*
 * These functions are reserved for uvm(9) internal use and are not
 * exported in the header file uvm_physseg.h
 *
 * Thus they are redefined here.
 */
void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *);
void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t);

/* returns a pgs array */
struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t);

#if defined(UVM_HOTPLUG) /* rbtree implementation */

#define	HANDLE_TO_PHYSSEG_NODE(h)	((struct uvm_physseg *)(h))
#define	PHYSSEG_NODE_TO_HANDLE(u)	((uvm_physseg_t)(u))

struct uvm_physseg_graph {
	struct rb_tree rb_tree;		/* Tree for entries */
	int nentries;			/* Number of entries */
} __aligned(COHERENCY_UNIT);

static struct uvm_physseg_graph uvm_physseg_graph __read_mostly;

/*
 * Note on kmem(9) allocator usage:
 * We take the conservative approach that plug/unplug are allowed to
 * fail in high memory stress situations.
 *
 * We want to avoid re-entrant situations in which one plug/unplug
 * operation is waiting on a previous one to complete, since this
 * makes the design more complicated than necessary.
 *
 * We may review this and change its behaviour, once the use cases
 * become more obvious.
 */

/*
 * Special alloc()/free() functions for boot time support:
 * We assume that alloc() at boot time is only for new 'vm_physseg's.
 * This allows us to use a static array for memory allocation at boot
 * time. Thus we avoid using kmem(9) which is not ready at this point
 * in boot.
 *
 * After kmem(9) is ready, we use it. We currently discard any free()s
 * to this static array, since the size is small enough to be a
 * trivial waste on all architectures we run on.
 */

static size_t nseg = 0;
static struct uvm_physseg uvm_physseg[VM_PHYSSEG_MAX];

static void *
uvm_physseg_alloc(size_t sz)
{
	/*
	 * During boot time, we only support allocating vm_physseg
	 * entries from the static array.
	 * We need to assert for this.
	 */

	if (__predict_false(uvm.page_init_done == false)) {
		if (sz % sizeof(struct uvm_physseg))
			panic("%s: tried to alloc size other than multiple"
			    " of struct uvm_physseg at boot\n", __func__);

		size_t n = sz / sizeof(struct uvm_physseg);
		nseg += n;

		KASSERT(nseg > 0);
		KASSERT(nseg <= VM_PHYSSEG_MAX);

		return &uvm_physseg[nseg - n];
	}

	return kmem_zalloc(sz, KM_NOSLEEP);
}

static void
uvm_physseg_free(void *p, size_t sz)
{
	/*
	 * This is a bit tricky. We do allow simulation of free()
	 * during boot (e.g. when MD code is "steal"ing memory and the
	 * segment has been exhausted, and thus needs to be free()-ed).
	 * free() also complicates things because we leak the free()d
	 * memory. Therefore calling code can't assume that free()-ed
	 * memory is available for alloc() again, at boot time.
	 *
	 * Thus we can't explicitly disallow free()s during
	 * boot time. However, the same restriction for alloc()
	 * applies to free().
We only allow uvm_physseg related free()s * via this function during boot time. */ if (__predict_false(uvm.page_init_done == false)) { if (sz % sizeof(struct uvm_physseg)) panic("%s: tried to free size other than struct uvm_physseg" " at boot\n", __func__); } /* * Could have been in a single if(){} block - split for * clarity */ if ((struct uvm_physseg *)p >= uvm_physseg && (struct uvm_physseg *)p < (uvm_physseg + VM_PHYSSEG_MAX)) { if (sz % sizeof(struct uvm_physseg)) panic("%s: tried to free() other than struct uvm_physseg" " from static array\n", __func__); if ((sz / sizeof(struct uvm_physseg)) >= VM_PHYSSEG_MAX) panic("%s: tried to free() the entire static array!", __func__); return; /* Nothing to free */ } kmem_free(p, sz); } /* XXX: Multi page size */ bool uvm_physseg_plug(paddr_t pfn, size_t pages, uvm_physseg_t *psp) { int preload; size_t slabpages; struct uvm_physseg *ps, *current_ps = NULL; struct vm_page *slab = NULL, *pgs = NULL; #ifdef DEBUG paddr_t off; uvm_physseg_t upm; upm = uvm_physseg_find(pfn, &off); ps = HANDLE_TO_PHYSSEG_NODE(upm); if (ps != NULL) /* XXX; do we allow "update" plugs ? */ return false; #endif /* * do we have room? */ ps = uvm_physseg_alloc(sizeof (struct uvm_physseg)); if (ps == NULL) { printf("uvm_page_physload: unable to load physical memory " "segment\n"); printf("\t%d segments allocated, ignoring 0x%"PRIxPADDR" -> 0x%"PRIxPADDR"\n", VM_PHYSSEG_MAX, pfn, pfn + pages + 1); printf("\tincrease VM_PHYSSEG_MAX\n"); return false; } /* span init */ ps->start = pfn; ps->end = pfn + pages; /* * XXX: Ugly hack because uvmexp.npages accounts for only * those pages in the segment included below as well - this * should be legacy and removed. */ ps->avail_start = ps->start; ps->avail_end = ps->end; /* * check to see if this is a "preload" (i.e. uvm_page_init hasn't been * called yet, so kmem is not available). */ preload = 1; /* We are going to assume it is a preload */ RB_TREE_FOREACH(current_ps, &(uvm_physseg_graph.rb_tree)) { /* If there are non NULL pages then we are not in a preload */ if (current_ps->pgs != NULL) { preload = 0; /* Try to scavenge from earlier unplug()s. */ pgs = uvm_physseg_seg_alloc_from_slab(current_ps, pages); if (pgs != NULL) { break; } } } /* * if VM is already running, attempt to kmem_alloc vm_page structures */ if (!preload) { if (pgs == NULL) { /* Brand new */ /* Iteratively try alloc down from uvmexp.npages */ for (slabpages = (size_t) uvmexp.npages; slabpages >= pages; slabpages--) { slab = kmem_zalloc(sizeof *pgs * (long unsigned int)slabpages, KM_NOSLEEP); if (slab != NULL) break; } if (slab == NULL) { uvm_physseg_free(ps, sizeof(struct uvm_physseg)); return false; } uvm_physseg_seg_chomp_slab(ps, slab, (size_t) slabpages); /* We allocate enough for this plug */ pgs = uvm_physseg_seg_alloc_from_slab(ps, pages); if (pgs == NULL) { printf("unable to uvm_physseg_seg_alloc_from_slab() from backend\n"); return false; } } else { /* Reuse scavenged extent */ ps->ext = current_ps->ext; } physmem += pages; uvmpdpol_reinit(); } else { /* Boot time - see uvm_page.c:uvm_page_init() */ pgs = NULL; ps->pgs = pgs; } /* * now insert us in the proper place in uvm_physseg_graph.rb_tree */ current_ps = rb_tree_insert_node(&(uvm_physseg_graph.rb_tree), ps); if (current_ps != ps) { panic("uvm_page_physload: Duplicate address range detected!"); } uvm_physseg_graph.nentries++; /* * uvm_pagefree() requires the PHYS_TO_VM_PAGE(pgs[i]) on the * newly allocated pgs[] to return the correct value. 
This is * a bit of a chicken and egg problem, since it needs * uvm_physseg_find() to succeed. For this, the node needs to * be inserted *before* uvm_physseg_init_seg() happens. * * During boot, this happens anyway, since * uvm_physseg_init_seg() is called later on and separately * from uvm_page.c:uvm_page_init(). * In the case of hotplug we need to ensure this. */ if (__predict_true(!preload)) uvm_physseg_init_seg(ps, pgs); if (psp != NULL) *psp = ps; return true; } static int uvm_physseg_compare_nodes(void *ctx, const void *nnode1, const void *nnode2) { const struct uvm_physseg *enode1 = nnode1; const struct uvm_physseg *enode2 = nnode2; KASSERT(enode1->start < enode2->start || enode1->start >= enode2->end); KASSERT(enode2->start < enode1->start || enode2->start >= enode1->end); if (enode1->start < enode2->start) return -1; if (enode1->start >= enode2->end) return 1; return 0; } static int uvm_physseg_compare_key(void *ctx, const void *nnode, const void *pkey) { const struct uvm_physseg *enode = nnode; const paddr_t pa = *(const paddr_t *) pkey; if(enode->start <= pa && pa < enode->end) return 0; if (enode->start < pa) return -1; if (enode->end > pa) return 1; return 0; } static const rb_tree_ops_t uvm_physseg_tree_ops = { .rbto_compare_nodes = uvm_physseg_compare_nodes, .rbto_compare_key = uvm_physseg_compare_key, .rbto_node_offset = offsetof(struct uvm_physseg, rb_node), .rbto_context = NULL }; /* * uvm_physseg_init: init the physmem * * => physmem unit should not be in use at this point */ void uvm_physseg_init(void) { rb_tree_init(&(uvm_physseg_graph.rb_tree), &uvm_physseg_tree_ops); uvm_physseg_graph.nentries = 0; } uvm_physseg_t uvm_physseg_get_next(uvm_physseg_t upm) { /* next of invalid is invalid, not fatal */ if (uvm_physseg_valid_p(upm) == false) return UVM_PHYSSEG_TYPE_INVALID; return (uvm_physseg_t) rb_tree_iterate(&(uvm_physseg_graph.rb_tree), upm, RB_DIR_RIGHT); } uvm_physseg_t uvm_physseg_get_prev(uvm_physseg_t upm) { /* prev of invalid is invalid, not fatal */ if (uvm_physseg_valid_p(upm) == false) return UVM_PHYSSEG_TYPE_INVALID; return (uvm_physseg_t) rb_tree_iterate(&(uvm_physseg_graph.rb_tree), upm, RB_DIR_LEFT); } uvm_physseg_t uvm_physseg_get_last(void) { return (uvm_physseg_t) RB_TREE_MAX(&(uvm_physseg_graph.rb_tree)); } uvm_physseg_t uvm_physseg_get_first(void) { return (uvm_physseg_t) RB_TREE_MIN(&(uvm_physseg_graph.rb_tree)); } paddr_t uvm_physseg_get_highest_frame(void) { struct uvm_physseg *ps = (uvm_physseg_t) RB_TREE_MAX(&(uvm_physseg_graph.rb_tree)); return ps->end - 1; } /* * uvm_page_physunload: unload physical memory and return it to * caller. */ bool uvm_page_physunload(uvm_physseg_t upm, int freelist, paddr_t *paddrp) { struct uvm_physseg *seg; if (__predict_true(uvm.page_init_done == true)) panic("%s: unload attempted after uvm_page_init()\n", __func__); seg = HANDLE_TO_PHYSSEG_NODE(upm); if (seg->free_list != freelist) { return false; } /* * During cold boot, what we're about to unplug hasn't been * put on the uvm freelist, nor has uvmexp.npages been * updated. (This happens in uvm_page.c:uvm_page_init()) * * For hotplug, we assume here that the pages being unloaded * here are completely out of sight of uvm (ie; not on any uvm * lists), and that uvmexp.npages has been suitably * decremented before we're called. * * XXX: will avail_end == start if avail_start < avail_end? 
*/ /* try from front */ if (seg->avail_start == seg->start && seg->avail_start < seg->avail_end) { *paddrp = ctob(seg->avail_start); return uvm_physseg_unplug(seg->avail_start, 1); } /* try from rear */ if (seg->avail_end == seg->end && seg->avail_start < seg->avail_end) { *paddrp = ctob(seg->avail_end - 1); return uvm_physseg_unplug(seg->avail_end - 1, 1); } return false; } bool uvm_page_physunload_force(uvm_physseg_t upm, int freelist, paddr_t *paddrp) { struct uvm_physseg *seg; seg = HANDLE_TO_PHYSSEG_NODE(upm); if (__predict_true(uvm.page_init_done == true)) panic("%s: unload attempted after uvm_page_init()\n", __func__); /* any room in this bank? */ if (seg->avail_start >= seg->avail_end) { return false; /* nope */ } *paddrp = ctob(seg->avail_start); /* Always unplug from front */ return uvm_physseg_unplug(seg->avail_start, 1); } /* * vm_physseg_find: find vm_physseg structure that belongs to a PA */ uvm_physseg_t uvm_physseg_find(paddr_t pframe, psize_t *offp) { struct uvm_physseg * ps = NULL; ps = rb_tree_find_node(&(uvm_physseg_graph.rb_tree), &pframe); if(ps != NULL && offp != NULL) *offp = pframe - ps->start; return ps; } #else /* UVM_HOTPLUG */ /* * physical memory config is stored in vm_physmem. */ #define VM_PHYSMEM_PTR(i) (&vm_physmem[i]) #if VM_PHYSSEG_MAX == 1 #define VM_PHYSMEM_PTR_SWAP(i, j) /* impossible */ #else #define VM_PHYSMEM_PTR_SWAP(i, j) \ do { vm_physmem[(i)] = vm_physmem[(j)]; } while (0) #endif #define HANDLE_TO_PHYSSEG_NODE(h) (VM_PHYSMEM_PTR((int)h)) #define PHYSSEG_NODE_TO_HANDLE(u) ((int)((vsize_t) (u - vm_physmem) / sizeof(struct uvm_physseg))) /* XXXCDC: uvm.physmem */ static struct uvm_physseg vm_physmem[VM_PHYSSEG_MAX] __read_mostly; /* XXXCDC: uvm.nphysseg */ static int vm_nphysseg __read_mostly = 0; #define vm_nphysmem vm_nphysseg void uvm_physseg_init(void) { /* XXX: Provisioning for rb_tree related init(s) */ return; } int uvm_physseg_get_next(uvm_physseg_t lcv) { /* next of invalid is invalid, not fatal */ if (uvm_physseg_valid_p(lcv) == false) return UVM_PHYSSEG_TYPE_INVALID; return (lcv + 1); } int uvm_physseg_get_prev(uvm_physseg_t lcv) { /* prev of invalid is invalid, not fatal */ if (uvm_physseg_valid_p(lcv) == false) return UVM_PHYSSEG_TYPE_INVALID; return (lcv - 1); } int uvm_physseg_get_last(void) { return (vm_nphysseg - 1); } int uvm_physseg_get_first(void) { return 0; } paddr_t uvm_physseg_get_highest_frame(void) { int lcv; paddr_t last = 0; struct uvm_physseg *ps; for (lcv = 0; lcv < vm_nphysseg; lcv++) { ps = VM_PHYSMEM_PTR(lcv); if (last < ps->end) last = ps->end; } return last; } static struct vm_page * uvm_post_preload_check(void) { int preload, lcv; /* * check to see if this is a "preload" (i.e. uvm_page_init hasn't been * called yet, so kmem is not available). */ for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) { if (VM_PHYSMEM_PTR(lcv)->pgs) break; } preload = (lcv == vm_nphysmem); /* * if VM is already running, attempt to kmem_alloc vm_page structures */ if (!preload) { panic("Tried to add RAM after uvm_page_init"); } return NULL; } /* * uvm_page_physunload: unload physical memory and return it to * caller. */ bool uvm_page_physunload(uvm_physseg_t psi, int freelist, paddr_t *paddrp) { int x; struct uvm_physseg *seg; uvm_post_preload_check(); seg = VM_PHYSMEM_PTR(psi); if (seg->free_list != freelist) { return false; } /* try from front */ if (seg->avail_start == seg->start && seg->avail_start < seg->avail_end) { *paddrp = ctob(seg->avail_start); seg->avail_start++; seg->start++; /* nothing left? 
nuke it */ if (seg->avail_start == seg->end) { if (vm_nphysmem == 1) panic("uvm_page_physget: out of memory!"); vm_nphysmem--; for (x = psi ; x < vm_nphysmem ; x++) /* structure copy */ VM_PHYSMEM_PTR_SWAP(x, x + 1); } return (true); } /* try from rear */ if (seg->avail_end == seg->end && seg->avail_start < seg->avail_end) { *paddrp = ctob(seg->avail_end - 1); seg->avail_end--; seg->end--; /* nothing left? nuke it */ if (seg->avail_end == seg->start) { if (vm_nphysmem == 1) panic("uvm_page_physget: out of memory!"); vm_nphysmem--; for (x = psi ; x < vm_nphysmem ; x++) /* structure copy */ VM_PHYSMEM_PTR_SWAP(x, x + 1); } return (true); } return false; } bool uvm_page_physunload_force(uvm_physseg_t psi, int freelist, paddr_t *paddrp) { int x; struct uvm_physseg *seg; uvm_post_preload_check(); seg = VM_PHYSMEM_PTR(psi); /* any room in this bank? */ if (seg->avail_start >= seg->avail_end) { return false; /* nope */ } *paddrp = ctob(seg->avail_start); seg->avail_start++; /* truncate! */ seg->start = seg->avail_start; /* nothing left? nuke it */ if (seg->avail_start == seg->end) { if (vm_nphysmem == 1) panic("uvm_page_physget: out of memory!"); vm_nphysmem--; for (x = psi ; x < vm_nphysmem ; x++) /* structure copy */ VM_PHYSMEM_PTR_SWAP(x, x + 1); } return (true); } bool uvm_physseg_plug(paddr_t pfn, size_t pages, uvm_physseg_t *psp) { int lcv; struct vm_page *pgs; struct uvm_physseg *ps; #ifdef DEBUG paddr_t off; uvm_physseg_t upm; upm = uvm_physseg_find(pfn, &off); if (uvm_physseg_valid_p(upm)) /* XXX; do we allow "update" plugs ? */ return false; #endif paddr_t start = pfn; paddr_t end = pfn + pages; paddr_t avail_start = start; paddr_t avail_end = end; if (uvmexp.pagesize == 0) panic("uvm_page_physload: page size not set!"); /* * do we have room? */ if (vm_nphysmem == VM_PHYSSEG_MAX) { printf("uvm_page_physload: unable to load physical memory " "segment\n"); printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n", VM_PHYSSEG_MAX, (long long)start, (long long)end); printf("\tincrease VM_PHYSSEG_MAX\n"); if (psp != NULL) *psp = UVM_PHYSSEG_TYPE_INVALID_OVERFLOW; return false; } /* * check to see if this is a "preload" (i.e. uvm_page_init hasn't been * called yet, so kmem is not available). */ pgs = uvm_post_preload_check(); /* * now insert us in the proper place in vm_physmem[] */ #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM) /* random: put it at the end (easy!) */ ps = VM_PHYSMEM_PTR(vm_nphysmem); lcv = vm_nphysmem; #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) { int x; /* sort by address for binary search */ for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) if (start < VM_PHYSMEM_PTR(lcv)->start) break; ps = VM_PHYSMEM_PTR(lcv); /* move back other entries, if necessary ... */ for (x = vm_nphysmem ; x > lcv ; x--) /* structure copy */ VM_PHYSMEM_PTR_SWAP(x, x - 1); } #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) { int x; /* sort by largest segment first */ for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) if ((end - start) > (VM_PHYSMEM_PTR(lcv)->end - VM_PHYSMEM_PTR(lcv)->start)) break; ps = VM_PHYSMEM_PTR(lcv); /* move back other entries, if necessary ... 
*/ for (x = vm_nphysmem ; x > lcv ; x--) /* structure copy */ VM_PHYSMEM_PTR_SWAP(x, x - 1); } #else panic("uvm_page_physload: unknown physseg strategy selected!"); #endif ps->start = start; ps->end = end; ps->avail_start = avail_start; ps->avail_end = avail_end; ps->pgs = pgs; vm_nphysmem++; if (psp != NULL) *psp = lcv; return true; } /* * when VM_PHYSSEG_MAX is 1, we can simplify these functions */ #if VM_PHYSSEG_MAX == 1 static inline int vm_physseg_find_contig(struct uvm_physseg *, int, paddr_t, psize_t *); #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) static inline int vm_physseg_find_bsearch(struct uvm_physseg *, int, paddr_t, psize_t *); #else static inline int vm_physseg_find_linear(struct uvm_physseg *, int, paddr_t, psize_t *); #endif /* * vm_physseg_find: find vm_physseg structure that belongs to a PA */ inline int uvm_physseg_find(paddr_t pframe, psize_t *offp) { #if VM_PHYSSEG_MAX == 1 return vm_physseg_find_contig(vm_physmem, vm_nphysseg, pframe, offp); #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) return vm_physseg_find_bsearch(vm_physmem, vm_nphysseg, pframe, offp); #else return vm_physseg_find_linear(vm_physmem, vm_nphysseg, pframe, offp); #endif } #if VM_PHYSSEG_MAX == 1 static inline int vm_physseg_find_contig(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp) { /* 'contig' case */ if (pframe >= segs[0].start && pframe < segs[0].end) { if (offp) *offp = pframe - segs[0].start; return(0); } return(-1); } #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) static inline int vm_physseg_find_bsearch(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp) { /* binary search for it */ int start, len, guess; /* * if try is too large (thus target is less than try) we reduce * the length to trunc(len/2) [i.e. everything smaller than "try"] * * if the try is too small (thus target is greater than try) then * we set the new start to be (try + 1). this means we need to * reduce the length to (round(len/2) - 1). * * note "adjust" below which takes advantage of the fact that * (round(len/2) - 1) == trunc((len - 1) / 2) * for any value of len we may have */ for (start = 0, len = nsegs ; len != 0 ; len = len / 2) { guess = start + (len / 2); /* try in the middle */ /* start past our try? */ if (pframe >= segs[guess].start) { /* was try correct? */ if (pframe < segs[guess].end) { if (offp) *offp = pframe - segs[guess].start; return guess; /* got it */ } start = guess + 1; /* next time, start here */ len--; /* "adjust" */ } else { /* * pframe before try, just reduce length of * region, done in "for" loop */ } } return(-1); } #else static inline int vm_physseg_find_linear(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp) { /* linear search for it */ int lcv; for (lcv = 0; lcv < nsegs; lcv++) { if (pframe >= segs[lcv].start && pframe < segs[lcv].end) { if (offp) *offp = pframe - segs[lcv].start; return(lcv); /* got it */ } } return(-1); } #endif #endif /* UVM_HOTPLUG */ /* * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages * back from an I/O mapping (ugh!). used in some MD code as well. it can * be prominent in flamegraphs, so optimise it and try to make it easy for * the compiler by including next to the inline lookup routines. 
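 * The 'contig' and linear strategies are open-coded in
 * uvm_phys_to_vm_page() below; the bsearch strategy defers to
 * uvm_physseg_find() instead.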
*/ struct vm_page * uvm_phys_to_vm_page(paddr_t pa) { #if VM_PHYSSEG_STRAT != VM_PSTRAT_BSEARCH /* 'contig' and linear cases */ KASSERT(vm_nphysseg > 0); struct uvm_physseg *ps = &vm_physmem[0]; struct uvm_physseg *end = &vm_physmem[vm_nphysseg]; paddr_t pframe = atop(pa); do { if (pframe >= ps->start && pframe < ps->end) { return &ps->pgs[pframe - ps->start]; } } while (VM_PHYSSEG_MAX > 1 && __predict_false(++ps < end)); return NULL; #else /* binary search for it */ paddr_t pf = atop(pa); paddr_t off; uvm_physseg_t upm; upm = uvm_physseg_find(pf, &off); if (upm != UVM_PHYSSEG_TYPE_INVALID) return uvm_physseg_get_pg(upm, off); return(NULL); #endif } bool uvm_physseg_valid_p(uvm_physseg_t upm) { struct uvm_physseg *ps; if (upm == UVM_PHYSSEG_TYPE_INVALID || upm == UVM_PHYSSEG_TYPE_INVALID_EMPTY || upm == UVM_PHYSSEG_TYPE_INVALID_OVERFLOW) return false; /* * This is the delicate init dance - * needs to go with the dance. */ if (uvm.page_init_done != true) return true; ps = HANDLE_TO_PHYSSEG_NODE(upm); /* Extra checks needed only post uvm_page_init() */ if (ps->pgs == NULL) return false; /* XXX: etc. */ return true; } /* * Boot protocol dictates that these must be able to return partially * initialised segments. */ paddr_t uvm_physseg_get_start(uvm_physseg_t upm) { if (uvm_physseg_valid_p(upm) == false) return (paddr_t) -1; return HANDLE_TO_PHYSSEG_NODE(upm)->start; } paddr_t uvm_physseg_get_end(uvm_physseg_t upm) { if (uvm_physseg_valid_p(upm) == false) return (paddr_t) -1; return HANDLE_TO_PHYSSEG_NODE(upm)->end; } paddr_t uvm_physseg_get_avail_start(uvm_physseg_t upm) { if (uvm_physseg_valid_p(upm) == false) return (paddr_t) -1; return HANDLE_TO_PHYSSEG_NODE(upm)->avail_start; } #if defined(UVM_PHYSSEG_LEGACY) void uvm_physseg_set_avail_start(uvm_physseg_t upm, paddr_t avail_start) { struct uvm_physseg *ps = HANDLE_TO_PHYSSEG_NODE(upm); #if defined(DIAGNOSTIC) paddr_t avail_end; avail_end = uvm_physseg_get_avail_end(upm); KASSERT(uvm_physseg_valid_p(upm)); KASSERT(avail_start < avail_end); KASSERT(avail_start >= ps->start); #endif ps->avail_start = avail_start; } void uvm_physseg_set_avail_end(uvm_physseg_t upm, paddr_t avail_end) { struct uvm_physseg *ps = HANDLE_TO_PHYSSEG_NODE(upm); #if defined(DIAGNOSTIC) paddr_t avail_start; avail_start = uvm_physseg_get_avail_start(upm); KASSERT(uvm_physseg_valid_p(upm)); KASSERT(avail_end > avail_start); KASSERT(avail_end <= ps->end); #endif ps->avail_end = avail_end; } #endif /* UVM_PHYSSEG_LEGACY */ paddr_t uvm_physseg_get_avail_end(uvm_physseg_t upm) { if (uvm_physseg_valid_p(upm) == false) return (paddr_t) -1; return HANDLE_TO_PHYSSEG_NODE(upm)->avail_end; } inline struct vm_page * uvm_physseg_get_pg(uvm_physseg_t upm, paddr_t idx) { KASSERT(uvm_physseg_valid_p(upm)); return &HANDLE_TO_PHYSSEG_NODE(upm)->pgs[idx]; } #ifdef __HAVE_PMAP_PHYSSEG struct pmap_physseg * uvm_physseg_get_pmseg(uvm_physseg_t upm) { KASSERT(uvm_physseg_valid_p(upm)); return &(HANDLE_TO_PHYSSEG_NODE(upm)->pmseg); } #endif int uvm_physseg_get_free_list(uvm_physseg_t upm) { KASSERT(uvm_physseg_valid_p(upm)); return HANDLE_TO_PHYSSEG_NODE(upm)->free_list; } u_long uvm_physseg_get_start_hint(uvm_physseg_t upm) { KASSERT(uvm_physseg_valid_p(upm)); return HANDLE_TO_PHYSSEG_NODE(upm)->start_hint; } bool uvm_physseg_set_start_hint(uvm_physseg_t upm, u_long start_hint) { if (uvm_physseg_valid_p(upm) == false) return false; HANDLE_TO_PHYSSEG_NODE(upm)->start_hint = start_hint; return true; } void uvm_physseg_init_seg(uvm_physseg_t upm, struct vm_page *pgs) { psize_t i; psize_t n; 
paddr_t paddr; struct uvm_physseg *seg; struct vm_page *pg; KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID); KASSERT(pgs != NULL); seg = HANDLE_TO_PHYSSEG_NODE(upm); KASSERT(seg != NULL); KASSERT(seg->pgs == NULL); n = seg->end - seg->start; seg->pgs = pgs; /* init and free vm_pages (we've already zeroed them) */ paddr = ctob(seg->start); for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) { pg = &seg->pgs[i]; pg->phys_addr = paddr; #ifdef __HAVE_VM_PAGE_MD VM_MDPAGE_INIT(pg); #endif if (atop(paddr) >= seg->avail_start && atop(paddr) < seg->avail_end) { uvmexp.npages++; /* add page to free pool */ uvm_page_set_freelist(pg, uvm_page_lookup_freelist(pg)); /* Disable LOCKDEBUG: too many and too early. */ mutex_init(&pg->interlock, MUTEX_NODEBUG, IPL_NONE); uvm_pagefree(pg); } } } void uvm_physseg_seg_chomp_slab(uvm_physseg_t upm, struct vm_page *pgs, size_t n) { struct uvm_physseg *seg = HANDLE_TO_PHYSSEG_NODE(upm); /* max number of pre-boot unplug()s allowed */ #define UVM_PHYSSEG_BOOT_UNPLUG_MAX VM_PHYSSEG_MAX static char btslab_ex_storage[EXTENT_FIXED_STORAGE_SIZE(UVM_PHYSSEG_BOOT_UNPLUG_MAX)]; if (__predict_false(uvm.page_init_done == false)) { seg->ext = extent_create("Boot time slab", (u_long) pgs, (u_long) (pgs + n), (void *)btslab_ex_storage, sizeof(btslab_ex_storage), 0); } else { seg->ext = extent_create("Hotplug slab", (u_long) pgs, (u_long) (pgs + n), NULL, 0, 0); } KASSERT(seg->ext != NULL); } struct vm_page * uvm_physseg_seg_alloc_from_slab(uvm_physseg_t upm, size_t pages) { int err; struct uvm_physseg *seg; struct vm_page *pgs = NULL; KASSERT(pages > 0); seg = HANDLE_TO_PHYSSEG_NODE(upm); if (__predict_false(seg->ext == NULL)) { /* * This is a situation unique to boot time. * It shouldn't happen at any point other than from * the first uvm_page.c:uvm_page_init() call * Since we're in a loop, we can get away with the * below. 
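 * (That is, we simply borrow the extent already set up on the
 * previous segment.)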
*/ KASSERT(uvm.page_init_done != true); uvm_physseg_t upmp = uvm_physseg_get_prev(upm); KASSERT(upmp != UVM_PHYSSEG_TYPE_INVALID); seg->ext = HANDLE_TO_PHYSSEG_NODE(upmp)->ext; KASSERT(seg->ext != NULL); } /* We allocate enough for this segment */ err = extent_alloc(seg->ext, sizeof(*pgs) * pages, 1, 0, EX_BOUNDZERO, (u_long *)&pgs); if (err != 0) { #ifdef DEBUG printf("%s: extent_alloc failed with error: %d \n", __func__, err); #endif } return pgs; } /* * uvm_page_physload: load physical memory into VM system * * => all args are PFs * => all pages in start/end get vm_page structures * => areas marked by avail_start/avail_end get added to the free page pool * => we are limited to VM_PHYSSEG_MAX physical memory segments */ uvm_physseg_t uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start, paddr_t avail_end, int free_list) { struct uvm_physseg *ps; uvm_physseg_t upm; if (__predict_true(uvm.page_init_done == true)) panic("%s: unload attempted after uvm_page_init()\n", __func__); if (uvmexp.pagesize == 0) panic("uvm_page_physload: page size not set!"); if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT) panic("uvm_page_physload: bad free list %d", free_list); if (start >= end) panic("uvm_page_physload: start[%" PRIxPADDR "] >= end[%" PRIxPADDR "]", start, end); if (uvm_physseg_plug(start, end - start, &upm) == false) { panic("uvm_physseg_plug() failed at boot."); /* NOTREACHED */ return UVM_PHYSSEG_TYPE_INVALID; /* XXX: correct type */ } ps = HANDLE_TO_PHYSSEG_NODE(upm); /* Legacy */ ps->avail_start = avail_start; ps->avail_end = avail_end; ps->free_list = free_list; /* XXX: */ return upm; } bool uvm_physseg_unplug(paddr_t pfn, size_t pages) { uvm_physseg_t upm; paddr_t off = 0, start __diagused, end; struct uvm_physseg *seg; upm = uvm_physseg_find(pfn, &off); if (!uvm_physseg_valid_p(upm)) { printf("%s: Tried to unplug from unknown offset\n", __func__); return false; } seg = HANDLE_TO_PHYSSEG_NODE(upm); start = uvm_physseg_get_start(upm); end = uvm_physseg_get_end(upm); if (end < (pfn + pages)) { printf("%s: Tried to unplug oversized span \n", __func__); return false; } KASSERT(pfn == start + off); /* sanity */ if (__predict_true(uvm.page_init_done == true)) { /* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */ if (extent_free(seg->ext, (u_long)(seg->pgs + off), sizeof(struct vm_page) * pages, EX_MALLOCOK | EX_NOWAIT) != 0) return false; } if (off == 0 && (pfn + pages) == end) { #if defined(UVM_HOTPLUG) /* rbtree implementation */ int segcount = 0; struct uvm_physseg *current_ps; /* Complete segment */ if (uvm_physseg_graph.nentries == 1) panic("%s: out of memory!", __func__); if (__predict_true(uvm.page_init_done == true)) { RB_TREE_FOREACH(current_ps, &(uvm_physseg_graph.rb_tree)) { if (seg->ext == current_ps->ext) segcount++; } KASSERT(segcount > 0); if (segcount == 1) { extent_destroy(seg->ext); } /* * We assume that the unplug will succeed from * this point onwards */ uvmexp.npages -= (int) pages; } rb_tree_remove_node(&(uvm_physseg_graph.rb_tree), upm); memset(seg, 0, sizeof(struct uvm_physseg)); uvm_physseg_free(seg, sizeof(struct uvm_physseg)); uvm_physseg_graph.nentries--; #else /* UVM_HOTPLUG */ int x; if (vm_nphysmem == 1) panic("uvm_page_physget: out of memory!"); vm_nphysmem--; for (x = upm ; x < vm_nphysmem ; x++) /* structure copy */ VM_PHYSMEM_PTR_SWAP(x, x + 1); #endif /* UVM_HOTPLUG */ /* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */ return true; } if (off > 0 && (pfn + pages) < end) { #if defined(UVM_HOTPLUG) /* rbtree 
implementation */ /* middle chunk - need a new segment */ struct uvm_physseg *ps, *current_ps; ps = uvm_physseg_alloc(sizeof (struct uvm_physseg)); if (ps == NULL) { printf("%s: Unable to allocated new fragment vm_physseg \n", __func__); return false; } /* Remove middle chunk */ if (__predict_true(uvm.page_init_done == true)) { KASSERT(seg->ext != NULL); ps->ext = seg->ext; /* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */ /* * We assume that the unplug will succeed from * this point onwards */ uvmexp.npages -= (int) pages; } ps->start = pfn + pages; ps->avail_start = ps->start; /* XXX: Legacy */ ps->end = seg->end; ps->avail_end = ps->end; /* XXX: Legacy */ seg->end = pfn; seg->avail_end = seg->end; /* XXX: Legacy */ /* * The new pgs array points to the beginning of the * tail fragment. */ if (__predict_true(uvm.page_init_done == true)) ps->pgs = seg->pgs + off + pages; current_ps = rb_tree_insert_node(&(uvm_physseg_graph.rb_tree), ps); if (current_ps != ps) { panic("uvm_page_physload: Duplicate address range detected!"); } uvm_physseg_graph.nentries++; #else /* UVM_HOTPLUG */ panic("%s: can't unplug() from the middle of a segment without" " UVM_HOTPLUG\n", __func__); /* NOTREACHED */ #endif /* UVM_HOTPLUG */ return true; } if (off == 0 && (pfn + pages) < end) { /* Remove front chunk */ if (__predict_true(uvm.page_init_done == true)) { /* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */ /* * We assume that the unplug will succeed from * this point onwards */ uvmexp.npages -= (int) pages; } /* Truncate */ seg->start = pfn + pages; seg->avail_start = seg->start; /* XXX: Legacy */ /* * Move the pgs array start to the beginning of the * tail end. */ if (__predict_true(uvm.page_init_done == true)) seg->pgs += pages; return true; } if (off > 0 && (pfn + pages) == end) { /* back chunk */ /* Truncate! */ seg->end = pfn; seg->avail_end = seg->end; /* XXX: Legacy */ uvmexp.npages -= (int) pages; return true; } printf("%s: Tried to unplug unknown range \n", __func__); return false; }
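
/*
 * Illustrative sketch only (left under #if 0 so it is not built):
 * walking every physical memory segment with the accessors defined
 * above. The function name uvm_physseg_dump_all() is hypothetical;
 * similar iteration loops appear elsewhere in uvm(9).
 */
#if 0
static void
uvm_physseg_dump_all(void)
{
	uvm_physseg_t upm;

	for (upm = uvm_physseg_get_first();
	    uvm_physseg_valid_p(upm);
	    upm = uvm_physseg_get_next(upm)) {
		printf("physseg: pfns [0x%" PRIxPADDR ", 0x%" PRIxPADDR ") "
		    "avail [0x%" PRIxPADDR ", 0x%" PRIxPADDR ") "
		    "freelist %d\n",
		    uvm_physseg_get_start(upm),
		    uvm_physseg_get_end(upm),
		    uvm_physseg_get_avail_start(upm),
		    uvm_physseg_get_avail_end(upm),
		    uvm_physseg_get_free_list(upm));
	}
}
#endif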