1 | /* $NetBSD: uvm_page.c,v 1.187 2015/04/11 19:24:13 joerg Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1997 Charles D. Cranor and Washington University. |
5 | * Copyright (c) 1991, 1993, The Regents of the University of California. |
6 | * |
7 | * All rights reserved. |
8 | * |
9 | * This code is derived from software contributed to Berkeley by |
10 | * The Mach Operating System project at Carnegie-Mellon University. |
11 | * |
12 | * Redistribution and use in source and binary forms, with or without |
13 | * modification, are permitted provided that the following conditions |
14 | * are met: |
15 | * 1. Redistributions of source code must retain the above copyright |
16 | * notice, this list of conditions and the following disclaimer. |
17 | * 2. Redistributions in binary form must reproduce the above copyright |
18 | * notice, this list of conditions and the following disclaimer in the |
19 | * documentation and/or other materials provided with the distribution. |
20 | * 3. Neither the name of the University nor the names of its contributors |
21 | * may be used to endorse or promote products derived from this software |
22 | * without specific prior written permission. |
23 | * |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
34 | * SUCH DAMAGE. |
35 | * |
36 | * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 |
37 | * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp |
38 | * |
39 | * |
40 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. |
41 | * All rights reserved. |
42 | * |
43 | * Permission to use, copy, modify and distribute this software and |
44 | * its documentation is hereby granted, provided that both the copyright |
45 | * notice and this permission notice appear in all copies of the |
46 | * software, derivative works or modified versions, and any portions |
47 | * thereof, and that both notices appear in supporting documentation. |
48 | * |
49 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
50 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
51 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
52 | * |
53 | * Carnegie Mellon requests users of this software to return to |
54 | * |
55 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
56 | * School of Computer Science |
57 | * Carnegie Mellon University |
58 | * Pittsburgh PA 15213-3890 |
59 | * |
60 | * any improvements or extensions that they make and grant Carnegie the |
61 | * rights to redistribute these changes. |
62 | */ |
63 | |
64 | /* |
65 | * uvm_page.c: page ops. |
66 | */ |
67 | |
68 | #include <sys/cdefs.h> |
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.187 2015/04/11 19:24:13 joerg Exp $");
70 | |
71 | #include "opt_ddb.h" |
72 | #include "opt_uvm.h" |
73 | #include "opt_uvmhist.h" |
74 | #include "opt_readahead.h" |
75 | |
76 | #include <sys/param.h> |
77 | #include <sys/systm.h> |
78 | #include <sys/sched.h> |
#include <sys/kernel.h>
#include <sys/kmem.h>
80 | #include <sys/vnode.h> |
81 | #include <sys/proc.h> |
82 | #include <sys/atomic.h> |
83 | #include <sys/cpu.h> |
84 | |
85 | #include <uvm/uvm.h> |
86 | #include <uvm/uvm_ddb.h> |
87 | #include <uvm/uvm_pdpolicy.h> |
88 | |
89 | /* |
90 | * global vars... XXXCDC: move to uvm. structure. |
91 | */ |
92 | |
93 | /* |
94 | * physical memory config is stored in vm_physmem. |
95 | */ |
96 | |
97 | struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */ |
98 | int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */ |
99 | #define vm_nphysmem vm_nphysseg |
100 | |
101 | /* |
102 | * Some supported CPUs in a given architecture don't support all |
103 | * of the things necessary to do idle page zero'ing efficiently. |
104 | * We therefore provide a way to enable it from machdep code here. |
105 | */ |
106 | bool vm_page_zero_enable = false; |
107 | |
108 | /* |
109 | * number of pages per-CPU to reserve for the kernel. |
110 | */ |
111 | #ifndef UVM_RESERVED_PAGES_PER_CPU |
112 | #define UVM_RESERVED_PAGES_PER_CPU 5 |
113 | #endif |
114 | int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; |
115 | |
116 | /* |
117 | * physical memory size; |
118 | */ |
119 | int physmem; |
120 | |
121 | /* |
122 | * local variables |
123 | */ |
124 | |
125 | /* |
126 | * these variables record the values returned by vm_page_bootstrap, |
127 | * for debugging purposes. The implementation of uvm_pageboot_alloc |
128 | * and pmap_startup here also uses them internally. |
129 | */ |
130 | |
131 | static vaddr_t virtual_space_start; |
132 | static vaddr_t virtual_space_end; |
133 | |
134 | /* |
135 | * we allocate an initial number of page colors in uvm_page_init(), |
136 | * and remember them. We may re-color pages as cache sizes are |
137 | * discovered during the autoconfiguration phase. But we can never |
138 | * free the initial set of buckets, since they are allocated using |
139 | * uvm_pageboot_alloc(). |
140 | */ |
141 | |
142 | static size_t recolored_pages_memsize /* = 0 */; |
143 | |
144 | #ifdef DEBUG |
145 | vaddr_t uvm_zerocheckkva; |
146 | #endif /* DEBUG */ |
147 | |
148 | /* |
149 | * local prototypes |
150 | */ |
151 | |
152 | static void uvm_pageinsert(struct uvm_object *, struct vm_page *); |
153 | static void uvm_pageremove(struct uvm_object *, struct vm_page *); |
154 | |
155 | /* |
156 | * per-object tree of pages |
157 | */ |
158 | |
159 | static signed int |
160 | uvm_page_compare_nodes(void *ctx, const void *n1, const void *n2) |
161 | { |
162 | const struct vm_page *pg1 = n1; |
163 | const struct vm_page *pg2 = n2; |
164 | const voff_t a = pg1->offset; |
165 | const voff_t b = pg2->offset; |
166 | |
167 | if (a < b) |
168 | return -1; |
169 | if (a > b) |
170 | return 1; |
171 | return 0; |
172 | } |
173 | |
174 | static signed int |
175 | uvm_page_compare_key(void *ctx, const void *n, const void *key) |
176 | { |
177 | const struct vm_page *pg = n; |
178 | const voff_t a = pg->offset; |
179 | const voff_t b = *(const voff_t *)key; |
180 | |
181 | if (a < b) |
182 | return -1; |
183 | if (a > b) |
184 | return 1; |
185 | return 0; |
186 | } |
187 | |
188 | const rb_tree_ops_t uvm_page_tree_ops = { |
189 | .rbto_compare_nodes = uvm_page_compare_nodes, |
190 | .rbto_compare_key = uvm_page_compare_key, |
191 | .rbto_node_offset = offsetof(struct vm_page, rb_node), |
192 | .rbto_context = NULL |
193 | }; |
194 | |
195 | /* |
196 | * inline functions |
197 | */ |
198 | |
199 | /* |
200 | * uvm_pageinsert: insert a page in the object. |
201 | * |
202 | * => caller must lock object |
203 | * => caller must lock page queues |
204 | * => call should have already set pg's object and offset pointers |
205 | * and bumped the version counter |
206 | */ |
207 | |
208 | static inline void |
209 | uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg, |
210 | struct vm_page *where) |
211 | { |
212 | |
213 | KASSERT(uobj == pg->uobject); |
214 | KASSERT(mutex_owned(uobj->vmobjlock)); |
215 | KASSERT((pg->flags & PG_TABLED) == 0); |
216 | KASSERT(where == NULL || (where->flags & PG_TABLED)); |
217 | KASSERT(where == NULL || (where->uobject == uobj)); |
218 | |
219 | if (UVM_OBJ_IS_VNODE(uobj)) { |
220 | if (uobj->uo_npages == 0) { |
221 | struct vnode *vp = (struct vnode *)uobj; |
222 | |
223 | vholdl(vp); |
224 | } |
225 | if (UVM_OBJ_IS_VTEXT(uobj)) { |
226 | atomic_inc_uint(&uvmexp.execpages); |
227 | } else { |
228 | atomic_inc_uint(&uvmexp.filepages); |
229 | } |
230 | } else if (UVM_OBJ_IS_AOBJ(uobj)) { |
231 | atomic_inc_uint(&uvmexp.anonpages); |
232 | } |
233 | |
234 | if (where) |
235 | TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue); |
236 | else |
237 | TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); |
238 | pg->flags |= PG_TABLED; |
239 | uobj->uo_npages++; |
240 | } |
241 | |
242 | |
243 | static inline void |
244 | uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) |
245 | { |
246 | struct vm_page *ret __diagused; |
247 | |
248 | KASSERT(uobj == pg->uobject); |
249 | ret = rb_tree_insert_node(&uobj->rb_tree, pg); |
250 | KASSERT(ret == pg); |
251 | } |
252 | |
253 | static inline void |
254 | uvm_pageinsert(struct uvm_object *uobj, struct vm_page *pg) |
255 | { |
256 | |
257 | KDASSERT(uobj != NULL); |
258 | uvm_pageinsert_tree(uobj, pg); |
259 | uvm_pageinsert_list(uobj, pg, NULL); |
260 | } |
261 | |
262 | /* |
263 | * uvm_page_remove: remove page from object. |
264 | * |
265 | * => caller must lock object |
266 | * => caller must lock page queues |
267 | */ |
268 | |
269 | static inline void |
uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg)
271 | { |
272 | |
273 | KASSERT(uobj == pg->uobject); |
274 | KASSERT(mutex_owned(uobj->vmobjlock)); |
275 | KASSERT(pg->flags & PG_TABLED); |
276 | |
277 | if (UVM_OBJ_IS_VNODE(uobj)) { |
278 | if (uobj->uo_npages == 1) { |
279 | struct vnode *vp = (struct vnode *)uobj; |
280 | |
281 | holdrelel(vp); |
282 | } |
283 | if (UVM_OBJ_IS_VTEXT(uobj)) { |
284 | atomic_dec_uint(&uvmexp.execpages); |
285 | } else { |
286 | atomic_dec_uint(&uvmexp.filepages); |
287 | } |
288 | } else if (UVM_OBJ_IS_AOBJ(uobj)) { |
289 | atomic_dec_uint(&uvmexp.anonpages); |
290 | } |
291 | |
292 | /* object should be locked */ |
293 | uobj->uo_npages--; |
294 | TAILQ_REMOVE(&uobj->memq, pg, listq.queue); |
295 | pg->flags &= ~PG_TABLED; |
296 | pg->uobject = NULL; |
297 | } |
298 | |
299 | static inline void |
uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
301 | { |
302 | |
303 | KASSERT(uobj == pg->uobject); |
304 | rb_tree_remove_node(&uobj->rb_tree, pg); |
305 | } |
306 | |
307 | static inline void |
uvm_pageremove(struct uvm_object *uobj, struct vm_page *pg)
309 | { |
310 | |
311 | KDASSERT(uobj != NULL); |
312 | uvm_pageremove_tree(uobj, pg); |
313 | uvm_pageremove_list(uobj, pg); |
314 | } |
315 | |
316 | static void |
317 | uvm_page_init_buckets(struct pgfreelist *pgfl) |
318 | { |
319 | int color, i; |
320 | |
321 | for (color = 0; color < uvmexp.ncolors; color++) { |
322 | for (i = 0; i < PGFL_NQUEUES; i++) { |
323 | LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]); |
324 | } |
325 | } |
326 | } |
327 | |
328 | /* |
329 | * uvm_page_init: init the page system. called from uvm_init(). |
330 | * |
331 | * => we return the range of kernel virtual memory in kvm_startp/kvm_endp |
332 | */ |
333 | |
334 | void |
335 | uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) |
336 | { |
337 | static struct uvm_cpu boot_cpu; |
338 | psize_t freepages, pagecount, bucketcount, n; |
339 | struct pgflbucket *bucketarray, *cpuarray; |
340 | struct vm_physseg *seg; |
341 | struct vm_page *pagearray; |
342 | int lcv; |
343 | u_int i; |
344 | paddr_t paddr; |
345 | |
346 | KASSERT(ncpu <= 1); |
347 | CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *)); |
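	/*
	 * The CTASSERT above covers the trick in uvm_pagefree(): while a
	 * page sits on the free list it has no identity, so pg->offset is
	 * reused to record the owning struct uvm_cpu pointer.
	 */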
348 | |
349 | /* |
350 | * init the page queues and page queue locks, except the free |
351 | * list; we allocate that later (with the initial vm_page |
352 | * structures). |
353 | */ |
354 | |
355 | uvm.cpus[0] = &boot_cpu; |
356 | curcpu()->ci_data.cpu_uvm = &boot_cpu; |
357 | uvmpdpol_init(); |
358 | mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE); |
359 | mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM); |
360 | |
361 | /* |
362 | * allocate vm_page structures. |
363 | */ |
364 | |
365 | /* |
366 | * sanity check: |
367 | * before calling this function the MD code is expected to register |
368 | * some free RAM with the uvm_page_physload() function. our job |
369 | * now is to allocate vm_page structures for this memory. |
370 | */ |
371 | |
372 | if (vm_nphysmem == 0) |
373 | panic("uvm_page_bootstrap: no memory pre-allocated" ); |
374 | |
375 | /* |
376 | * first calculate the number of free pages... |
377 | * |
378 | * note that we use start/end rather than avail_start/avail_end. |
379 | * this allows us to allocate extra vm_page structures in case we |
380 | * want to return some memory to the pool after booting. |
381 | */ |
382 | |
383 | freepages = 0; |
384 | for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) { |
385 | seg = VM_PHYSMEM_PTR(lcv); |
386 | freepages += (seg->end - seg->start); |
387 | } |
388 | |
389 | /* |
390 | * Let MD code initialize the number of colors, or default |
391 | * to 1 color if MD code doesn't care. |
392 | */ |
393 | if (uvmexp.ncolors == 0) |
394 | uvmexp.ncolors = 1; |
395 | uvmexp.colormask = uvmexp.ncolors - 1; |
396 | KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); |
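	/*
	 * e.g. ncolors == 8 gives colormask == 0x7; the KASSERT above
	 * verifies that ncolors is a power of two.
	 */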
397 | |
398 | /* |
399 | * we now know we have (PAGE_SIZE * freepages) bytes of memory we can |
400 | * use. for each page of memory we use we need a vm_page structure. |
401 | * thus, the total number of pages we can use is the total size of |
402 | * the memory divided by the PAGE_SIZE plus the size of the vm_page |
403 | * structure. we add one to freepages as a fudge factor to avoid |
404 | * truncation errors (since we can only allocate in terms of whole |
405 | * pages). |
406 | */ |
407 | |
408 | bucketcount = uvmexp.ncolors * VM_NFREELIST; |
409 | pagecount = ((freepages + 1) << PAGE_SHIFT) / |
410 | (PAGE_SIZE + sizeof(struct vm_page)); |
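	/*
	 * i.e. pagecount is the largest N such that
	 * N * (PAGE_SIZE + sizeof(struct vm_page)) <=
	 * (freepages + 1) * PAGE_SIZE.
	 */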
411 | |
412 | bucketarray = (void *)uvm_pageboot_alloc((bucketcount * |
413 | sizeof(struct pgflbucket) * 2) + (pagecount * |
414 | sizeof(struct vm_page))); |
415 | cpuarray = bucketarray + bucketcount; |
416 | pagearray = (struct vm_page *)(bucketarray + bucketcount * 2); |
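	/*
	 * the single boot allocation above is carved into three
	 * consecutive pieces: the global freelist buckets, the boot
	 * cpu's buckets, and the vm_page array itself.
	 */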
417 | |
418 | for (lcv = 0; lcv < VM_NFREELIST; lcv++) { |
419 | uvm.page_free[lcv].pgfl_buckets = |
420 | (bucketarray + (lcv * uvmexp.ncolors)); |
421 | uvm_page_init_buckets(&uvm.page_free[lcv]); |
422 | uvm.cpus[0]->page_free[lcv].pgfl_buckets = |
423 | (cpuarray + (lcv * uvmexp.ncolors)); |
424 | uvm_page_init_buckets(&uvm.cpus[0]->page_free[lcv]); |
425 | } |
426 | memset(pagearray, 0, pagecount * sizeof(struct vm_page)); |
427 | |
428 | /* |
429 | * init the vm_page structures and put them in the correct place. |
430 | */ |
431 | |
432 | for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) { |
433 | seg = VM_PHYSMEM_PTR(lcv); |
434 | n = seg->end - seg->start; |
435 | |
436 | /* set up page array pointers */ |
437 | seg->pgs = pagearray; |
438 | pagearray += n; |
439 | pagecount -= n; |
440 | seg->lastpg = seg->pgs + n; |
441 | |
442 | /* init and free vm_pages (we've already zeroed them) */ |
443 | paddr = ctob(seg->start); |
444 | for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) { |
445 | seg->pgs[i].phys_addr = paddr; |
446 | #ifdef __HAVE_VM_PAGE_MD |
447 | VM_MDPAGE_INIT(&seg->pgs[i]); |
448 | #endif |
449 | if (atop(paddr) >= seg->avail_start && |
450 | atop(paddr) < seg->avail_end) { |
451 | uvmexp.npages++; |
452 | /* add page to free pool */ |
453 | uvm_pagefree(&seg->pgs[i]); |
454 | } |
455 | } |
456 | } |
457 | |
458 | /* |
459 | * pass up the values of virtual_space_start and |
460 | * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper |
461 | * layers of the VM. |
462 | */ |
463 | |
464 | *kvm_startp = round_page(virtual_space_start); |
465 | *kvm_endp = trunc_page(virtual_space_end); |
466 | #ifdef DEBUG |
467 | /* |
468 | * steal kva for uvm_pagezerocheck(). |
469 | */ |
470 | uvm_zerocheckkva = *kvm_startp; |
471 | *kvm_startp += PAGE_SIZE; |
472 | #endif /* DEBUG */ |
473 | |
474 | /* |
475 | * init various thresholds. |
476 | */ |
477 | |
478 | uvmexp.reserve_pagedaemon = 1; |
479 | uvmexp.reserve_kernel = vm_page_reserve_kernel; |
480 | |
481 | /* |
482 | * determine if we should zero pages in the idle loop. |
483 | */ |
484 | |
485 | uvm.cpus[0]->page_idle_zero = vm_page_zero_enable; |
486 | |
487 | /* |
488 | * done! |
489 | */ |
490 | |
491 | uvm.page_init_done = true; |
492 | } |
493 | |
494 | /* |
495 | * uvm_setpagesize: set the page size |
496 | * |
497 | * => sets page_shift and page_mask from uvmexp.pagesize. |
498 | */ |
499 | |
500 | void |
501 | uvm_setpagesize(void) |
502 | { |
503 | |
504 | /* |
505 | * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE |
506 | * to be a constant (indicated by being a non-zero value). |
507 | */ |
508 | if (uvmexp.pagesize == 0) { |
509 | if (PAGE_SIZE == 0) |
510 | panic("uvm_setpagesize: uvmexp.pagesize not set" ); |
511 | uvmexp.pagesize = PAGE_SIZE; |
512 | } |
513 | uvmexp.pagemask = uvmexp.pagesize - 1; |
514 | if ((uvmexp.pagemask & uvmexp.pagesize) != 0) |
515 | panic("uvm_setpagesize: page size %u (%#x) not a power of two" , |
516 | uvmexp.pagesize, uvmexp.pagesize); |
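	/* e.g. pagesize 4096 -> pagemask 0xfff, pageshift 12 */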
517 | for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) |
518 | if ((1 << uvmexp.pageshift) == uvmexp.pagesize) |
519 | break; |
520 | } |
521 | |
522 | /* |
523 | * uvm_pageboot_alloc: steal memory from physmem for bootstrapping |
524 | */ |
525 | |
526 | vaddr_t |
527 | uvm_pageboot_alloc(vsize_t size) |
528 | { |
529 | static bool initialized = false; |
530 | vaddr_t addr; |
531 | #if !defined(PMAP_STEAL_MEMORY) |
532 | vaddr_t vaddr; |
533 | paddr_t paddr; |
534 | #endif |
535 | |
536 | /* |
537 | * on first call to this function, initialize ourselves. |
538 | */ |
539 | if (initialized == false) { |
540 | pmap_virtual_space(&virtual_space_start, &virtual_space_end); |
541 | |
542 | /* round it the way we like it */ |
543 | virtual_space_start = round_page(virtual_space_start); |
544 | virtual_space_end = trunc_page(virtual_space_end); |
545 | |
546 | initialized = true; |
547 | } |
548 | |
549 | /* round to page size */ |
550 | size = round_page(size); |
551 | |
552 | #if defined(PMAP_STEAL_MEMORY) |
553 | |
554 | /* |
555 | * defer bootstrap allocation to MD code (it may want to allocate |
556 | * from a direct-mapped segment). pmap_steal_memory should adjust |
557 | * virtual_space_start/virtual_space_end if necessary. |
558 | */ |
559 | |
560 | addr = pmap_steal_memory(size, &virtual_space_start, |
561 | &virtual_space_end); |
562 | |
563 | return(addr); |
564 | |
565 | #else /* !PMAP_STEAL_MEMORY */ |
566 | |
567 | /* |
568 | * allocate virtual memory for this request |
569 | */ |
570 | if (virtual_space_start == virtual_space_end || |
571 | (virtual_space_end - virtual_space_start) < size) |
572 | panic("uvm_pageboot_alloc: out of virtual space" ); |
573 | |
574 | addr = virtual_space_start; |
575 | |
576 | #ifdef PMAP_GROWKERNEL |
577 | /* |
578 | * If the kernel pmap can't map the requested space, |
579 | * then allocate more resources for it. |
580 | */ |
581 | if (uvm_maxkaddr < (addr + size)) { |
582 | uvm_maxkaddr = pmap_growkernel(addr + size); |
583 | if (uvm_maxkaddr < (addr + size)) |
584 | panic("uvm_pageboot_alloc: pmap_growkernel() failed" ); |
585 | } |
586 | #endif |
587 | |
588 | virtual_space_start += size; |
589 | |
590 | /* |
591 | * allocate and mapin physical pages to back new virtual pages |
592 | */ |
593 | |
594 | for (vaddr = round_page(addr) ; vaddr < addr + size ; |
595 | vaddr += PAGE_SIZE) { |
596 | |
597 | if (!uvm_page_physget(&paddr)) |
598 | panic("uvm_pageboot_alloc: out of memory" ); |
599 | |
600 | /* |
601 | * Note this memory is no longer managed, so using |
602 | * pmap_kenter is safe. |
603 | */ |
604 | pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); |
605 | } |
606 | pmap_update(pmap_kernel()); |
607 | return(addr); |
608 | #endif /* PMAP_STEAL_MEMORY */ |
609 | } |
610 | |
611 | #if !defined(PMAP_STEAL_MEMORY) |
612 | /* |
613 | * uvm_page_physget: "steal" one page from the vm_physmem structure. |
614 | * |
615 | * => attempt to allocate it off the end of a segment in which the "avail" |
616 | * values match the start/end values. if we can't do that, then we |
617 | * will advance both values (making them equal, and removing some |
618 | * vm_page structures from the non-avail area). |
619 | * => return false if out of memory. |
620 | */ |
621 | |
622 | /* subroutine: try to allocate from memory chunks on the specified freelist */ |
623 | static bool uvm_page_physget_freelist(paddr_t *, int); |
624 | |
625 | static bool |
626 | uvm_page_physget_freelist(paddr_t *paddrp, int freelist) |
627 | { |
628 | struct vm_physseg *seg; |
629 | int lcv, x; |
630 | |
631 | /* pass 1: try allocating from a matching end */ |
632 | #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) |
633 | for (lcv = vm_nphysmem - 1 ; lcv >= 0 ; lcv--) |
634 | #else |
635 | for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) |
636 | #endif |
637 | { |
638 | seg = VM_PHYSMEM_PTR(lcv); |
639 | |
640 | if (uvm.page_init_done == true) |
641 | panic("uvm_page_physget: called _after_ bootstrap" ); |
642 | |
643 | if (seg->free_list != freelist) |
644 | continue; |
645 | |
646 | /* try from front */ |
647 | if (seg->avail_start == seg->start && |
648 | seg->avail_start < seg->avail_end) { |
649 | *paddrp = ctob(seg->avail_start); |
650 | seg->avail_start++; |
651 | seg->start++; |
652 | /* nothing left? nuke it */ |
653 | if (seg->avail_start == seg->end) { |
654 | if (vm_nphysmem == 1) |
655 | panic("uvm_page_physget: out of memory!" ); |
656 | vm_nphysmem--; |
657 | for (x = lcv ; x < vm_nphysmem ; x++) |
658 | /* structure copy */ |
659 | VM_PHYSMEM_PTR_SWAP(x, x + 1); |
660 | } |
661 | return (true); |
662 | } |
663 | |
664 | /* try from rear */ |
665 | if (seg->avail_end == seg->end && |
666 | seg->avail_start < seg->avail_end) { |
667 | *paddrp = ctob(seg->avail_end - 1); |
668 | seg->avail_end--; |
669 | seg->end--; |
670 | /* nothing left? nuke it */ |
671 | if (seg->avail_end == seg->start) { |
672 | if (vm_nphysmem == 1) |
673 | panic("uvm_page_physget: out of memory!" ); |
674 | vm_nphysmem--; |
675 | for (x = lcv ; x < vm_nphysmem ; x++) |
676 | /* structure copy */ |
677 | VM_PHYSMEM_PTR_SWAP(x, x + 1); |
678 | } |
679 | return (true); |
680 | } |
681 | } |
682 | |
683 | /* pass2: forget about matching ends, just allocate something */ |
684 | #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) |
685 | for (lcv = vm_nphysmem - 1 ; lcv >= 0 ; lcv--) |
686 | #else |
687 | for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) |
688 | #endif |
689 | { |
690 | seg = VM_PHYSMEM_PTR(lcv); |
691 | |
692 | /* any room in this bank? */ |
693 | if (seg->avail_start >= seg->avail_end) |
694 | continue; /* nope */ |
695 | |
696 | *paddrp = ctob(seg->avail_start); |
697 | seg->avail_start++; |
698 | /* truncate! */ |
699 | seg->start = seg->avail_start; |
700 | |
701 | /* nothing left? nuke it */ |
702 | if (seg->avail_start == seg->end) { |
703 | if (vm_nphysmem == 1) |
704 | panic("uvm_page_physget: out of memory!" ); |
705 | vm_nphysmem--; |
706 | for (x = lcv ; x < vm_nphysmem ; x++) |
707 | /* structure copy */ |
708 | VM_PHYSMEM_PTR_SWAP(x, x + 1); |
709 | } |
710 | return (true); |
711 | } |
712 | |
713 | return (false); /* whoops! */ |
714 | } |
715 | |
716 | bool |
717 | uvm_page_physget(paddr_t *paddrp) |
718 | { |
719 | int i; |
720 | |
721 | /* try in the order of freelist preference */ |
722 | for (i = 0; i < VM_NFREELIST; i++) |
723 | if (uvm_page_physget_freelist(paddrp, i) == true) |
724 | return (true); |
725 | return (false); |
726 | } |
727 | #endif /* PMAP_STEAL_MEMORY */ |
728 | |
729 | /* |
730 | * uvm_page_physload: load physical memory into VM system |
731 | * |
732 | * => all args are PFs |
733 | * => all pages in start/end get vm_page structures |
734 | * => areas marked by avail_start/avail_end get added to the free page pool |
735 | * => we are limited to VM_PHYSSEG_MAX physical memory segments |
736 | */ |
737 | |
738 | void |
739 | uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start, |
740 | paddr_t avail_end, int free_list) |
741 | { |
742 | int preload, lcv; |
743 | psize_t npages; |
744 | struct vm_page *pgs; |
745 | struct vm_physseg *ps; |
746 | |
747 | if (uvmexp.pagesize == 0) |
748 | panic("uvm_page_physload: page size not set!" ); |
749 | if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT) |
750 | panic("uvm_page_physload: bad free list %d" , free_list); |
751 | if (start >= end) |
752 | panic("uvm_page_physload: start >= end" ); |
753 | |
754 | /* |
755 | * do we have room? |
756 | */ |
757 | |
758 | if (vm_nphysmem == VM_PHYSSEG_MAX) { |
759 | printf("uvm_page_physload: unable to load physical memory " |
760 | "segment\n" ); |
761 | printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n" , |
762 | VM_PHYSSEG_MAX, (long long)start, (long long)end); |
763 | printf("\tincrease VM_PHYSSEG_MAX\n" ); |
764 | return; |
765 | } |
766 | |
767 | /* |
768 | * check to see if this is a "preload" (i.e. uvm_page_init hasn't been |
769 | * called yet, so kmem is not available). |
770 | */ |
771 | |
772 | for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) { |
773 | if (VM_PHYSMEM_PTR(lcv)->pgs) |
774 | break; |
775 | } |
776 | preload = (lcv == vm_nphysmem); |
777 | |
778 | /* |
779 | * if VM is already running, attempt to kmem_alloc vm_page structures |
780 | */ |
781 | |
782 | if (!preload) { |
783 | panic("uvm_page_physload: tried to add RAM after vm_mem_init" ); |
784 | } else { |
785 | pgs = NULL; |
786 | npages = 0; |
787 | } |
788 | |
789 | /* |
790 | * now insert us in the proper place in vm_physmem[] |
791 | */ |
792 | |
793 | #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM) |
794 | /* random: put it at the end (easy!) */ |
795 | ps = VM_PHYSMEM_PTR(vm_nphysmem); |
796 | #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) |
797 | { |
798 | int x; |
799 | /* sort by address for binary search */ |
800 | for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) |
801 | if (start < VM_PHYSMEM_PTR(lcv)->start) |
802 | break; |
803 | ps = VM_PHYSMEM_PTR(lcv); |
804 | /* move back other entries, if necessary ... */ |
805 | for (x = vm_nphysmem ; x > lcv ; x--) |
806 | /* structure copy */ |
807 | VM_PHYSMEM_PTR_SWAP(x, x - 1); |
808 | } |
809 | #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) |
810 | { |
811 | int x; |
812 | /* sort by largest segment first */ |
813 | for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) |
814 | if ((end - start) > |
815 | (VM_PHYSMEM_PTR(lcv)->end - VM_PHYSMEM_PTR(lcv)->start)) |
816 | break; |
817 | ps = VM_PHYSMEM_PTR(lcv); |
818 | /* move back other entries, if necessary ... */ |
819 | for (x = vm_nphysmem ; x > lcv ; x--) |
820 | /* structure copy */ |
821 | VM_PHYSMEM_PTR_SWAP(x, x - 1); |
822 | } |
823 | #else |
824 | panic("uvm_page_physload: unknown physseg strategy selected!" ); |
825 | #endif |
826 | |
827 | ps->start = start; |
828 | ps->end = end; |
829 | ps->avail_start = avail_start; |
830 | ps->avail_end = avail_end; |
831 | if (preload) { |
832 | ps->pgs = NULL; |
833 | } else { |
834 | ps->pgs = pgs; |
835 | ps->lastpg = pgs + npages; |
836 | } |
837 | ps->free_list = free_list; |
838 | vm_nphysmem++; |
839 | |
840 | if (!preload) { |
841 | uvmpdpol_reinit(); |
842 | } |
843 | } |
844 | |
845 | /* |
846 | * when VM_PHYSSEG_MAX is 1, we can simplify these functions |
847 | */ |
848 | |
849 | #if VM_PHYSSEG_MAX == 1 |
850 | static inline int vm_physseg_find_contig(struct vm_physseg *, int, paddr_t, int *); |
851 | #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) |
852 | static inline int vm_physseg_find_bsearch(struct vm_physseg *, int, paddr_t, int *); |
853 | #else |
854 | static inline int vm_physseg_find_linear(struct vm_physseg *, int, paddr_t, int *); |
855 | #endif |
856 | |
857 | /* |
858 | * vm_physseg_find: find vm_physseg structure that belongs to a PA |
859 | */ |
860 | int |
861 | vm_physseg_find(paddr_t pframe, int *offp) |
862 | { |
863 | |
864 | #if VM_PHYSSEG_MAX == 1 |
865 | return vm_physseg_find_contig(vm_physmem, vm_nphysseg, pframe, offp); |
866 | #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) |
867 | return vm_physseg_find_bsearch(vm_physmem, vm_nphysseg, pframe, offp); |
868 | #else |
869 | return vm_physseg_find_linear(vm_physmem, vm_nphysseg, pframe, offp); |
870 | #endif |
871 | } |
872 | |
873 | #if VM_PHYSSEG_MAX == 1 |
874 | static inline int |
875 | vm_physseg_find_contig(struct vm_physseg *segs, int nsegs, paddr_t pframe, int *offp) |
876 | { |
877 | |
878 | /* 'contig' case */ |
879 | if (pframe >= segs[0].start && pframe < segs[0].end) { |
880 | if (offp) |
881 | *offp = pframe - segs[0].start; |
882 | return(0); |
883 | } |
884 | return(-1); |
885 | } |
886 | |
887 | #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) |
888 | |
889 | static inline int |
890 | vm_physseg_find_bsearch(struct vm_physseg *segs, int nsegs, paddr_t pframe, int *offp) |
891 | { |
892 | /* binary search for it */ |
893 | u_int start, len, guess; |
894 | |
895 | /* |
896 | * if try is too large (thus target is less than try) we reduce |
897 | * the length to trunc(len/2) [i.e. everything smaller than "try"] |
898 | * |
899 | * if the try is too small (thus target is greater than try) then |
900 | * we set the new start to be (try + 1). this means we need to |
901 | * reduce the length to (round(len/2) - 1). |
902 | * |
903 | * note "adjust" below which takes advantage of the fact that |
904 | * (round(len/2) - 1) == trunc((len - 1) / 2) |
905 | * for any value of len we may have |
906 | */ |
907 | |
908 | for (start = 0, len = nsegs ; len != 0 ; len = len / 2) { |
909 | guess = start + (len / 2); /* try in the middle */ |
910 | |
911 | /* start past our try? */ |
912 | if (pframe >= segs[guess].start) { |
913 | /* was try correct? */ |
914 | if (pframe < segs[guess].end) { |
915 | if (offp) |
916 | *offp = pframe - segs[guess].start; |
917 | return guess; /* got it */ |
918 | } |
919 | start = guess + 1; /* next time, start here */ |
920 | len--; /* "adjust" */ |
921 | } else { |
922 | /* |
923 | * pframe before try, just reduce length of |
924 | * region, done in "for" loop |
925 | */ |
926 | } |
927 | } |
928 | return(-1); |
929 | } |
930 | |
931 | #else |
932 | |
933 | static inline int |
934 | vm_physseg_find_linear(struct vm_physseg *segs, int nsegs, paddr_t pframe, int *offp) |
935 | { |
936 | /* linear search for it */ |
937 | int lcv; |
938 | |
939 | for (lcv = 0; lcv < nsegs; lcv++) { |
940 | if (pframe >= segs[lcv].start && |
941 | pframe < segs[lcv].end) { |
942 | if (offp) |
943 | *offp = pframe - segs[lcv].start; |
944 | return(lcv); /* got it */ |
945 | } |
946 | } |
947 | return(-1); |
948 | } |
949 | #endif |
950 | |
951 | /* |
952 | * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages |
953 | * back from an I/O mapping (ugh!). used in some MD code as well. |
954 | */ |
955 | struct vm_page * |
956 | uvm_phys_to_vm_page(paddr_t pa) |
957 | { |
958 | paddr_t pf = atop(pa); |
959 | int off; |
960 | int psi; |
961 | |
962 | psi = vm_physseg_find(pf, &off); |
963 | if (psi != -1) |
964 | return(&VM_PHYSMEM_PTR(psi)->pgs[off]); |
965 | return(NULL); |
966 | } |
967 | |
968 | paddr_t |
969 | uvm_vm_page_to_phys(const struct vm_page *pg) |
970 | { |
971 | |
972 | return pg->phys_addr; |
973 | } |
974 | |
975 | /* |
976 | * uvm_page_recolor: Recolor the pages if the new bucket count is |
977 | * larger than the old one. |
978 | */ |
979 | |
980 | void |
981 | uvm_page_recolor(int newncolors) |
982 | { |
983 | struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray; |
984 | struct pgfreelist gpgfl, pgfl; |
985 | struct vm_page *pg; |
986 | vsize_t bucketcount; |
987 | size_t bucketmemsize, oldbucketmemsize; |
988 | int lcv, color, i, ocolors; |
989 | struct uvm_cpu *ucpu; |
990 | |
991 | KASSERT(((newncolors - 1) & newncolors) == 0); |
992 | |
993 | if (newncolors <= uvmexp.ncolors) |
994 | return; |
995 | |
996 | if (uvm.page_init_done == false) { |
997 | uvmexp.ncolors = newncolors; |
998 | return; |
999 | } |
1000 | |
1001 | bucketcount = newncolors * VM_NFREELIST; |
1002 | bucketmemsize = bucketcount * sizeof(struct pgflbucket) * 2; |
1003 | bucketarray = kmem_alloc(bucketmemsize, KM_SLEEP); |
1004 | cpuarray = bucketarray + bucketcount; |
1005 | if (bucketarray == NULL) { |
1006 | printf("WARNING: unable to allocate %ld page color buckets\n" , |
1007 | (long) bucketcount); |
1008 | return; |
1009 | } |
1010 | |
1011 | mutex_spin_enter(&uvm_fpageqlock); |
1012 | |
1013 | /* Make sure we should still do this. */ |
1014 | if (newncolors <= uvmexp.ncolors) { |
1015 | mutex_spin_exit(&uvm_fpageqlock); |
1016 | kmem_free(bucketarray, bucketmemsize); |
1017 | return; |
1018 | } |
1019 | |
1020 | oldbucketarray = uvm.page_free[0].pgfl_buckets; |
1021 | ocolors = uvmexp.ncolors; |
1022 | |
1023 | uvmexp.ncolors = newncolors; |
1024 | uvmexp.colormask = uvmexp.ncolors - 1; |
1025 | |
1026 | ucpu = curcpu()->ci_data.cpu_uvm; |
1027 | for (lcv = 0; lcv < VM_NFREELIST; lcv++) { |
1028 | gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors)); |
1029 | pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors)); |
1030 | uvm_page_init_buckets(&gpgfl); |
1031 | uvm_page_init_buckets(&pgfl); |
1032 | for (color = 0; color < ocolors; color++) { |
1033 | for (i = 0; i < PGFL_NQUEUES; i++) { |
1034 | while ((pg = LIST_FIRST(&uvm.page_free[ |
1035 | lcv].pgfl_buckets[color].pgfl_queues[i])) |
1036 | != NULL) { |
1037 | LIST_REMOVE(pg, pageq.list); /* global */ |
1038 | LIST_REMOVE(pg, listq.list); /* cpu */ |
1039 | LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[ |
1040 | VM_PGCOLOR_BUCKET(pg)].pgfl_queues[ |
1041 | i], pg, pageq.list); |
1042 | LIST_INSERT_HEAD(&pgfl.pgfl_buckets[ |
1043 | VM_PGCOLOR_BUCKET(pg)].pgfl_queues[ |
1044 | i], pg, listq.list); |
1045 | } |
1046 | } |
1047 | } |
1048 | uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets; |
1049 | ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets; |
1050 | } |
1051 | |
1052 | oldbucketmemsize = recolored_pages_memsize; |
1053 | |
1054 | recolored_pages_memsize = bucketmemsize; |
1055 | mutex_spin_exit(&uvm_fpageqlock); |
1056 | |
1057 | if (oldbucketmemsize) { |
		kmem_free(oldbucketarray, oldbucketmemsize);
1059 | } |
1060 | |
1061 | /* |
1062 | * this calls uvm_km_alloc() which may want to hold |
1063 | * uvm_fpageqlock. |
1064 | */ |
1065 | uvm_pager_realloc_emerg(); |
1066 | } |
1067 | |
1068 | /* |
1069 | * uvm_cpu_attach: initialize per-CPU data structures. |
1070 | */ |
1071 | |
1072 | void |
1073 | uvm_cpu_attach(struct cpu_info *ci) |
1074 | { |
1075 | struct pgflbucket *bucketarray; |
1076 | struct pgfreelist pgfl; |
1077 | struct uvm_cpu *ucpu; |
1078 | vsize_t bucketcount; |
1079 | int lcv; |
1080 | |
1081 | if (CPU_IS_PRIMARY(ci)) { |
1082 | /* Already done in uvm_page_init(). */ |
1083 | goto attachrnd; |
1084 | } |
1085 | |
1086 | /* Add more reserve pages for this CPU. */ |
1087 | uvmexp.reserve_kernel += vm_page_reserve_kernel; |
1088 | |
1089 | /* Configure this CPU's free lists. */ |
1090 | bucketcount = uvmexp.ncolors * VM_NFREELIST; |
1091 | bucketarray = kmem_alloc(bucketcount * sizeof(struct pgflbucket), |
1092 | KM_SLEEP); |
1093 | ucpu = kmem_zalloc(sizeof(*ucpu), KM_SLEEP); |
1094 | uvm.cpus[cpu_index(ci)] = ucpu; |
1095 | ci->ci_data.cpu_uvm = ucpu; |
1096 | for (lcv = 0; lcv < VM_NFREELIST; lcv++) { |
1097 | pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors)); |
1098 | uvm_page_init_buckets(&pgfl); |
1099 | ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets; |
1100 | } |
1101 | |
1102 | attachrnd: |
1103 | /* |
1104 | * Attach RNG source for this CPU's VM events |
1105 | */ |
1106 | rnd_attach_source(&uvm.cpus[cpu_index(ci)]->rs, |
1107 | ci->ci_data.cpu_name, RND_TYPE_VM, |
1108 | RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE| |
1109 | RND_FLAG_ESTIMATE_VALUE); |
1110 | |
1111 | } |
1112 | |
1113 | /* |
1114 | * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat |
1115 | */ |
1116 | |
1117 | static struct vm_page * |
1118 | uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2, |
1119 | int *trycolorp) |
1120 | { |
1121 | struct pgflist *freeq; |
1122 | struct vm_page *pg; |
1123 | int color, trycolor = *trycolorp; |
1124 | struct pgfreelist *gpgfl, *pgfl; |
1125 | |
1126 | KASSERT(mutex_owned(&uvm_fpageqlock)); |
1127 | |
1128 | color = trycolor; |
1129 | pgfl = &ucpu->page_free[flist]; |
1130 | gpgfl = &uvm.page_free[flist]; |
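	/*
	 * scan order for each color: this cpu's try1 bucket, the global
	 * try1 bucket, then the same pair for try2.  if all four are
	 * empty, advance to the next color until we wrap back around
	 * to trycolor.
	 */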
1131 | do { |
1132 | /* cpu, try1 */ |
1133 | if ((pg = LIST_FIRST((freeq = |
1134 | &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) { |
1135 | KASSERT(pg->pqflags & PQ_FREE); |
1136 | KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); |
1137 | KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); |
1138 | KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg)); |
1139 | VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--; |
1140 | uvmexp.cpuhit++; |
1141 | goto gotit; |
1142 | } |
1143 | /* global, try1 */ |
1144 | if ((pg = LIST_FIRST((freeq = |
1145 | &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) { |
1146 | KASSERT(pg->pqflags & PQ_FREE); |
1147 | KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); |
1148 | KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); |
1149 | KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg)); |
1150 | VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--; |
1151 | uvmexp.cpumiss++; |
1152 | goto gotit; |
1153 | } |
1154 | /* cpu, try2 */ |
1155 | if ((pg = LIST_FIRST((freeq = |
1156 | &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) { |
1157 | KASSERT(pg->pqflags & PQ_FREE); |
1158 | KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); |
1159 | KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); |
1160 | KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg)); |
1161 | VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--; |
1162 | uvmexp.cpuhit++; |
1163 | goto gotit; |
1164 | } |
1165 | /* global, try2 */ |
1166 | if ((pg = LIST_FIRST((freeq = |
1167 | &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) { |
1168 | KASSERT(pg->pqflags & PQ_FREE); |
1169 | KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); |
1170 | KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); |
1171 | KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg)); |
1172 | VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--; |
1173 | uvmexp.cpumiss++; |
1174 | goto gotit; |
1175 | } |
1176 | color = (color + 1) & uvmexp.colormask; |
1177 | } while (color != trycolor); |
1178 | |
1179 | return (NULL); |
1180 | |
1181 | gotit: |
1182 | LIST_REMOVE(pg, pageq.list); /* global list */ |
1183 | LIST_REMOVE(pg, listq.list); /* per-cpu list */ |
1184 | uvmexp.free--; |
1185 | |
1186 | /* update zero'd page count */ |
1187 | if (pg->flags & PG_ZERO) |
1188 | uvmexp.zeropages--; |
1189 | |
1190 | if (color == trycolor) |
1191 | uvmexp.colorhit++; |
1192 | else { |
1193 | uvmexp.colormiss++; |
1194 | *trycolorp = color; |
1195 | } |
1196 | |
1197 | return (pg); |
1198 | } |
1199 | |
1200 | /* |
1201 | * uvm_pagealloc_strat: allocate vm_page from a particular free list. |
1202 | * |
1203 | * => return null if no pages free |
1204 | * => wake up pagedaemon if number of free pages drops below low water mark |
1205 | * => if obj != NULL, obj must be locked (to put in obj's tree) |
1206 | * => if anon != NULL, anon must be locked (to put in anon) |
1207 | * => only one of obj or anon can be non-null |
1208 | * => caller must activate/deactivate page if it is not wired. |
1209 | * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. |
1210 | * => policy decision: it is more important to pull a page off of the |
1211 | * appropriate priority free list than it is to get a zero'd or |
1212 | * unknown contents page. This is because we live with the |
1213 | * consequences of a bad free list decision for the entire |
1214 | * lifetime of the page, e.g. if the page comes from memory that |
1215 | * is slower to access. |
1216 | */ |
1217 | |
1218 | struct vm_page * |
1219 | uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, |
1220 | int flags, int strat, int free_list) |
1221 | { |
1222 | int lcv, try1, try2, zeroit = 0, color; |
1223 | struct uvm_cpu *ucpu; |
1224 | struct vm_page *pg; |
1225 | lwp_t *l; |
1226 | |
1227 | KASSERT(obj == NULL || anon == NULL); |
1228 | KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); |
1229 | KASSERT(off == trunc_page(off)); |
1230 | KASSERT(obj == NULL || mutex_owned(obj->vmobjlock)); |
1231 | KASSERT(anon == NULL || anon->an_lock == NULL || |
1232 | mutex_owned(anon->an_lock)); |
1233 | |
1234 | mutex_spin_enter(&uvm_fpageqlock); |
1235 | |
1236 | /* |
1237 | * This implements a global round-robin page coloring |
1238 | * algorithm. |
1239 | */ |
1240 | |
1241 | ucpu = curcpu()->ci_data.cpu_uvm; |
1242 | if (flags & UVM_FLAG_COLORMATCH) { |
1243 | color = atop(off) & uvmexp.colormask; |
1244 | } else { |
1245 | color = ucpu->page_free_nextcolor; |
1246 | } |
1247 | |
1248 | /* |
1249 | * check to see if we need to generate some free pages waking |
1250 | * the pagedaemon. |
1251 | */ |
1252 | |
1253 | uvm_kick_pdaemon(); |
1254 | |
1255 | /* |
1256 | * fail if any of these conditions is true: |
1257 | * [1] there really are no free pages, or |
1258 | * [2] only kernel "reserved" pages remain and |
1259 | * reserved pages have not been requested. |
1260 | * [3] only pagedaemon "reserved" pages remain and |
1261 | * the requestor isn't the pagedaemon. |
1262 | * we make kernel reserve pages available if called by a |
1263 | * kernel thread or a realtime thread. |
1264 | */ |
1265 | l = curlwp; |
1266 | if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) { |
1267 | flags |= UVM_PGA_USERESERVE; |
1268 | } |
1269 | if ((uvmexp.free <= uvmexp.reserve_kernel && |
1270 | (flags & UVM_PGA_USERESERVE) == 0) || |
1271 | (uvmexp.free <= uvmexp.reserve_pagedaemon && |
1272 | curlwp != uvm.pagedaemon_lwp)) |
1273 | goto fail; |
1274 | |
1275 | #if PGFL_NQUEUES != 2 |
1276 | #error uvm_pagealloc_strat needs to be updated |
1277 | #endif |
1278 | |
1279 | /* |
1280 | * If we want a zero'd page, try the ZEROS queue first, otherwise |
1281 | * we try the UNKNOWN queue first. |
1282 | */ |
1283 | if (flags & UVM_PGA_ZERO) { |
1284 | try1 = PGFL_ZEROS; |
1285 | try2 = PGFL_UNKNOWN; |
1286 | } else { |
1287 | try1 = PGFL_UNKNOWN; |
1288 | try2 = PGFL_ZEROS; |
1289 | } |
1290 | |
1291 | again: |
1292 | switch (strat) { |
1293 | case UVM_PGA_STRAT_NORMAL: |
1294 | /* Check freelists: descending priority (ascending id) order */ |
1295 | for (lcv = 0; lcv < VM_NFREELIST; lcv++) { |
1296 | pg = uvm_pagealloc_pgfl(ucpu, lcv, |
1297 | try1, try2, &color); |
1298 | if (pg != NULL) |
1299 | goto gotit; |
1300 | } |
1301 | |
1302 | /* No pages free! */ |
1303 | goto fail; |
1304 | |
1305 | case UVM_PGA_STRAT_ONLY: |
1306 | case UVM_PGA_STRAT_FALLBACK: |
1307 | /* Attempt to allocate from the specified free list. */ |
1308 | KASSERT(free_list >= 0 && free_list < VM_NFREELIST); |
1309 | pg = uvm_pagealloc_pgfl(ucpu, free_list, |
1310 | try1, try2, &color); |
1311 | if (pg != NULL) |
1312 | goto gotit; |
1313 | |
1314 | /* Fall back, if possible. */ |
1315 | if (strat == UVM_PGA_STRAT_FALLBACK) { |
1316 | strat = UVM_PGA_STRAT_NORMAL; |
1317 | goto again; |
1318 | } |
1319 | |
1320 | /* No pages free! */ |
1321 | goto fail; |
1322 | |
1323 | default: |
1324 | panic("uvm_pagealloc_strat: bad strat %d" , strat); |
1325 | /* NOTREACHED */ |
1326 | } |
1327 | |
1328 | gotit: |
1329 | /* |
1330 | * We now know which color we actually allocated from; set |
1331 | * the next color accordingly. |
1332 | */ |
1333 | |
1334 | ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask; |
1335 | |
1336 | /* |
1337 | * update allocation statistics and remember if we have to |
1338 | * zero the page |
1339 | */ |
1340 | |
1341 | if (flags & UVM_PGA_ZERO) { |
1342 | if (pg->flags & PG_ZERO) { |
1343 | uvmexp.pga_zerohit++; |
1344 | zeroit = 0; |
1345 | } else { |
1346 | uvmexp.pga_zeromiss++; |
1347 | zeroit = 1; |
1348 | } |
1349 | if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) { |
1350 | ucpu->page_idle_zero = vm_page_zero_enable; |
1351 | } |
1352 | } |
1353 | KASSERT(pg->pqflags == PQ_FREE); |
1354 | |
1355 | pg->offset = off; |
1356 | pg->uobject = obj; |
1357 | pg->uanon = anon; |
1358 | pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE; |
1359 | if (anon) { |
1360 | anon->an_page = pg; |
1361 | pg->pqflags = PQ_ANON; |
1362 | atomic_inc_uint(&uvmexp.anonpages); |
1363 | } else { |
1364 | if (obj) { |
1365 | uvm_pageinsert(obj, pg); |
1366 | } |
1367 | pg->pqflags = 0; |
1368 | } |
1369 | mutex_spin_exit(&uvm_fpageqlock); |
1370 | |
1371 | #if defined(UVM_PAGE_TRKOWN) |
1372 | pg->owner_tag = NULL; |
1373 | #endif |
1374 | UVM_PAGE_OWN(pg, "new alloc" ); |
1375 | |
1376 | if (flags & UVM_PGA_ZERO) { |
1377 | /* |
1378 | * A zero'd page is not clean. If we got a page not already |
1379 | * zero'd, then we have to zero it ourselves. |
1380 | */ |
1381 | pg->flags &= ~PG_CLEAN; |
1382 | if (zeroit) |
1383 | pmap_zero_page(VM_PAGE_TO_PHYS(pg)); |
1384 | } |
1385 | |
1386 | return(pg); |
1387 | |
1388 | fail: |
1389 | mutex_spin_exit(&uvm_fpageqlock); |
1390 | return (NULL); |
1391 | } |
1392 | |
1393 | /* |
1394 | * uvm_pagereplace: replace a page with another |
1395 | * |
1396 | * => object must be locked |
1397 | */ |
1398 | |
1399 | void |
uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
1401 | { |
1402 | struct uvm_object *uobj = oldpg->uobject; |
1403 | |
1404 | KASSERT((oldpg->flags & PG_TABLED) != 0); |
1405 | KASSERT(uobj != NULL); |
1406 | KASSERT((newpg->flags & PG_TABLED) == 0); |
1407 | KASSERT(newpg->uobject == NULL); |
1408 | KASSERT(mutex_owned(uobj->vmobjlock)); |
1409 | |
1410 | newpg->uobject = uobj; |
1411 | newpg->offset = oldpg->offset; |
1412 | |
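	/*
	 * insert newpg beside oldpg on the object's page list before
	 * taking oldpg out, so that newpg inherits oldpg's position.
	 */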
1413 | uvm_pageremove_tree(uobj, oldpg); |
1414 | uvm_pageinsert_tree(uobj, newpg); |
1415 | uvm_pageinsert_list(uobj, newpg, oldpg); |
1416 | uvm_pageremove_list(uobj, oldpg); |
1417 | } |
1418 | |
1419 | /* |
1420 | * uvm_pagerealloc: reallocate a page from one object to another |
1421 | * |
1422 | * => both objects must be locked |
1423 | */ |
1424 | |
1425 | void |
uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
1427 | { |
1428 | /* |
1429 | * remove it from the old object |
1430 | */ |
1431 | |
1432 | if (pg->uobject) { |
1433 | uvm_pageremove(pg->uobject, pg); |
1434 | } |
1435 | |
1436 | /* |
1437 | * put it in the new object |
1438 | */ |
1439 | |
1440 | if (newobj) { |
1441 | pg->uobject = newobj; |
1442 | pg->offset = newoff; |
1443 | uvm_pageinsert(newobj, pg); |
1444 | } |
1445 | } |
1446 | |
1447 | #ifdef DEBUG |
1448 | /* |
1449 | * check if page is zero-filled |
1450 | * |
1451 | * - called with free page queue lock held. |
1452 | */ |
1453 | void |
1454 | uvm_pagezerocheck(struct vm_page *pg) |
1455 | { |
1456 | int *p, *ep; |
1457 | |
1458 | KASSERT(uvm_zerocheckkva != 0); |
1459 | KASSERT(mutex_owned(&uvm_fpageqlock)); |
1460 | |
1461 | /* |
1462 | * XXX assuming pmap_kenter_pa and pmap_kremove never call |
1463 | * uvm page allocator. |
1464 | * |
1465 | * it might be better to have "CPU-local temporary map" pmap interface. |
1466 | */ |
1467 | pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0); |
1468 | p = (int *)uvm_zerocheckkva; |
1469 | ep = (int *)((char *)p + PAGE_SIZE); |
1470 | pmap_update(pmap_kernel()); |
1471 | while (p < ep) { |
1472 | if (*p != 0) |
1473 | panic("PG_ZERO page isn't zero-filled" ); |
1474 | p++; |
1475 | } |
1476 | pmap_kremove(uvm_zerocheckkva, PAGE_SIZE); |
1477 | /* |
1478 | * pmap_update() is not necessary here because no one except us |
1479 | * uses this VA. |
1480 | */ |
1481 | } |
1482 | #endif /* DEBUG */ |
1483 | |
1484 | /* |
1485 | * uvm_pagefree: free page |
1486 | * |
1487 | * => erase page's identity (i.e. remove from object) |
1488 | * => put page on free list |
1489 | * => caller must lock owning object (either anon or uvm_object) |
1490 | * => caller must lock page queues |
1491 | * => assumes all valid mappings of pg are gone |
1492 | */ |
1493 | |
1494 | void |
1495 | uvm_pagefree(struct vm_page *pg) |
1496 | { |
1497 | struct pgflist *pgfl; |
1498 | struct uvm_cpu *ucpu; |
1499 | int index, color, queue; |
1500 | bool iszero; |
1501 | |
1502 | #ifdef DEBUG |
1503 | if (pg->uobject == (void *)0xdeadbeef && |
1504 | pg->uanon == (void *)0xdeadbeef) { |
1505 | panic("uvm_pagefree: freeing free page %p" , pg); |
1506 | } |
1507 | #endif /* DEBUG */ |
1508 | |
1509 | KASSERT((pg->flags & PG_PAGEOUT) == 0); |
1510 | KASSERT(!(pg->pqflags & PQ_FREE)); |
1511 | //KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg)); |
1512 | KASSERT(pg->uobject == NULL || mutex_owned(pg->uobject->vmobjlock)); |
1513 | KASSERT(pg->uobject != NULL || pg->uanon == NULL || |
1514 | mutex_owned(pg->uanon->an_lock)); |
1515 | |
1516 | /* |
1517 | * if the page is loaned, resolve the loan instead of freeing. |
1518 | */ |
1519 | |
1520 | if (pg->loan_count) { |
1521 | KASSERT(pg->wire_count == 0); |
1522 | |
1523 | /* |
1524 | * if the page is owned by an anon then we just want to |
1525 | * drop anon ownership. the kernel will free the page when |
1526 | * it is done with it. if the page is owned by an object, |
1527 | * remove it from the object and mark it dirty for the benefit |
1528 | * of possible anon owners. |
1529 | * |
1530 | * regardless of previous ownership, wakeup any waiters, |
1531 | * unbusy the page, and we're done. |
1532 | */ |
1533 | |
1534 | if (pg->uobject != NULL) { |
1535 | uvm_pageremove(pg->uobject, pg); |
1536 | pg->flags &= ~PG_CLEAN; |
1537 | } else if (pg->uanon != NULL) { |
1538 | if ((pg->pqflags & PQ_ANON) == 0) { |
1539 | pg->loan_count--; |
1540 | } else { |
1541 | pg->pqflags &= ~PQ_ANON; |
1542 | atomic_dec_uint(&uvmexp.anonpages); |
1543 | } |
1544 | pg->uanon->an_page = NULL; |
1545 | pg->uanon = NULL; |
1546 | } |
1547 | if (pg->flags & PG_WANTED) { |
1548 | wakeup(pg); |
1549 | } |
1550 | pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1); |
1551 | #ifdef UVM_PAGE_TRKOWN |
1552 | pg->owner_tag = NULL; |
1553 | #endif |
1554 | if (pg->loan_count) { |
1555 | KASSERT(pg->uobject == NULL); |
1556 | if (pg->uanon == NULL) { |
1557 | KASSERT(mutex_owned(&uvm_pageqlock)); |
1558 | uvm_pagedequeue(pg); |
1559 | } |
1560 | return; |
1561 | } |
1562 | } |
1563 | |
1564 | /* |
1565 | * remove page from its object or anon. |
1566 | */ |
1567 | |
1568 | if (pg->uobject != NULL) { |
1569 | uvm_pageremove(pg->uobject, pg); |
1570 | } else if (pg->uanon != NULL) { |
1571 | pg->uanon->an_page = NULL; |
1572 | atomic_dec_uint(&uvmexp.anonpages); |
1573 | } |
1574 | |
1575 | /* |
1576 | * now remove the page from the queues. |
1577 | */ |
1578 | if (uvmpdpol_pageisqueued_p(pg)) { |
1579 | KASSERT(mutex_owned(&uvm_pageqlock)); |
1580 | uvm_pagedequeue(pg); |
1581 | } |
1582 | |
1583 | /* |
1584 | * if the page was wired, unwire it now. |
1585 | */ |
1586 | |
1587 | if (pg->wire_count) { |
1588 | pg->wire_count = 0; |
1589 | uvmexp.wired--; |
1590 | } |
1591 | |
1592 | /* |
1593 | * and put on free queue |
1594 | */ |
1595 | |
1596 | iszero = (pg->flags & PG_ZERO); |
1597 | index = uvm_page_lookup_freelist(pg); |
1598 | color = VM_PGCOLOR_BUCKET(pg); |
1599 | queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN); |
1600 | |
1601 | #ifdef DEBUG |
1602 | pg->uobject = (void *)0xdeadbeef; |
1603 | pg->uanon = (void *)0xdeadbeef; |
1604 | #endif |
1605 | |
1606 | mutex_spin_enter(&uvm_fpageqlock); |
1607 | pg->pqflags = PQ_FREE; |
1608 | |
1609 | #ifdef DEBUG |
1610 | if (iszero) |
1611 | uvm_pagezerocheck(pg); |
1612 | #endif /* DEBUG */ |
1613 | |
1614 | |
1615 | /* global list */ |
1616 | pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue]; |
1617 | LIST_INSERT_HEAD(pgfl, pg, pageq.list); |
1618 | uvmexp.free++; |
1619 | if (iszero) { |
1620 | uvmexp.zeropages++; |
1621 | } |
1622 | |
1623 | /* per-cpu list */ |
1624 | ucpu = curcpu()->ci_data.cpu_uvm; |
1625 | pg->offset = (uintptr_t)ucpu; |
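	/*
	 * a free page has no identity, so pg->offset is reused above to
	 * record the owning cpu for VM_FREE_PAGE_TO_CPU(); see the
	 * CTASSERT in uvm_page_init().
	 */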
1626 | pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue]; |
1627 | LIST_INSERT_HEAD(pgfl, pg, listq.list); |
1628 | ucpu->pages[queue]++; |
1629 | if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) { |
1630 | ucpu->page_idle_zero = vm_page_zero_enable; |
1631 | } |
1632 | |
1633 | mutex_spin_exit(&uvm_fpageqlock); |
1634 | } |
1635 | |
1636 | /* |
1637 | * uvm_page_unbusy: unbusy an array of pages. |
1638 | * |
1639 | * => pages must either all belong to the same object, or all belong to anons. |
1640 | * => if pages are object-owned, object must be locked. |
1641 | * => if pages are anon-owned, anons must be locked. |
1642 | * => caller must lock page queues if pages may be released. |
1643 | * => caller must make sure that anon-owned pages are not PG_RELEASED. |
1644 | */ |
1645 | |
1646 | void |
1647 | uvm_page_unbusy(struct vm_page **pgs, int npgs) |
1648 | { |
1649 | struct vm_page *pg; |
1650 | int i; |
1651 | UVMHIST_FUNC("uvm_page_unbusy" ); UVMHIST_CALLED(ubchist); |
1652 | |
1653 | for (i = 0; i < npgs; i++) { |
1654 | pg = pgs[i]; |
1655 | if (pg == NULL || pg == PGO_DONTCARE) { |
1656 | continue; |
1657 | } |
1658 | |
1659 | KASSERT(uvm_page_locked_p(pg)); |
1660 | KASSERT(pg->flags & PG_BUSY); |
1661 | KASSERT((pg->flags & PG_PAGEOUT) == 0); |
1662 | if (pg->flags & PG_WANTED) { |
1663 | wakeup(pg); |
1664 | } |
1665 | if (pg->flags & PG_RELEASED) { |
1666 | UVMHIST_LOG(ubchist, "releasing pg %p" , pg,0,0,0); |
1667 | KASSERT(pg->uobject != NULL || |
1668 | (pg->uanon != NULL && pg->uanon->an_ref > 0)); |
1669 | pg->flags &= ~PG_RELEASED; |
1670 | uvm_pagefree(pg); |
1671 | } else { |
1672 | UVMHIST_LOG(ubchist, "unbusying pg %p" , pg,0,0,0); |
1673 | KASSERT((pg->flags & PG_FAKE) == 0); |
1674 | pg->flags &= ~(PG_WANTED|PG_BUSY); |
1675 | UVM_PAGE_OWN(pg, NULL); |
1676 | } |
1677 | } |
1678 | } |
1679 | |
1680 | #if defined(UVM_PAGE_TRKOWN) |
1681 | /* |
1682 | * uvm_page_own: set or release page ownership |
1683 | * |
1684 | * => this is a debugging function that keeps track of who sets PG_BUSY |
1685 | * and where they do it. it can be used to track down problems |
 *	such as a process setting "PG_BUSY" and never releasing it.
1687 | * => page's object [if any] must be locked |
1688 | * => if "tag" is NULL then we are releasing page ownership |
1689 | */ |
1690 | void |
1691 | uvm_page_own(struct vm_page *pg, const char *tag) |
1692 | { |
1693 | |
1694 | KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0); |
1695 | KASSERT((pg->flags & PG_WANTED) == 0); |
1696 | KASSERT(uvm_page_locked_p(pg)); |
1697 | |
1698 | /* gain ownership? */ |
1699 | if (tag) { |
1700 | KASSERT((pg->flags & PG_BUSY) != 0); |
1701 | if (pg->owner_tag) { |
1702 | printf("uvm_page_own: page %p already owned " |
1703 | "by proc %d [%s]\n" , pg, |
1704 | pg->owner, pg->owner_tag); |
1705 | panic("uvm_page_own" ); |
1706 | } |
1707 | pg->owner = curproc->p_pid; |
1708 | pg->lowner = curlwp->l_lid; |
1709 | pg->owner_tag = tag; |
1710 | return; |
1711 | } |
1712 | |
1713 | /* drop ownership */ |
1714 | KASSERT((pg->flags & PG_BUSY) == 0); |
1715 | if (pg->owner_tag == NULL) { |
1716 | printf("uvm_page_own: dropping ownership of an non-owned " |
1717 | "page (%p)\n" , pg); |
1718 | panic("uvm_page_own" ); |
1719 | } |
1720 | if (!uvmpdpol_pageisqueued_p(pg)) { |
1721 | KASSERT((pg->uanon == NULL && pg->uobject == NULL) || |
1722 | pg->wire_count > 0); |
1723 | } else { |
1724 | KASSERT(pg->wire_count == 0); |
1725 | } |
1726 | pg->owner_tag = NULL; |
1727 | } |
1728 | #endif |
1729 | |
1730 | /* |
1731 | * uvm_pageidlezero: zero free pages while the system is idle. |
1732 | * |
1733 | * => try to complete one color bucket at a time, to reduce our impact |
1734 | * on the CPU cache. |
1735 | * => we loop until we either reach the target or there is a lwp ready |
1736 | * to run, or MD code detects a reason to break early. |
1737 | */ |
1738 | void |
1739 | uvm_pageidlezero(void) |
1740 | { |
1741 | struct vm_page *pg; |
1742 | struct pgfreelist *pgfl, *gpgfl; |
1743 | struct uvm_cpu *ucpu; |
1744 | int free_list, firstbucket, nextbucket; |
1745 | bool lcont = false; |
1746 | |
1747 | ucpu = curcpu()->ci_data.cpu_uvm; |
1748 | if (!ucpu->page_idle_zero || |
1749 | ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) { |
1750 | ucpu->page_idle_zero = false; |
1751 | return; |
1752 | } |
1753 | if (!mutex_tryenter(&uvm_fpageqlock)) { |
		/* Contention: let other CPUs use the lock. */
1755 | return; |
1756 | } |
1757 | firstbucket = ucpu->page_free_nextcolor; |
1758 | nextbucket = firstbucket; |
1759 | do { |
1760 | for (free_list = 0; free_list < VM_NFREELIST; free_list++) { |
1761 | if (sched_curcpu_runnable_p()) { |
1762 | goto quit; |
1763 | } |
1764 | pgfl = &ucpu->page_free[free_list]; |
1765 | gpgfl = &uvm.page_free[free_list]; |
1766 | while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[ |
1767 | nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) { |
1768 | if (lcont || sched_curcpu_runnable_p()) { |
1769 | goto quit; |
1770 | } |
1771 | LIST_REMOVE(pg, pageq.list); /* global list */ |
1772 | LIST_REMOVE(pg, listq.list); /* per-cpu list */ |
1773 | ucpu->pages[PGFL_UNKNOWN]--; |
1774 | uvmexp.free--; |
1775 | KASSERT(pg->pqflags == PQ_FREE); |
1776 | pg->pqflags = 0; |
1777 | mutex_spin_exit(&uvm_fpageqlock); |
1778 | #ifdef PMAP_PAGEIDLEZERO |
1779 | if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) { |
1780 | |
1781 | /* |
1782 | * The machine-dependent code detected |
1783 | * some reason for us to abort zeroing |
1784 | * pages, probably because there is a |
1785 | * process now ready to run. |
1786 | */ |
1787 | |
1788 | mutex_spin_enter(&uvm_fpageqlock); |
1789 | pg->pqflags = PQ_FREE; |
1790 | LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[ |
1791 | nextbucket].pgfl_queues[ |
1792 | PGFL_UNKNOWN], pg, pageq.list); |
1793 | LIST_INSERT_HEAD(&pgfl->pgfl_buckets[ |
1794 | nextbucket].pgfl_queues[ |
1795 | PGFL_UNKNOWN], pg, listq.list); |
1796 | ucpu->pages[PGFL_UNKNOWN]++; |
1797 | uvmexp.free++; |
1798 | uvmexp.zeroaborts++; |
1799 | goto quit; |
1800 | } |
1801 | #else |
1802 | pmap_zero_page(VM_PAGE_TO_PHYS(pg)); |
1803 | #endif /* PMAP_PAGEIDLEZERO */ |
1804 | pg->flags |= PG_ZERO; |
1805 | |
1806 | if (!mutex_tryenter(&uvm_fpageqlock)) { |
1807 | lcont = true; |
1808 | mutex_spin_enter(&uvm_fpageqlock); |
1809 | } else { |
1810 | lcont = false; |
1811 | } |
1812 | pg->pqflags = PQ_FREE; |
1813 | LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[ |
1814 | nextbucket].pgfl_queues[PGFL_ZEROS], |
1815 | pg, pageq.list); |
1816 | LIST_INSERT_HEAD(&pgfl->pgfl_buckets[ |
1817 | nextbucket].pgfl_queues[PGFL_ZEROS], |
1818 | pg, listq.list); |
1819 | ucpu->pages[PGFL_ZEROS]++; |
1820 | uvmexp.free++; |
1821 | uvmexp.zeropages++; |
1822 | } |
1823 | } |
1824 | if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) { |
1825 | break; |
1826 | } |
1827 | nextbucket = (nextbucket + 1) & uvmexp.colormask; |
1828 | } while (nextbucket != firstbucket); |
1829 | ucpu->page_idle_zero = false; |
1830 | quit: |
1831 | mutex_spin_exit(&uvm_fpageqlock); |
1832 | } |
1833 | |
1834 | /* |
1835 | * uvm_pagelookup: look up a page |
1836 | * |
1837 | * => caller should lock object to keep someone from pulling the page |
1838 | * out from under it |
1839 | */ |
1840 | |
1841 | struct vm_page * |
1842 | uvm_pagelookup(struct uvm_object *obj, voff_t off) |
1843 | { |
1844 | struct vm_page *pg; |
1845 | |
1846 | KASSERT(mutex_owned(obj->vmobjlock)); |
1847 | |
1848 | pg = rb_tree_find_node(&obj->rb_tree, &off); |
1849 | |
1850 | KASSERT(pg == NULL || obj->uo_npages != 0); |
1851 | KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 || |
1852 | (pg->flags & PG_BUSY) != 0); |
1853 | return pg; |
1854 | } |
1855 | |
1856 | /* |
1857 | * uvm_pagewire: wire the page, thus removing it from the daemon's grasp |
1858 | * |
1859 | * => caller must lock page queues |
1860 | */ |
1861 | |
1862 | void |
1863 | uvm_pagewire(struct vm_page *pg) |
1864 | { |
1865 | KASSERT(mutex_owned(&uvm_pageqlock)); |
1866 | #if defined(READAHEAD_STATS) |
1867 | if ((pg->pqflags & PQ_READAHEAD) != 0) { |
1868 | uvm_ra_hit.ev_count++; |
1869 | pg->pqflags &= ~PQ_READAHEAD; |
1870 | } |
1871 | #endif /* defined(READAHEAD_STATS) */ |
1872 | if (pg->wire_count == 0) { |
1873 | uvm_pagedequeue(pg); |
1874 | uvmexp.wired++; |
1875 | } |
1876 | pg->wire_count++; |
1877 | } |
1878 | |
1879 | /* |
1880 | * uvm_pageunwire: unwire the page. |
1881 | * |
1882 | * => activate if wire count goes to zero. |
1883 | * => caller must lock page queues |
1884 | */ |
1885 | |
1886 | void |
1887 | uvm_pageunwire(struct vm_page *pg) |
1888 | { |
1889 | KASSERT(mutex_owned(&uvm_pageqlock)); |
1890 | pg->wire_count--; |
1891 | if (pg->wire_count == 0) { |
1892 | uvm_pageactivate(pg); |
1893 | uvmexp.wired--; |
1894 | } |
1895 | } |
1896 | |
1897 | /* |
1898 | * uvm_pagedeactivate: deactivate page |
1899 | * |
1900 | * => caller must lock page queues |
1901 | * => caller must check to make sure page is not wired |
1902 | * => object that page belongs to must be locked (so we can adjust pg->flags) |
1903 | * => caller must clear the reference on the page before calling |
1904 | */ |
1905 | |
1906 | void |
1907 | uvm_pagedeactivate(struct vm_page *pg) |
1908 | { |
1909 | |
1910 | KASSERT(mutex_owned(&uvm_pageqlock)); |
1911 | KASSERT(uvm_page_locked_p(pg)); |
1912 | KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg)); |
1913 | uvmpdpol_pagedeactivate(pg); |
1914 | } |
1915 | |
1916 | /* |
1917 | * uvm_pageactivate: activate page |
1918 | * |
1919 | * => caller must lock page queues |
1920 | */ |
1921 | |
1922 | void |
1923 | uvm_pageactivate(struct vm_page *pg) |
1924 | { |
1925 | |
1926 | KASSERT(mutex_owned(&uvm_pageqlock)); |
1927 | KASSERT(uvm_page_locked_p(pg)); |
1928 | #if defined(READAHEAD_STATS) |
1929 | if ((pg->pqflags & PQ_READAHEAD) != 0) { |
1930 | uvm_ra_hit.ev_count++; |
1931 | pg->pqflags &= ~PQ_READAHEAD; |
1932 | } |
1933 | #endif /* defined(READAHEAD_STATS) */ |
1934 | if (pg->wire_count != 0) { |
1935 | return; |
1936 | } |
1937 | uvmpdpol_pageactivate(pg); |
1938 | } |
1939 | |
1940 | /* |
1941 | * uvm_pagedequeue: remove a page from any paging queue |
1942 | */ |
1943 | |
1944 | void |
1945 | uvm_pagedequeue(struct vm_page *pg) |
1946 | { |
1947 | |
1948 | if (uvmpdpol_pageisqueued_p(pg)) { |
1949 | KASSERT(mutex_owned(&uvm_pageqlock)); |
1950 | } |
1951 | |
1952 | uvmpdpol_pagedequeue(pg); |
1953 | } |
1954 | |
1955 | /* |
1956 | * uvm_pageenqueue: add a page to a paging queue without activating. |
* used where a page is not really demanded (yet), e.g. read-ahead.
1958 | */ |
1959 | |
1960 | void |
1961 | uvm_pageenqueue(struct vm_page *pg) |
1962 | { |
1963 | |
1964 | KASSERT(mutex_owned(&uvm_pageqlock)); |
1965 | if (pg->wire_count != 0) { |
1966 | return; |
1967 | } |
1968 | uvmpdpol_pageenqueue(pg); |
1969 | } |
1970 | |
1971 | /* |
1972 | * uvm_pagezero: zero fill a page |
1973 | * |
1974 | * => if page is part of an object then the object should be locked |
1975 | * to protect pg->flags. |
1976 | */ |
1977 | |
1978 | void |
1979 | uvm_pagezero(struct vm_page *pg) |
1980 | { |
1981 | pg->flags &= ~PG_CLEAN; |
1982 | pmap_zero_page(VM_PAGE_TO_PHYS(pg)); |
1983 | } |
1984 | |
1985 | /* |
1986 | * uvm_pagecopy: copy a page |
1987 | * |
1988 | * => if page is part of an object then the object should be locked |
1989 | * to protect pg->flags. |
1990 | */ |
1991 | |
1992 | void |
1993 | uvm_pagecopy(struct vm_page *src, struct vm_page *dst) |
1994 | { |
1995 | |
1996 | dst->flags &= ~PG_CLEAN; |
1997 | pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); |
1998 | } |
1999 | |
2000 | /* |
* uvm_pageismanaged: test whether a page (specified by PA) is managed.
2002 | */ |
2003 | |
2004 | bool |
2005 | uvm_pageismanaged(paddr_t pa) |
2006 | { |
2007 | |
2008 | return (vm_physseg_find(atop(pa), NULL) != -1); |
2009 | } |
2010 | |
2011 | /* |
2012 | * uvm_page_lookup_freelist: look up the free list for the specified page |
2013 | */ |
2014 | |
2015 | int |
2016 | uvm_page_lookup_freelist(struct vm_page *pg) |
2017 | { |
2018 | int lcv; |
2019 | |
2020 | lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL); |
2021 | KASSERT(lcv != -1); |
2022 | return (VM_PHYSMEM_PTR(lcv)->free_list); |
2023 | } |
2024 | |
2025 | /* |
2026 | * uvm_page_locked_p: return true if object associated with page is |
2027 | * locked. this is a weak check for runtime assertions only. |
2028 | */ |
2029 | |
2030 | bool |
2031 | uvm_page_locked_p(struct vm_page *pg) |
2032 | { |
2033 | |
2034 | if (pg->uobject != NULL) { |
2035 | return mutex_owned(pg->uobject->vmobjlock); |
2036 | } |
2037 | if (pg->uanon != NULL) { |
2038 | return mutex_owned(pg->uanon->an_lock); |
2039 | } |
2040 | return true; |
2041 | } |
2042 | |
2043 | #if defined(DDB) || defined(DEBUGPRINT) |
2044 | |
2045 | /* |
2046 | * uvm_page_printit: actually print the page |
2047 | */ |
2048 | |
2049 | static const char page_flagbits[] = UVM_PGFLAGBITS; |
2050 | static const char page_pqflagbits[] = UVM_PQFLAGBITS; |
2051 | |
2052 | void |
2053 | uvm_page_printit(struct vm_page *pg, bool full, |
2054 | void (*pr)(const char *, ...)) |
2055 | { |
2056 | struct vm_page *tpg; |
2057 | struct uvm_object *uobj; |
2058 | struct pgflist *pgl; |
2059 | char pgbuf[128]; |
2060 | char pqbuf[128]; |
2061 | |
2062 | (*pr)("PAGE %p:\n" , pg); |
2063 | snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); |
2064 | snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags); |
2065 | (*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n" , |
2066 | pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg)); |
2067 | (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n" , |
2068 | pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); |
2069 | #if defined(UVM_PAGE_TRKOWN) |
2070 | if (pg->flags & PG_BUSY) |
2071 | (*pr)(" owning process = %d, tag=%s\n" , |
2072 | pg->owner, pg->owner_tag); |
2073 | else |
2074 | (*pr)(" page not busy, no owner\n" ); |
2075 | #else |
2076 | (*pr)(" [page ownership tracking disabled]\n" ); |
2077 | #endif |
2078 | |
2079 | if (!full) |
2080 | return; |
2081 | |
2082 | /* cross-verify object/anon */ |
2083 | if ((pg->pqflags & PQ_FREE) == 0) { |
2084 | if (pg->pqflags & PQ_ANON) { |
2085 | if (pg->uanon == NULL || pg->uanon->an_page != pg) |
2086 | (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n" , |
2087 | (pg->uanon) ? pg->uanon->an_page : NULL); |
2088 | else |
2089 | (*pr)(" anon backpointer is OK\n" ); |
2090 | } else { |
2091 | uobj = pg->uobject; |
2092 | if (uobj) { |
2093 | (*pr)(" checking object list\n" ); |
2094 | TAILQ_FOREACH(tpg, &uobj->memq, listq.queue) { |
2095 | if (tpg == pg) { |
2096 | break; |
2097 | } |
2098 | } |
2099 | if (tpg) |
2100 | (*pr)(" page found on object list\n" ); |
2101 | else |
2102 | (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n" ); |
2103 | } |
2104 | } |
2105 | } |
2106 | |
2107 | /* cross-verify page queue */ |
2108 | if (pg->pqflags & PQ_FREE) { |
2109 | int fl = uvm_page_lookup_freelist(pg); |
2110 | int color = VM_PGCOLOR_BUCKET(pg); |
2111 | pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[ |
2112 | ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN]; |
2113 | } else { |
2114 | pgl = NULL; |
2115 | } |
2116 | |
2117 | if (pgl) { |
2118 | (*pr)(" checking pageq list\n" ); |
2119 | LIST_FOREACH(tpg, pgl, pageq.list) { |
2120 | if (tpg == pg) { |
2121 | break; |
2122 | } |
2123 | } |
2124 | if (tpg) |
2125 | (*pr)(" page found on pageq list\n" ); |
2126 | else |
2127 | (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n" ); |
2128 | } |
2129 | } |
2130 | |
2131 | /* |
* uvm_page_printall: print a summary of all managed pages
2133 | */ |
2134 | |
2135 | void |
2136 | uvm_page_printall(void (*pr)(const char *, ...)) |
2137 | { |
2138 | unsigned i; |
2139 | struct vm_page *pg; |
2140 | |
2141 | (*pr)("%18s %4s %4s %18s %18s" |
2142 | #ifdef UVM_PAGE_TRKOWN |
2143 | " OWNER" |
2144 | #endif |
2145 | "\n" , "PAGE" , "FLAG" , "PQ" , "UOBJECT" , "UANON" ); |
2146 | for (i = 0; i < vm_nphysmem; i++) { |
2147 | for (pg = VM_PHYSMEM_PTR(i)->pgs; pg < VM_PHYSMEM_PTR(i)->lastpg; pg++) { |
2148 | (*pr)("%18p %04x %04x %18p %18p" , |
2149 | pg, pg->flags, pg->pqflags, pg->uobject, |
2150 | pg->uanon); |
2151 | #ifdef UVM_PAGE_TRKOWN |
2152 | if (pg->flags & PG_BUSY) |
2153 | (*pr)(" %d [%s]" , pg->owner, pg->owner_tag); |
2154 | #endif |
2155 | (*pr)("\n" ); |
2156 | } |
2157 | } |
2158 | } |
2159 | |
2160 | #endif /* DDB || DEBUGPRINT */ |
2161 | |