1 | /* $NetBSD: uvm_pdaemon.c,v 1.108 2013/10/25 20:28:33 martin Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1997 Charles D. Cranor and Washington University. |
5 | * Copyright (c) 1991, 1993, The Regents of the University of California. |
6 | * |
7 | * All rights reserved. |
8 | * |
9 | * This code is derived from software contributed to Berkeley by |
10 | * The Mach Operating System project at Carnegie-Mellon University. |
11 | * |
12 | * Redistribution and use in source and binary forms, with or without |
13 | * modification, are permitted provided that the following conditions |
14 | * are met: |
15 | * 1. Redistributions of source code must retain the above copyright |
16 | * notice, this list of conditions and the following disclaimer. |
17 | * 2. Redistributions in binary form must reproduce the above copyright |
18 | * notice, this list of conditions and the following disclaimer in the |
19 | * documentation and/or other materials provided with the distribution. |
20 | * 3. Neither the name of the University nor the names of its contributors |
21 | * may be used to endorse or promote products derived from this software |
22 | * without specific prior written permission. |
23 | * |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
34 | * SUCH DAMAGE. |
35 | * |
36 | * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94 |
37 | * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp |
38 | * |
39 | * |
40 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. |
41 | * All rights reserved. |
42 | * |
43 | * Permission to use, copy, modify and distribute this software and |
44 | * its documentation is hereby granted, provided that both the copyright |
45 | * notice and this permission notice appear in all copies of the |
46 | * software, derivative works or modified versions, and any portions |
47 | * thereof, and that both notices appear in supporting documentation. |
48 | * |
49 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
50 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
51 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
52 | * |
53 | * Carnegie Mellon requests users of this software to return to |
54 | * |
55 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
56 | * School of Computer Science |
57 | * Carnegie Mellon University |
58 | * Pittsburgh PA 15213-3890 |
59 | * |
60 | * any improvements or extensions that they make and grant Carnegie the |
61 | * rights to redistribute these changes. |
62 | */ |
63 | |
64 | /* |
65 | * uvm_pdaemon.c: the page daemon |
66 | */ |
67 | |
68 | #include <sys/cdefs.h> |
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.108 2013/10/25 20:28:33 martin Exp $");
70 | |
71 | #include "opt_uvmhist.h" |
72 | #include "opt_readahead.h" |
73 | |
74 | #include <sys/param.h> |
75 | #include <sys/proc.h> |
76 | #include <sys/systm.h> |
77 | #include <sys/kernel.h> |
78 | #include <sys/pool.h> |
79 | #include <sys/buf.h> |
80 | #include <sys/module.h> |
81 | #include <sys/atomic.h> |
82 | |
83 | #include <uvm/uvm.h> |
84 | #include <uvm/uvm_pdpolicy.h> |
85 | |
86 | #ifdef UVMHIST |
87 | UVMHIST_DEFINE(pdhist); |
88 | #endif |
89 | |
90 | /* |
91 | * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate |
92 | * in a pass thru the inactive list when swap is full. the value should be |
93 | * "small"... if it's too large we'll cycle the active pages thru the inactive |
 * queue too quickly for them to be referenced and thus avoid being freed.
95 | */ |
96 | |
97 | #define UVMPD_NUMDIRTYREACTS 16 |
98 | |
99 | #define UVMPD_NUMTRYLOCKOWNER 16 |
100 | |
101 | /* |
102 | * local prototypes |
103 | */ |
104 | |
105 | static void uvmpd_scan(void); |
106 | static void uvmpd_scan_queue(void); |
107 | static void uvmpd_tune(void); |
108 | |
109 | static unsigned int uvm_pagedaemon_waiters; |
110 | |
111 | /* |
112 | * XXX hack to avoid hangs when large processes fork. |
113 | */ |
u_int uvm_extrapages;
115 | |
116 | /* |
117 | * uvm_wait: wait (sleep) for the page daemon to free some pages |
118 | * |
119 | * => should be called with all locks released |
120 | * => should _not_ be called by the page daemon (to avoid deadlock) |
121 | */ |
122 | |
123 | void |
124 | uvm_wait(const char *wmsg) |
125 | { |
126 | int timo = 0; |
127 | |
128 | mutex_spin_enter(&uvm_fpageqlock); |
129 | |
130 | /* |
131 | * check for page daemon going to sleep (waiting for itself) |
132 | */ |
133 | |
134 | if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) { |
135 | /* |
136 | * now we have a problem: the pagedaemon wants to go to |
137 | * sleep until it frees more memory. but how can it |
138 | * free more memory if it is asleep? that is a deadlock. |
139 | * we have two options: |
140 | * [1] panic now |
141 | * [2] put a timeout on the sleep, thus causing the |
142 | * pagedaemon to only pause (rather than sleep forever) |
143 | * |
144 | * note that option [2] will only help us if we get lucky |
145 | * and some other process on the system breaks the deadlock |
146 | * by exiting or freeing memory (thus allowing the pagedaemon |
147 | * to continue). for now we panic if DEBUG is defined, |
148 | * otherwise we hope for the best with option [2] (better |
149 | * yet, this should never happen in the first place!). |
150 | */ |
151 | |
		printf("pagedaemon: deadlock detected!\n");
153 | timo = hz >> 3; /* set timeout */ |
154 | #if defined(DEBUG) |
155 | /* DEBUG: panic so we can debug it */ |
		panic("pagedaemon deadlock");
157 | #endif |
158 | } |
159 | |
160 | uvm_pagedaemon_waiters++; |
161 | wakeup(&uvm.pagedaemon); /* wake the daemon! */ |
162 | UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo); |
163 | } |
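
/*
 * Example (illustrative sketch, not part of the build): a typical
 * caller allocates a page and, on failure, drops its locks and
 * sleeps in uvm_wait() until the pagedaemon frees memory.  the
 * function and its "uobj"/"off" arguments are hypothetical; error
 * handling is omitted.
 */
#if 0
static struct vm_page *
example_alloc_page(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	for (;;) {
		mutex_enter(uobj->vmobjlock);
		pg = uvm_pagealloc(uobj, off, NULL, 0);
		mutex_exit(uobj->vmobjlock);
		if (pg != NULL)
			return pg;
		/* all locks are released, as uvm_wait() requires */
		uvm_wait("examplewt");
	}
}
#endif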
164 | |
165 | /* |
166 | * uvm_kick_pdaemon: perform checks to determine if we need to |
167 | * give the pagedaemon a nudge, and do so if necessary. |
168 | * |
169 | * => called with uvm_fpageqlock held. |
170 | */ |
171 | |
172 | void |
173 | uvm_kick_pdaemon(void) |
174 | { |
175 | |
176 | KASSERT(mutex_owned(&uvm_fpageqlock)); |
177 | |
178 | if (uvmexp.free + uvmexp.paging < uvmexp.freemin || |
179 | (uvmexp.free + uvmexp.paging < uvmexp.freetarg && |
180 | uvmpdpol_needsscan_p()) || |
181 | uvm_km_va_starved_p()) { |
182 | wakeup(&uvm.pagedaemon); |
183 | } |
184 | } |
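
/*
 * Example (sketch): how a hypothetical allocation path might nudge
 * the pagedaemon after taking pages off the free list.  the kick is
 * cheap: it only wakes the daemon if the free/scan targets are unmet.
 */
#if 0
	mutex_spin_enter(&uvm_fpageqlock);
	/* ... bookkeeping that may lower uvmexp.free ... */
	uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);
#endif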
185 | |
186 | /* |
187 | * uvmpd_tune: tune paging parameters |
188 | * |
 * => called whenever memory is added to (or removed from?) the system
 * => must be called with the page queues locked
191 | */ |
192 | |
193 | static void |
194 | uvmpd_tune(void) |
195 | { |
196 | int val; |
197 | |
	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
199 | |
200 | /* |
201 | * try to keep 0.5% of available RAM free, but limit to between |
202 | * 128k and 1024k per-CPU. XXX: what are these values good for? |
203 | */ |
204 | val = uvmexp.npages / 200; |
205 | val = MAX(val, (128*1024) >> PAGE_SHIFT); |
206 | val = MIN(val, (1024*1024) >> PAGE_SHIFT); |
207 | val *= ncpu; |
208 | |
209 | /* Make sure there's always a user page free. */ |
210 | if (val < uvmexp.reserve_kernel + 1) |
211 | val = uvmexp.reserve_kernel + 1; |
212 | uvmexp.freemin = val; |
213 | |
214 | /* Calculate free target. */ |
215 | val = (uvmexp.freemin * 4) / 3; |
216 | if (val <= uvmexp.freemin) |
217 | val = uvmexp.freemin + 1; |
218 | uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0); |
219 | |
220 | uvmexp.wiredmax = uvmexp.npages / 3; |
	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
223 | } |
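
/*
 * Worked example of the tuning above, assuming 4 KiB pages: on a
 * 1 GiB, 2-CPU machine, npages = 262144, so npages / 200 = 1310;
 * this is clamped to the 32..256 page range (128 KiB .. 1 MiB),
 * giving 256, and scaled by ncpu to freemin = 512 pages (2 MiB).
 * freetarg is then 512 * 4 / 3 = 682 pages, plus any uvm_extrapages.
 */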
224 | |
225 | /* |
226 | * uvm_pageout: the main loop for the pagedaemon |
227 | */ |
228 | |
229 | void |
230 | uvm_pageout(void *arg) |
231 | { |
232 | int bufcnt, npages = 0; |
	int extrapages = 0;
234 | struct pool *pp; |
235 | |
	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
237 | |
	UVMHIST_LOG(pdhist, "<starting uvm pagedaemon>", 0, 0, 0, 0);
239 | |
240 | /* |
241 | * ensure correct priority and set paging parameters... |
242 | */ |
243 | |
244 | uvm.pagedaemon_lwp = curlwp; |
245 | mutex_enter(&uvm_pageqlock); |
246 | npages = uvmexp.npages; |
247 | uvmpd_tune(); |
248 | mutex_exit(&uvm_pageqlock); |
249 | |
250 | /* |
251 | * main loop |
252 | */ |
253 | |
254 | for (;;) { |
255 | bool needsscan, needsfree, kmem_va_starved; |
256 | |
257 | kmem_va_starved = uvm_km_va_starved_p(); |
258 | |
259 | mutex_spin_enter(&uvm_fpageqlock); |
260 | if ((uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) && |
261 | !kmem_va_starved) { |
			UVMHIST_LOG(pdhist, "  <<SLEEPING>>", 0, 0, 0, 0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvm_fpageqlock, false, "pgdaemon", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist, "  <<WOKE UP>>", 0, 0, 0, 0);
267 | } else { |
268 | mutex_spin_exit(&uvm_fpageqlock); |
269 | } |
270 | |
271 | /* |
272 | * now lock page queues and recompute inactive count |
273 | */ |
274 | |
275 | mutex_enter(&uvm_pageqlock); |
276 | if (npages != uvmexp.npages || extrapages != uvm_extrapages) { |
277 | npages = uvmexp.npages; |
278 | extrapages = uvm_extrapages; |
279 | mutex_spin_enter(&uvm_fpageqlock); |
280 | uvmpd_tune(); |
281 | mutex_spin_exit(&uvm_fpageqlock); |
282 | } |
283 | |
284 | uvmpdpol_tune(); |
285 | |
286 | /* |
		 * Estimate a hint.  Note that bufmem is returned to the
		 * system only when an entire pool page is empty.
289 | */ |
290 | mutex_spin_enter(&uvm_fpageqlock); |
291 | bufcnt = uvmexp.freetarg - uvmexp.free; |
292 | if (bufcnt < 0) |
293 | bufcnt = 0; |
294 | |
		UVMHIST_LOG(pdhist, "  free/ftarg=%d/%d",
		    uvmexp.free, uvmexp.freetarg, 0, 0);
297 | |
298 | needsfree = uvmexp.free + uvmexp.paging < uvmexp.freetarg; |
299 | needsscan = needsfree || uvmpdpol_needsscan_p(); |
300 | |
301 | /* |
302 | * scan if needed |
303 | */ |
304 | if (needsscan) { |
305 | mutex_spin_exit(&uvm_fpageqlock); |
306 | uvmpd_scan(); |
307 | mutex_spin_enter(&uvm_fpageqlock); |
308 | } |
309 | |
310 | /* |
311 | * if there's any free memory to be had, |
312 | * wake up any waiters. |
313 | */ |
314 | if (uvmexp.free > uvmexp.reserve_kernel || |
315 | uvmexp.paging == 0) { |
316 | wakeup(&uvmexp.free); |
317 | uvm_pagedaemon_waiters = 0; |
318 | } |
319 | mutex_spin_exit(&uvm_fpageqlock); |
320 | |
321 | /* |
322 | * scan done. unlock page queues (the only lock we are holding) |
323 | */ |
324 | mutex_exit(&uvm_pageqlock); |
325 | |
326 | /* |
327 | * if we don't need free memory, we're done. |
328 | */ |
329 | |
330 | if (!needsfree && !kmem_va_starved) |
331 | continue; |
332 | |
333 | /* |
334 | * kill unused metadata buffers. |
335 | */ |
336 | mutex_enter(&bufcache_lock); |
337 | buf_drain(bufcnt << PAGE_SHIFT); |
338 | mutex_exit(&bufcache_lock); |
339 | |
340 | /* |
341 | * drain the pools. |
342 | */ |
343 | pool_drain(&pp); |
344 | } |
345 | /*NOTREACHED*/ |
346 | } |
347 | |
349 | /* |
350 | * uvm_aiodone_worker: a workqueue callback for the aiodone daemon. |
351 | */ |
352 | |
353 | void |
354 | uvm_aiodone_worker(struct work *wk, void *dummy) |
355 | { |
356 | struct buf *bp = (void *)wk; |
357 | |
358 | KASSERT(&bp->b_work == wk); |
359 | |
360 | /* |
361 | * process an i/o that's done. |
362 | */ |
363 | |
364 | (*bp->b_iodone)(bp); |
365 | } |
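
/*
 * Example (sketch): the completion side that feeds this worker.
 * the workqueue name below is an assumption; see where
 * uvm_aiodone_worker is registered with workqueue_create().
 */
#if 0
	workqueue_enqueue(uvm.aiodone_queue, &bp->b_work, NULL);
#endif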
366 | |
367 | void |
368 | uvm_pageout_start(int npages) |
369 | { |
370 | |
371 | mutex_spin_enter(&uvm_fpageqlock); |
372 | uvmexp.paging += npages; |
373 | mutex_spin_exit(&uvm_fpageqlock); |
374 | } |
375 | |
376 | void |
377 | uvm_pageout_done(int npages) |
378 | { |
379 | |
380 | mutex_spin_enter(&uvm_fpageqlock); |
381 | KASSERT(uvmexp.paging >= npages); |
382 | uvmexp.paging -= npages; |
383 | |
384 | /* |
	 * wake up either the pagedaemon or the LWPs waiting for it.
386 | */ |
387 | |
388 | if (uvmexp.free <= uvmexp.reserve_kernel) { |
389 | wakeup(&uvm.pagedaemon); |
390 | } else { |
391 | wakeup(&uvmexp.free); |
392 | uvm_pagedaemon_waiters = 0; |
393 | } |
394 | mutex_spin_exit(&uvm_fpageqlock); |
395 | } |
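
/*
 * Example (sketch): uvm_pageout_start()/uvm_pageout_done() bracket
 * an asynchronous pageout, as swapcluster_flush() does below for
 * swap-backed pages.  "slot", "pages" and "npages" are hypothetical.
 */
#if 0
	uvm_pageout_start(npages);	/* before issuing the async write */
	error = uvm_swap_put(slot, pages, npages, 0);
	/* ... the i/o completion path later calls uvm_pageout_done() ... */
#endif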
396 | |
397 | /* |
398 | * uvmpd_trylockowner: trylock the page's owner. |
399 | * |
400 | * => called with pageq locked. |
 * => resolve an orphaned O->A loaned page.
402 | * => return the locked mutex on success. otherwise, return NULL. |
403 | */ |
404 | |
405 | kmutex_t * |
406 | uvmpd_trylockowner(struct vm_page *pg) |
407 | { |
408 | struct uvm_object *uobj = pg->uobject; |
409 | kmutex_t *slock; |
410 | |
411 | KASSERT(mutex_owned(&uvm_pageqlock)); |
412 | |
413 | if (uobj != NULL) { |
414 | slock = uobj->vmobjlock; |
415 | } else { |
416 | struct vm_anon *anon = pg->uanon; |
417 | |
418 | KASSERT(anon != NULL); |
419 | slock = anon->an_lock; |
420 | } |
421 | |
422 | if (!mutex_tryenter(slock)) { |
423 | return NULL; |
424 | } |
425 | |
426 | if (uobj == NULL) { |
427 | |
428 | /* |
429 | * set PQ_ANON if it isn't set already. |
430 | */ |
431 | |
432 | if ((pg->pqflags & PQ_ANON) == 0) { |
433 | KASSERT(pg->loan_count > 0); |
434 | pg->loan_count--; |
435 | pg->pqflags |= PQ_ANON; |
436 | /* anon now owns it */ |
437 | } |
438 | } |
439 | |
440 | return slock; |
441 | } |
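
/*
 * Example (sketch): the canonical calling pattern for
 * uvmpd_trylockowner() from a page queue scan, as used by
 * uvmpd_trydropswap() and uvmpd_scan_queue() below.
 */
#if 0
	kmutex_t *slock;

	slock = uvmpd_trylockowner(pg);
	if (slock == NULL)
		continue;		/* lock is busy; skip this page */
	if ((pg->flags & PG_BUSY) == 0) {
		/* ... operate on the page with its owner locked ... */
	}
	mutex_exit(slock);
#endif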
442 | |
443 | #if defined(VMSWAP) |
444 | struct swapcluster { |
445 | int swc_slot; |
446 | int swc_nallocated; |
447 | int swc_nused; |
448 | struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)]; |
449 | }; |
450 | |
451 | static void |
452 | swapcluster_init(struct swapcluster *swc) |
453 | { |
454 | |
455 | swc->swc_slot = 0; |
456 | swc->swc_nused = 0; |
457 | } |
458 | |
459 | static int |
460 | swapcluster_allocslots(struct swapcluster *swc) |
461 | { |
462 | int slot; |
463 | int npages; |
464 | |
465 | if (swc->swc_slot != 0) { |
466 | return 0; |
467 | } |
468 | |
	/*
	 * Even with strange MAXPHYS, the shift
	 * implicitly rounds down to a page.
	 */
471 | npages = MAXPHYS >> PAGE_SHIFT; |
472 | slot = uvm_swap_alloc(&npages, true); |
473 | if (slot == 0) { |
474 | return ENOMEM; |
475 | } |
476 | swc->swc_slot = slot; |
477 | swc->swc_nallocated = npages; |
478 | swc->swc_nused = 0; |
479 | |
480 | return 0; |
481 | } |
482 | |
483 | static int |
484 | swapcluster_add(struct swapcluster *swc, struct vm_page *pg) |
485 | { |
486 | int slot; |
487 | struct uvm_object *uobj; |
488 | |
489 | KASSERT(swc->swc_slot != 0); |
490 | KASSERT(swc->swc_nused < swc->swc_nallocated); |
491 | KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0); |
492 | |
493 | slot = swc->swc_slot + swc->swc_nused; |
494 | uobj = pg->uobject; |
495 | if (uobj == NULL) { |
496 | KASSERT(mutex_owned(pg->uanon->an_lock)); |
497 | pg->uanon->an_swslot = slot; |
498 | } else { |
499 | int result; |
500 | |
501 | KASSERT(mutex_owned(uobj->vmobjlock)); |
502 | result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot); |
503 | if (result == -1) { |
504 | return ENOMEM; |
505 | } |
506 | } |
507 | swc->swc_pages[swc->swc_nused] = pg; |
508 | swc->swc_nused++; |
509 | |
510 | return 0; |
511 | } |
512 | |
513 | static void |
514 | swapcluster_flush(struct swapcluster *swc, bool now) |
515 | { |
516 | int slot; |
517 | int nused; |
518 | int nallocated; |
519 | int error __diagused; |
520 | |
521 | if (swc->swc_slot == 0) { |
522 | return; |
523 | } |
524 | KASSERT(swc->swc_nused <= swc->swc_nallocated); |
525 | |
526 | slot = swc->swc_slot; |
527 | nused = swc->swc_nused; |
528 | nallocated = swc->swc_nallocated; |
529 | |
530 | /* |
531 | * if this is the final pageout we could have a few |
532 | * unused swap blocks. if so, free them now. |
533 | */ |
534 | |
535 | if (nused < nallocated) { |
536 | if (!now) { |
537 | return; |
538 | } |
539 | uvm_swap_free(slot + nused, nallocated - nused); |
540 | } |
541 | |
542 | /* |
543 | * now start the pageout. |
544 | */ |
545 | |
546 | if (nused > 0) { |
547 | uvmexp.pdpageouts++; |
548 | uvm_pageout_start(nused); |
549 | error = uvm_swap_put(slot, swc->swc_pages, nused, 0); |
550 | KASSERT(error == 0 || error == ENOMEM); |
551 | } |
552 | |
553 | /* |
	 * zero swc_slot to indicate that we are
	 * no longer building a swap-backed cluster.
556 | */ |
557 | |
558 | swc->swc_slot = 0; |
559 | swc->swc_nused = 0; |
560 | } |
561 | |
562 | static int |
563 | swapcluster_nused(struct swapcluster *swc) |
564 | { |
565 | |
566 | return swc->swc_nused; |
567 | } |
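
/*
 * Example (sketch): the swapcluster life cycle as driven by
 * uvmpd_scan_queue() below -- slots are allocated lazily, pages are
 * added with their owners locked, and the cluster is written out
 * when full (or unconditionally at the end of the scan).
 */
#if 0
	struct swapcluster swc;

	swapcluster_init(&swc);
	if (swapcluster_allocslots(&swc) == 0 &&
	    swapcluster_add(&swc, pg) == 0)
		swapcluster_flush(&swc, false);	/* writes only if full */
	swapcluster_flush(&swc, true);		/* final, unconditional */
#endif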
568 | |
569 | /* |
570 | * uvmpd_dropswap: free any swap allocated to this page. |
571 | * |
572 | * => called with owner locked. |
573 | * => return true if a page had an associated slot. |
574 | */ |
575 | |
576 | static bool |
577 | uvmpd_dropswap(struct vm_page *pg) |
578 | { |
579 | bool result = false; |
580 | struct vm_anon *anon = pg->uanon; |
581 | |
582 | if ((pg->pqflags & PQ_ANON) && anon->an_swslot) { |
583 | uvm_swap_free(anon->an_swslot, 1); |
584 | anon->an_swslot = 0; |
585 | pg->flags &= ~PG_CLEAN; |
586 | result = true; |
587 | } else if (pg->pqflags & PQ_AOBJ) { |
588 | int slot = uao_set_swslot(pg->uobject, |
589 | pg->offset >> PAGE_SHIFT, 0); |
590 | if (slot) { |
591 | uvm_swap_free(slot, 1); |
592 | pg->flags &= ~PG_CLEAN; |
593 | result = true; |
594 | } |
595 | } |
596 | |
597 | return result; |
598 | } |
599 | |
600 | /* |
601 | * uvmpd_trydropswap: try to free any swap allocated to this page. |
602 | * |
603 | * => return true if a slot is successfully freed. |
604 | */ |
605 | |
606 | bool |
607 | uvmpd_trydropswap(struct vm_page *pg) |
608 | { |
609 | kmutex_t *slock; |
610 | bool result; |
611 | |
612 | if ((pg->flags & PG_BUSY) != 0) { |
613 | return false; |
614 | } |
615 | |
616 | /* |
617 | * lock the page's owner. |
618 | */ |
619 | |
620 | slock = uvmpd_trylockowner(pg); |
621 | if (slock == NULL) { |
622 | return false; |
623 | } |
624 | |
625 | /* |
626 | * skip this page if it's busy. |
627 | */ |
628 | |
629 | if ((pg->flags & PG_BUSY) != 0) { |
630 | mutex_exit(slock); |
631 | return false; |
632 | } |
633 | |
634 | result = uvmpd_dropswap(pg); |
635 | |
636 | mutex_exit(slock); |
637 | |
638 | return result; |
639 | } |
640 | |
641 | #endif /* defined(VMSWAP) */ |
642 | |
643 | /* |
 * uvmpd_scan_queue: scan the replace-candidate list for pages
 * to clean or free.
646 | * |
647 | * => called with page queues locked |
648 | * => we work on meeting our free target by converting inactive pages |
649 | * into free pages. |
650 | * => we handle the building of swap-backed clusters |
651 | */ |
652 | |
653 | static void |
654 | uvmpd_scan_queue(void) |
655 | { |
656 | struct vm_page *p; |
657 | struct uvm_object *uobj; |
658 | struct vm_anon *anon; |
659 | #if defined(VMSWAP) |
660 | struct swapcluster swc; |
661 | #endif /* defined(VMSWAP) */ |
662 | int dirtyreacts; |
663 | int lockownerfail; |
664 | kmutex_t *slock; |
	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);
666 | |
667 | /* |
	 * swc_slot is non-zero if we are building a swap cluster.  we want
669 | * to stay in the loop while we have a page to scan or we have |
670 | * a swap-cluster to build. |
671 | */ |
672 | |
673 | #if defined(VMSWAP) |
674 | swapcluster_init(&swc); |
675 | #endif /* defined(VMSWAP) */ |
676 | |
677 | dirtyreacts = 0; |
678 | lockownerfail = 0; |
679 | uvmpdpol_scaninit(); |
680 | |
681 | while (/* CONSTCOND */ 1) { |
682 | |
683 | /* |
684 | * see if we've met the free target. |
685 | */ |
686 | |
687 | if (uvmexp.free + uvmexp.paging |
688 | #if defined(VMSWAP) |
689 | + swapcluster_nused(&swc) |
690 | #endif /* defined(VMSWAP) */ |
691 | >= uvmexp.freetarg << 2 || |
692 | dirtyreacts == UVMPD_NUMDIRTYREACTS) { |
			UVMHIST_LOG(pdhist, "  met free target: "
			    "exit loop", 0, 0, 0, 0);
695 | break; |
696 | } |
697 | |
698 | p = uvmpdpol_selectvictim(); |
699 | if (p == NULL) { |
700 | break; |
701 | } |
702 | KASSERT(uvmpdpol_pageisqueued_p(p)); |
703 | KASSERT(p->wire_count == 0); |
704 | |
705 | /* |
706 | * we are below target and have a new page to consider. |
707 | */ |
708 | |
709 | anon = p->uanon; |
710 | uobj = p->uobject; |
711 | |
712 | /* |
713 | * first we attempt to lock the object that this page |
714 | * belongs to. if our attempt fails we skip on to |
715 | * the next page (no harm done). it is important to |
716 | * "try" locking the object as we are locking in the |
717 | * wrong order (pageq -> object) and we don't want to |
718 | * deadlock. |
719 | * |
720 | * the only time we expect to see an ownerless page |
721 | * (i.e. a page with no uobject and !PQ_ANON) is if an |
722 | * anon has loaned a page from a uvm_object and the |
723 | * uvm_object has dropped the ownership. in that |
724 | * case, the anon can "take over" the loaned page |
725 | * and make it its own. |
726 | */ |
727 | |
728 | slock = uvmpd_trylockowner(p); |
729 | if (slock == NULL) { |
730 | /* |
			 * yield the cpu so the LWP holding the lock gets a
			 * chance to run.  otherwise we can busy-loop for too
			 * long if the page queue is filled with many pages
			 * from only a few objects.
735 | */ |
736 | lockownerfail++; |
737 | if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) { |
738 | mutex_exit(&uvm_pageqlock); |
739 | /* XXX Better than yielding but inadequate. */ |
				kpause("livelock", false, 1, NULL);
741 | mutex_enter(&uvm_pageqlock); |
742 | lockownerfail = 0; |
743 | } |
744 | continue; |
745 | } |
746 | if (p->flags & PG_BUSY) { |
747 | mutex_exit(slock); |
748 | uvmexp.pdbusy++; |
749 | continue; |
750 | } |
751 | |
752 | /* does the page belong to an object? */ |
753 | if (uobj != NULL) { |
754 | uvmexp.pdobscan++; |
755 | } else { |
756 | #if defined(VMSWAP) |
757 | KASSERT(anon != NULL); |
758 | uvmexp.pdanscan++; |
759 | #else /* defined(VMSWAP) */ |
			panic("%s: anon", __func__);
761 | #endif /* defined(VMSWAP) */ |
762 | } |
763 | |
765 | /* |
766 | * we now have the object and the page queues locked. |
767 | * if the page is not swap-backed, call the object's |
768 | * pager to flush and free the page. |
769 | */ |
770 | |
771 | #if defined(READAHEAD_STATS) |
772 | if ((p->pqflags & PQ_READAHEAD) != 0) { |
773 | p->pqflags &= ~PQ_READAHEAD; |
774 | uvm_ra_miss.ev_count++; |
775 | } |
776 | #endif /* defined(READAHEAD_STATS) */ |
777 | |
778 | if ((p->pqflags & PQ_SWAPBACKED) == 0) { |
779 | KASSERT(uobj != NULL); |
780 | mutex_exit(&uvm_pageqlock); |
781 | (void) (uobj->pgops->pgo_put)(uobj, p->offset, |
782 | p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE); |
783 | mutex_enter(&uvm_pageqlock); |
784 | continue; |
785 | } |
786 | |
787 | /* |
788 | * the page is swap-backed. remove all the permissions |
789 | * from the page so we can sync the modified info |
790 | * without any race conditions. if the page is clean |
791 | * we can free it now and continue. |
792 | */ |
793 | |
794 | pmap_page_protect(p, VM_PROT_NONE); |
795 | if ((p->flags & PG_CLEAN) && pmap_clear_modify(p)) { |
796 | p->flags &= ~(PG_CLEAN); |
797 | } |
798 | if (p->flags & PG_CLEAN) { |
799 | int slot; |
800 | int pageidx; |
801 | |
802 | pageidx = p->offset >> PAGE_SHIFT; |
803 | uvm_pagefree(p); |
804 | uvmexp.pdfreed++; |
805 | |
806 | /* |
807 | * for anons, we need to remove the page |
808 | * from the anon ourselves. for aobjs, |
809 | * pagefree did that for us. |
810 | */ |
811 | |
812 | if (anon) { |
813 | KASSERT(anon->an_swslot != 0); |
814 | anon->an_page = NULL; |
815 | slot = anon->an_swslot; |
816 | } else { |
817 | slot = uao_find_swslot(uobj, pageidx); |
818 | } |
819 | mutex_exit(slock); |
820 | |
821 | if (slot > 0) { |
822 | /* this page is now only in swap. */ |
823 | mutex_enter(&uvm_swap_data_lock); |
824 | KASSERT(uvmexp.swpgonly < uvmexp.swpginuse); |
825 | uvmexp.swpgonly++; |
826 | mutex_exit(&uvm_swap_data_lock); |
827 | } |
828 | continue; |
829 | } |
830 | |
831 | #if defined(VMSWAP) |
832 | /* |
833 | * this page is dirty, skip it if we'll have met our |
834 | * free target when all the current pageouts complete. |
835 | */ |
836 | |
837 | if (uvmexp.free + uvmexp.paging > uvmexp.freetarg << 2) { |
838 | mutex_exit(slock); |
839 | continue; |
840 | } |
841 | |
842 | /* |
843 | * free any swap space allocated to the page since |
844 | * we'll have to write it again with its new data. |
845 | */ |
846 | |
847 | uvmpd_dropswap(p); |
848 | |
849 | /* |
850 | * start new swap pageout cluster (if necessary). |
851 | * |
852 | * if swap is full reactivate this page so that |
853 | * we eventually cycle all pages through the |
854 | * inactive queue. |
855 | */ |
856 | |
857 | if (swapcluster_allocslots(&swc)) { |
858 | dirtyreacts++; |
859 | uvm_pageactivate(p); |
860 | mutex_exit(slock); |
861 | continue; |
862 | } |
863 | |
864 | /* |
		 * at this point, we're definitely going to reuse this
866 | * page. mark the page busy and delayed-free. |
867 | * we should remove the page from the page queues |
868 | * so we don't ever look at it again. |
869 | * adjust counters and such. |
870 | */ |
871 | |
872 | p->flags |= PG_BUSY; |
		UVM_PAGE_OWN(p, "scan_queue");
874 | |
875 | p->flags |= PG_PAGEOUT; |
876 | uvm_pagedequeue(p); |
877 | |
878 | uvmexp.pgswapout++; |
879 | mutex_exit(&uvm_pageqlock); |
880 | |
881 | /* |
882 | * add the new page to the cluster. |
883 | */ |
884 | |
885 | if (swapcluster_add(&swc, p)) { |
886 | p->flags &= ~(PG_BUSY|PG_PAGEOUT); |
887 | UVM_PAGE_OWN(p, NULL); |
888 | mutex_enter(&uvm_pageqlock); |
889 | dirtyreacts++; |
890 | uvm_pageactivate(p); |
891 | mutex_exit(slock); |
892 | continue; |
893 | } |
894 | mutex_exit(slock); |
895 | |
896 | swapcluster_flush(&swc, false); |
897 | mutex_enter(&uvm_pageqlock); |
898 | |
899 | /* |
900 | * the pageout is in progress. bump counters and set up |
901 | * for the next loop. |
902 | */ |
903 | |
904 | uvmexp.pdpending++; |
905 | |
906 | #else /* defined(VMSWAP) */ |
907 | uvm_pageactivate(p); |
908 | mutex_exit(slock); |
909 | #endif /* defined(VMSWAP) */ |
910 | } |
911 | |
912 | #if defined(VMSWAP) |
913 | mutex_exit(&uvm_pageqlock); |
914 | swapcluster_flush(&swc, true); |
915 | mutex_enter(&uvm_pageqlock); |
916 | #endif /* defined(VMSWAP) */ |
917 | } |
918 | |
919 | /* |
920 | * uvmpd_scan: scan the page queues and attempt to meet our targets. |
921 | * |
922 | * => called with pageq's locked |
923 | */ |
924 | |
925 | static void |
926 | uvmpd_scan(void) |
927 | { |
928 | int swap_shortage, pages_freed; |
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
930 | |
931 | uvmexp.pdrevs++; |
932 | |
933 | /* |
934 | * work on meeting our targets. first we work on our free target |
935 | * by converting inactive pages into free pages. then we work on |
936 | * meeting our inactive target by converting active pages to |
937 | * inactive ones. |
938 | */ |
939 | |
	UVMHIST_LOG(pdhist, "  starting 'free' loop", 0, 0, 0, 0);
941 | |
942 | pages_freed = uvmexp.pdfreed; |
943 | uvmpd_scan_queue(); |
944 | pages_freed = uvmexp.pdfreed - pages_freed; |
945 | |
946 | /* |
947 | * detect if we're not going to be able to page anything out |
948 | * until we free some swap resources from active pages. |
949 | */ |
950 | |
951 | swap_shortage = 0; |
952 | if (uvmexp.free < uvmexp.freetarg && |
953 | uvmexp.swpginuse >= uvmexp.swpgavail && |
954 | !uvm_swapisfull() && |
955 | pages_freed == 0) { |
956 | swap_shortage = uvmexp.freetarg - uvmexp.free; |
957 | } |
958 | |
959 | uvmpdpol_balancequeue(swap_shortage); |
960 | |
961 | /* |
962 | * if still below the minimum target, try unloading kernel |
963 | * modules. |
964 | */ |
965 | |
966 | if (uvmexp.free < uvmexp.freemin) { |
967 | module_thread_kick(); |
968 | } |
969 | } |
970 | |
971 | /* |
972 | * uvm_reclaimable: decide whether to wait for pagedaemon. |
973 | * |
974 | * => return true if it seems to be worth to do uvm_wait. |
975 | * |
976 | * XXX should be tunable. |
977 | * XXX should consider pools, etc? |
978 | */ |
979 | |
980 | bool |
981 | uvm_reclaimable(void) |
982 | { |
983 | int filepages; |
984 | int active, inactive; |
985 | |
986 | /* |
987 | * if swap is not full, no problem. |
988 | */ |
989 | |
990 | if (!uvm_swapisfull()) { |
991 | return true; |
992 | } |
993 | |
994 | /* |
995 | * file-backed pages can be reclaimed even when swap is full. |
 * if we have more than 1/16 of pageable memory or 5MB, whichever is
 * smaller, try to reclaim.
997 | * |
998 | * XXX assume the worst case, ie. all wired pages are file-backed. |
999 | * |
 * XXX should consider other reclaimable memory.
1001 | * XXX ie. pools, traditional buffer cache. |
1002 | */ |
1003 | |
1004 | filepages = uvmexp.filepages + uvmexp.execpages - uvmexp.wired; |
1005 | uvm_estimatepageable(&active, &inactive); |
1006 | if (filepages >= MIN((active + inactive) >> 4, |
1007 | 5 * 1024 * 1024 >> PAGE_SHIFT)) { |
1008 | return true; |
1009 | } |
1010 | |
1011 | /* |
	 * kill the process, fail allocation, etc.
1013 | */ |
1014 | |
1015 | return false; |
1016 | } |
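
/*
 * Worked example of the threshold above, assuming 4 KiB pages: with
 * swap full and 100000 pageable pages, the cutoff is
 * MIN(100000 >> 4, 5 MiB in pages) = MIN(6250, 1280) = 1280, so
 * uvm_reclaimable() returns true while at least 1280 file-backed
 * (non-wired) pages remain.
 */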
1017 | |
1018 | void |
1019 | uvm_estimatepageable(int *active, int *inactive) |
1020 | { |
1021 | |
1022 | uvmpdpol_estimatepageable(active, inactive); |
1023 | } |
1024 | |
1025 | |