/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.40 2022/04/12 20:27:56 andvar Exp $	*/
/*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/

/*-
 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.40 2022/04/12 20:27:56 andvar Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>
#include <uvm/uvm_stat.h>

#endif /* defined(PDSIM) */

/*
 * per-CPU queue of pending page status changes.  128 entries makes for a
 * 1kB queue on _LP64 and has been found to be a reasonable compromise that
 * keeps lock contention events and wait times low, while not using too much
 * memory nor allowing global state to fall too far behind.
 */
#if !defined(CLOCK_PDQ_SIZE)
#define	CLOCK_PDQ_SIZE	128
#endif /* !defined(CLOCK_PDQ_SIZE) */

#define	PQ_INACTIVE	0x00000010	/* page is in inactive list */
#define	PQ_ACTIVE	0x00000020	/* page is in active list */

#if !defined(CLOCK_INACTIVEPCT)
#define	CLOCK_INACTIVEPCT	33
#endif /* !defined(CLOCK_INACTIVEPCT) */

struct uvmpdpol_globalstate {
	kmutex_t lock;			/* lock on state */
					/* <= compiler pads here */
	struct pglist s_activeq		/* allocated pages, in use */
	    __aligned(COHERENCY_UNIT);
	struct pglist s_inactiveq;	/* pages between the clock hands */
	int s_active;
	int s_inactive;
	int s_inactarg;
	struct uvm_pctparam s_anonmin;
	struct uvm_pctparam s_filemin;
	struct uvm_pctparam s_execmin;
	struct uvm_pctparam s_anonmax;
	struct uvm_pctparam s_filemax;
	struct uvm_pctparam s_execmax;
	struct uvm_pctparam s_inactivepct;
};

struct uvmpdpol_scanstate {
	bool ss_anonreact, ss_filereact, ss_execreact;
	struct vm_page ss_marker;
};

static void	uvmpdpol_pageactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedeactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedequeue_locked(struct vm_page *);
static bool	uvmpdpol_pagerealize_locked(struct vm_page *);
static struct uvm_cpu	*uvmpdpol_flush(void);

static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
static struct uvmpdpol_scanstate pdpol_scanstate;

PDPOL_EVCNT_DEFINE(reactexec)
PDPOL_EVCNT_DEFINE(reactfile)
PDPOL_EVCNT_DEFINE(reactanon)

static void
clock_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
	    s->s_active + s->s_inactive);
	if (s->s_inactarg <= uvmexp.freetarg) {
		s->s_inactarg = uvmexp.freetarg + 1;
	}
}

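/*
 * uvmpdpol_scaninit: prepare for a pagedaemon pass over the inactive queue:
 * decide which page types (anon, file, exec) should be reactivated rather
 * than reclaimed, and insert the scan marker at the head of the inactive
 * queue.
 */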
void
uvmpdpol_scaninit(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	int t;
	bool anonunder, fileunder, execunder;
	bool anonover, fileover, execover;
	bool anonreact, filereact, execreact;
	int64_t freepg, anonpg, filepg, execpg;

	/*
	 * decide which types of pages we want to reactivate instead of freeing
	 * to keep usage within the minimum and maximum usage limits.
	 * uvm_availmem() will sync the counters.
	 */
	freepg = uvm_availmem(false);
	anonpg = cpu_count_get(CPU_COUNT_ANONCLEAN) +
	    cpu_count_get(CPU_COUNT_ANONDIRTY) +
	    cpu_count_get(CPU_COUNT_ANONUNKNOWN);
	execpg = cpu_count_get(CPU_COUNT_EXECPAGES);
	filepg = cpu_count_get(CPU_COUNT_FILECLEAN) +
	    cpu_count_get(CPU_COUNT_FILEDIRTY) +
	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) -
	    execpg;

	mutex_enter(&s->lock);
	t = s->s_active + s->s_inactive + freepg;
	anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
	fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
	execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
	anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
	fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
	execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
	anonreact = anonunder || (!anonover && (fileover || execover));
	filereact = fileunder || (!fileover && (anonover || execover));
	execreact = execunder || (!execover && (anonover || fileover));
	if (filereact && execreact && (anonreact || uvm_swapisfull())) {
		anonreact = filereact = execreact = false;
	}
	ss->ss_anonreact = anonreact;
	ss->ss_filereact = filereact;
	ss->ss_execreact = execreact;
	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
	ss->ss_marker.flags = PG_MARKER;
	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

void
uvmpdpol_scanfini(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;

	mutex_enter(&s->lock);
	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

struct vm_page *
uvmpdpol_selectvictim(krwlock_t **plock)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	struct vm_page *pg;
	krwlock_t *lock;

	mutex_enter(&s->lock);
	while (/* CONSTCOND */ 1) {
		struct vm_anon *anon;
		struct uvm_object *uobj;

		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
		if (pg == NULL) {
			break;
		}
		KASSERT((pg->flags & PG_MARKER) == 0);
		uvmexp.pdscans++;

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&pg->interlock);
		if (uvmpdpol_pagerealize_locked(pg)) {
			mutex_exit(&pg->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
		    pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
		    &ss->ss_marker, pdqueue);

		/*
		 * enforce the minimum thresholds on different
		 * types of memory usage.  if reusing the current
		 * page would reduce that type of usage below its
		 * minimum, reactivate the page instead and move
		 * on to the next page.
		 */
		anon = pg->uanon;
		uobj = pg->uobject;
		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactexec);
			continue;
		}
		if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
		    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactfile);
			continue;
		}
		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactanon);
			continue;
		}

		/*
		 * try to lock the object that owns the page.
		 *
		 * with the page interlock held, we can drop s->lock, which
		 * could otherwise serve as a barrier to us getting the
		 * object locked, because the owner of the object's lock may
		 * be blocked on s->lock (i.e. a deadlock).
		 *
		 * whatever happens, uvmpd_trylockowner() will release the
		 * interlock.  with the interlock dropped we can then
		 * re-acquire our own lock.  the order is:
		 *
		 *	object -> pdpol -> interlock.
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(pg);
		/* pg->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * move referenced pages back to active queue and skip to
		 * next page.
		 */
		if (pmap_is_referenced(pg)) {
			mutex_enter(&pg->interlock);
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			uvmexp.pdreact++;
			rw_exit(lock);
			continue;
		}

		/* we have a potential victim. */
		*plock = lock;
		break;
	}
	mutex_exit(&s->lock);
	return pg;
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int inactive_shortage;
	struct vm_page *p, marker;
	krwlock_t *lock;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */
	memset(&marker, 0, sizeof(marker));
	marker.flags = PG_MARKER;

	mutex_enter(&s->lock);
	TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
	for (;;) {
		inactive_shortage =
		    pdpol_state.s_inactarg - pdpol_state.s_inactive;
		if (inactive_shortage <= 0 && swap_shortage <= 0) {
			break;
		}
		p = TAILQ_NEXT(&marker, pdqueue);
		if (p == NULL) {
			break;
		}
		KASSERT((p->flags & PG_MARKER) == 0);

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&p->interlock);
		if (uvmpdpol_pagerealize_locked(p)) {
			mutex_exit(&p->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
		    pdqueue);

		/*
		 * try to lock the object that owns the page.  see comments
		 * in uvmpdpol_selectvictim().
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(p);
		/* p->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * if there's a shortage of swap slots, try to free it.
		 */
		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
		    (p->flags & PG_BUSY) == 0) {
			if (uvmpd_dropswap(p)) {
				swap_shortage--;
			}
		}

		/*
		 * if there's a shortage of inactive pages, deactivate.
		 */
		if (inactive_shortage > 0) {
			pmap_clear_reference(p);
			mutex_enter(&p->interlock);
			uvmpdpol_pagedeactivate_locked(p);
			mutex_exit(&p->interlock);
			uvmexp.pddeact++;
			inactive_shortage--;
		}
		rw_exit(lock);
	}
	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
	mutex_exit(&s->lock);
}

static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	}
	if ((pg->pqflags & PQ_INACTIVE) == 0) {
		KASSERT(pg->wire_count == 0);
		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
		pdpol_state.s_inactive++;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_INACTIVE;
}

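/*
 * uvmpdpol_pagedeactivate: note the intent to move a page onto the
 * inactive queue.  the queues themselves are only updated later, when
 * the pending intent is realized (see uvmpdpol_pagerealize()).
 */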
void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{
	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	/*
	 * we have to clear the reference bit now, as when it comes time to
	 * realize the intent we won't have the object locked any more.
	 */
	pmap_clear_reference(pg);
	uvmpdpol_set_intent(pg, PQ_INTENT_I);
}

static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	uvmpdpol_pagedequeue_locked(pg);
	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
	pdpol_state.s_active++;
	pg->pqflags &= ~(PQ_INACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_ACTIVE;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{
	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_A);
}

static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	} else if (pg->pqflags & PQ_INACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
		KASSERT(pdpol_state.s_inactive > 0);
		pdpol_state.s_inactive--;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_SET);
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{
	KASSERT(uvm_page_owner_locked_p(pg, true));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_D);
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{
	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_E);
}

void
uvmpdpol_anfree(struct vm_anon *an)
{
}

bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/*
	 * if there's an intent set, we have to consider it.  otherwise,
	 * return the actual state.  we may be called unlocked for the
	 * purpose of assertions, which is safe due to the page lifecycle.
	 */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
	} else {
		return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
	}
}

bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/* consider intent in preference to actual state. */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		pqflags &= PQ_INTENT_MASK;
		return pqflags != PQ_INTENT_A && pqflags != PQ_INTENT_E;
	} else {
		/*
		 * TODO: Enabling this may be too much of a big hammer,
		 * since we do get useful information from activations.
		 * Think about it more and maybe come up with a heuristic
		 * or something.
		 *
		 * return (pqflags & PQ_ACTIVE) == 0;
		 */
		return true;
	}
}

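/*
 * uvmpdpol_estimatepageable: report the current sizes of the active and
 * inactive queues.  the counts are unlocked snapshots and therefore only
 * approximate.
 */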
void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	/*
	 * Don't take any locks here.  This can be called from DDB, and in
	 * any case the numbers are stale the instant the lock is dropped,
	 * so it just doesn't matter.
	 */
	if (active) {
		*active = s->s_active;
	}
	if (inactive) {
		*inactive = s->s_inactive;
	}
}

#if !defined(PDSIM)
static int
min_check(struct uvm_pctparam *pct, int t)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int total = t;

	if (pct != &s->s_anonmin) {
		total += uvm_pctparam_get(&s->s_anonmin);
	}
	if (pct != &s->s_filemin) {
		total += uvm_pctparam_get(&s->s_filemin);
	}
	if (pct != &s->s_execmin) {
		total += uvm_pctparam_get(&s->s_execmin);
	}
	if (total > 95) {
		return EINVAL;
	}
	return 0;
}
#endif /* !defined(PDSIM) */

void
uvmpdpol_init(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&s->s_activeq);
	TAILQ_INIT(&s->s_inactiveq);
	uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
	uvm_pctparam_init(&s->s_anonmin, 10, min_check);
	uvm_pctparam_init(&s->s_filemin, 10, min_check);
	uvm_pctparam_init(&s->s_execmin, 5, min_check);
	uvm_pctparam_init(&s->s_anonmax, 80, NULL);
	uvm_pctparam_init(&s->s_filemax, 50, NULL);
	uvm_pctparam_init(&s->s_execmax, 30, NULL);
}

void
uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
{
	ucpu->pdq = kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *),
	    KM_SLEEP);
	ucpu->pdqhead = CLOCK_PDQ_SIZE;
	ucpu->pdqtail = CLOCK_PDQ_SIZE;
}

void
uvmpdpol_reinit(void)
{
}

bool
uvmpdpol_needsscan_p(void)
{
	/*
	 * this must be an unlocked check: can be called from interrupt.
	 */
	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}

void
uvmpdpol_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_enter(&s->lock);
	clock_tune();
	mutex_exit(&s->lock);
}

/*
 * uvmpdpol_pagerealize_locked: take the intended state set on a page and
 * make it real.  return true if any work was done.
 */
static bool
uvmpdpol_pagerealize_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
	case PQ_INTENT_A | PQ_INTENT_SET:
	case PQ_INTENT_E | PQ_INTENT_SET:
		uvmpdpol_pageactivate_locked(pg);
		return true;
	case PQ_INTENT_I | PQ_INTENT_SET:
		uvmpdpol_pagedeactivate_locked(pg);
		return true;
	case PQ_INTENT_D | PQ_INTENT_SET:
		uvmpdpol_pagedequeue_locked(pg);
		return true;
	default:
		return false;
	}
}

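/*
 * note on the per-CPU pending-update queue: uvmpdpol_pagerealize() fills
 * it downwards from pdqtail towards zero, and uvmpdpol_flush() and
 * uvmpdpol_idle() drain it upwards, so pdqhead == pdqtail means empty and
 * pdqhead == 0 means full.
 */
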
/*
 * uvmpdpol_flush: return the current uvm_cpu with all of its pending
 * updates flushed to the global queues.  this routine may block, and
 * so can switch cpu.  the idea is to empty the queue on whatever cpu
 * we finally end up on.
 */
static struct uvm_cpu *
uvmpdpol_flush(void)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	mutex_enter(&s->lock);
	for (;;) {
		/*
		 * prefer scanning forwards (even though mutex_enter() is
		 * serializing) so as to not defeat any prefetch logic in
		 * the CPU.  that means elsewhere enqueuing backwards, like
		 * a stack, but not so important there as pages are being
		 * added singularly.
		 *
		 * prefetch the next "struct vm_page" while working on the
		 * current one.  this has a measurable and very positive
		 * effect in reducing the amount of time spent here under
		 * the global lock.
		 */
		ucpu = curcpu()->ci_data.cpu_uvm;
		KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
		if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
			break;
		}
		pg = ucpu->pdq[ucpu->pdqhead++];
		if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
			__builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
		}
		mutex_enter(&pg->interlock);
		pg->pqflags &= ~PQ_INTENT_QUEUED;
		(void)uvmpdpol_pagerealize_locked(pg);
		mutex_exit(&pg->interlock);
	}
	mutex_exit(&s->lock);
	return ucpu;
}

/*
 * uvmpdpol_pagerealize: realize any intent set on the page.  in this
 * implementation, that means putting the page on a per-CPU queue to be
 * dealt with later.
 */
void
uvmpdpol_pagerealize(struct vm_page *pg)
{
	struct uvm_cpu *ucpu;

	/*
	 * drain the per-CPU queue if full, then enter the page.
	 */
	kpreempt_disable();
	ucpu = curcpu()->ci_data.cpu_uvm;
	if (__predict_false(ucpu->pdqhead == 0)) {
		ucpu = uvmpdpol_flush();
	}
	ucpu->pdq[--(ucpu->pdqhead)] = pg;
	kpreempt_enable();
}

/*
 * uvmpdpol_idle: called from the system idle loop.  periodically purge any
 * pending updates back to the global queues.
 */
void
uvmpdpol_idle(struct uvm_cpu *ucpu)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	/*
	 * if no pages in the queue, we have nothing to do.
	 */
	if (ucpu->pdqhead == ucpu->pdqtail) {
		ucpu->pdqtime = getticks();
		return;
	}

	/*
	 * don't do this more than ~8 times a second as it would needlessly
	 * exert pressure.
	 */
	if (getticks() - ucpu->pdqtime < (hz >> 3)) {
		return;
	}

	/*
	 * the idle LWP can't block, so we have to try for the lock.  if we
	 * get it, purge the per-CPU pending update queue.  continually
	 * check for a pending resched: in that case exit immediately.
	 */
	if (mutex_tryenter(&s->lock)) {
		while (ucpu->pdqhead != ucpu->pdqtail) {
			pg = ucpu->pdq[ucpu->pdqhead];
			if (!mutex_tryenter(&pg->interlock)) {
				break;
			}
			ucpu->pdqhead++;
			pg->pqflags &= ~PQ_INTENT_QUEUED;
			(void)uvmpdpol_pagerealize_locked(pg);
			mutex_exit(&pg->interlock);
			if (curcpu()->ci_want_resched) {
				break;
			}
		}
		if (ucpu->pdqhead == ucpu->pdqtail) {
			ucpu->pdqtime = getticks();
		}
		mutex_exit(&s->lock);
	}
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached executable data"));

	uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for "
	    "anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "executable data"));

	uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
	    SYSCTL_DESCR("Percentage of inactive queue of "
	    "the entire (active + inactive) queue"));
}

#endif /* !defined(PDSIM) */

#if defined(PDSIM)
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */