/*	$NetBSD: uvm_loan.c,v 1.83 2012/07/30 23:56:48 matt Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.83 2012/07/30 23:56:48 matt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.  this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.  this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.  in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.  to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.  pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.  in order to prevent the owner
 * from dying the page queues should be locked.  this forces us to
 * sometimes use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).  this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
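
/*
 * to make the above concrete: a minimal sketch (compiled out) of an
 * O->K/A->K style loan as a consumer of this API might perform it.
 * the function name and error handling here are illustrative only;
 * real consumers live elsewhere in the kernel (e.g. the mbuf code).
 */
#ifdef notdef
static int
example_loan_to_kernel(struct vm_map *map, vaddr_t va)
{
	struct vm_page *pgs[1];
	int error;

	/* loan one page at "va" to the kernel for read-only use */
	error = uvm_loan(map, va, PAGE_SIZE, pgs, UVM_LOAN_TOPAGE);
	if (error)
		return error;

	/* ... map pgs[0] read-only in the kernel and use it ... */

	/* drop the loan; an orphaned page is freed with its last loan */
	uvm_unloan(pgs, 1, UVM_LOAN_TOPAGE);
	return 0;
}
#endif /* notdef */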

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
		    int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
		    int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *    on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.  check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anons or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *		   UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
	    ((flags & UVM_LOAN_TOPAGE) == 0));

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 * smarter code (but it only happens on map entry
		 * boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}
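
/*
 * usage note: with UVM_LOAN_TOANON the result array holds anon
 * pointers instead of pages.  a hedged, illustrative sketch (the
 * function name is not an existing caller); note that on failure
 * uvm_loan() has already dropped any partial loans itself.
 */
#ifdef notdef
static int
example_loan_to_anons(struct vm_map *map, vaddr_t start, vsize_t len,
    struct vm_anon **anons)
{
	/* "anons" must have room for len >> PAGE_SHIFT entries */
	return uvm_loan(map, start, len, anons, UVM_LOAN_TOANON);
}
#endif /* notdef */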

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		KASSERT(mutex_owned(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.  we need to get the page
	 * resident so we can wire it.  uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(mutex_owned(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?  sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		mutex_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}
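
/*
 * the caller contract above, in code form: a hedged sketch of driving
 * uvm_loanpage() directly (illustrative only; the real callers are
 * uvm_loanuobjpages() and uvm_loanuobj() below).
 */
#ifdef notdef
static int
example_call_loanpage(struct uvm_object *uobj, struct vm_page **pgs,
    int npages)
{
	int error;

	mutex_enter(uobj->vmobjlock);
	/* ... make pgs[0..npages-1] resident and PG_BUSY here ... */
	error = uvm_loanpage(pgs, npages);	/* always unbusies pgs[] */
	mutex_exit(uobj->vmobjlock);
	return error;	/* EBUSY if a wired page was encountered */
}
#endif /* notdef */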

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone;	/* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock;	/* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
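
/*
 * a hedged sketch of the O->K path via uvm_loanuobjpages(), e.g.
 * loaning a vnode's pages for zero-copy reads.  the function name is
 * illustrative, not an existing caller.
 */
#ifdef notdef
static int
example_loan_uobj_pages(struct uvm_object *uobj, voff_t off, int npages,
    struct vm_page **pgs)
{
	int error;

	/* uobj must be unlocked; pages come back loaned (read-only) */
	error = uvm_loanuobjpages(uobj, off, npages, pgs);
	if (error)
		return error;	/* e.g. EBUSY if a wired page was met */

	/* ... use the pages ... */

	uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	return 0;
}
#endif /* notdef */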

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.  attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that amap slot is still free.  if there is a problem we
		 * drop our lock (thus force a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?  release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj);
			return (-1);
		}
		mutex_exit(uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * must be a loan to an anon.  check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.  the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		/* XXX: locking */
		anon = pg->uanon;
		anon->an_ref++;
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	pg->uanon = anon;
	anon->an_page = pg;
	anon->an_lock = NULL;	/* TODO: share the amap lock */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(uobj->vmobjlock);
	mutex_exit(anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj);
	if (anon) {
		anon->an_ref--;
		uvm_anon_free(anon);
	}
#endif	/* notdef */
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;
static kmutex_t uvm_loanzero_lock;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	mutex_enter(uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
	    TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
		    UVM_PGA_ZERO)) == NULL) {
			mutex_exit(uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		mutex_enter(&uvm_pageqlock);
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&uvm_pageqlock);
		pg->loan_count++;
		mutex_exit(&uvm_pageqlock);
		mutex_exit(uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(anon->an_lock);
		anon->an_ref++;
		mutex_exit(anon->an_lock);
		mutex_exit(uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		mutex_exit(uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(anon->an_lock);
	mutex_exit(uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
#else
	return (-1);
#endif
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
	struct vm_anon *anon, *to_free = NULL;

	/* TODO: locking */
	amap_lock(amap);
	while (nanons-- > 0) {
		anon = *aloans++;
		if (--anon->an_ref == 0) {
			anon->an_link = to_free;
			to_free = anon;
		}
	}
	uvm_anon_freelst(amap, to_free);
#endif	/* notdef */
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, &uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	mutex_enter(&uvm_pageqlock);
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	mutex_exit(&uvm_pageqlock);

	mutex_exit(uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	mutex_init(&uvm_loanzero_lock, MUTEX_DEFAULT, IPL_NONE);
	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded
 *	NULL if failed (out of memory); the loan is left intact
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.  it can now replace uobjpage.
	 */

	return pg;
}
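
/*
 * a hedged, illustrative sketch of how a write-fault path might use
 * uvm_loanbreak() (cf. the uobj case in uvm_fault.c): with the object
 * locked and the page busy, the loaned page is replaced by a fresh
 * copy that the caller may write to.
 */
#ifdef notdef
static struct vm_page *
example_break_uobj_loan(struct uvm_object *uobj, struct vm_page *uobjpage)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	if (uobjpage->loan_count == 0)
		return uobjpage;	/* nothing to break */

	/* on success "pg" is busy and has replaced uobjpage in uobj */
	pg = uvm_loanbreak(uobjpage);
	if (pg == NULL) {
		/* out of memory: caller should unlock, uvm_wait(), retry */
	}
	return pg;
}
#endif /* notdef */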

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	/* get new un-owned replacement page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL) {
		return ENOMEM;
	}

	/* copy old -> new */
	uvm_pagecopy(anon->an_page, pg);

	/* force reload */
	pmap_page_protect(anon->an_page, VM_PROT_NONE);
	mutex_enter(&uvm_pageqlock);	/* KILL loan */

	anon->an_page->uanon = NULL;
	/* in case we owned */
	anon->an_page->pqflags &= ~PQ_ANON;

	if (uobj) {
		/* if we were receiver of loan */
		anon->an_page->loan_count--;
	} else {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 */
		uvm_pagedequeue(anon->an_page);
	}

	if (uobj) {
		mutex_exit(uobj->vmobjlock);
	}

	/* install new page in anon */
	anon->an_page = pg;
	pg->uanon = anon;
	pg->pqflags |= PQ_ANON;

	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/* done! */

	return 0;
}
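
/*
 * and the anon-side counterpart: a hedged, illustrative sketch of
 * breaking a loan on an anon's page before a write proceeds (the real
 * caller is the anon fault path).  if the page is also owned by a
 * uobj (an O->A loan), that object must be passed in locked.
 */
#ifdef notdef
static int
example_break_anon_loan(struct vm_anon *anon, struct uvm_object *uobj)
{
	KASSERT(mutex_owned(anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	if (anon->an_page->loan_count == 0)
		return 0;	/* nothing to break */

	/* replaces anon->an_page with a fresh, anon-owned copy */
	return uvm_loanbreak_anon(anon, uobj);
}
#endif /* notdef */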