1 | /* $NetBSD: uvm_loan.c,v 1.83 2012/07/30 23:56:48 matt Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1997 Charles D. Cranor and Washington University. |
5 | * All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
17 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
19 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
21 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
22 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
23 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
25 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | * |
27 | * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp |
28 | */ |
29 | |
30 | /* |
31 | * uvm_loan.c: page loanout handler |
32 | */ |
33 | |
34 | #include <sys/cdefs.h> |
35 | __KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.83 2012/07/30 23:56:48 matt Exp $" ); |
36 | |
37 | #include <sys/param.h> |
38 | #include <sys/systm.h> |
39 | #include <sys/kernel.h> |
40 | #include <sys/mman.h> |
41 | |
42 | #include <uvm/uvm.h> |
43 | |
/*
 * loanhist: the history object that the UVMHIST_CALLED() and
 * UVMHIST_LOG() invocations in this file record into.  it is only
 * defined when UVMHIST is defined at compile time.
 */
#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif
47 | |
48 | /* |
49 | * "loaned" pages are pages which are (read-only, copy-on-write) loaned |
50 | * from the VM system to other parts of the kernel. this allows page |
51 | * copying to be avoided (e.g. you can loan pages from objs/anons to |
52 | * the mbuf system). |
53 | * |
54 | * there are 3 types of loans possible: |
55 | * O->K uvm_object page to wired kernel page (e.g. mbuf data area) |
56 | * A->K anon page to wired kernel page (e.g. mbuf data area) |
57 | * O->A uvm_object to anon loan (e.g. vnode page to an anon) |
 * note that it is possible to have an O page loaned to both an A and K
59 | * at the same time. |
60 | * |
61 | * loans are tracked by pg->loan_count. an O->A page will have both |
62 | * a uvm_object and a vm_anon, but PQ_ANON will not be set. this sort |
63 | * of page is considered "owned" by the uvm_object (not the anon). |
64 | * |
65 | * each loan of a page to the kernel bumps the pg->wire_count. the |
66 | * kernel mappings for these pages will be read-only and wired. since |
67 | * the page will also be wired, it will not be a candidate for pageout, |
68 | * and thus will never be pmap_page_protect()'d with VM_PROT_NONE. a |
69 | * write fault in the kernel to one of these pages will not cause |
70 | * copy-on-write. instead, the page fault is considered fatal. this |
71 | * is because the kernel mapping will have no way to look up the |
72 | * object/anon which the page is owned by. this is a good side-effect, |
73 | * since a kernel write to a loaned page is an error. |
74 | * |
75 | * owners that want to free their pages and discover that they are |
76 | * loaned out simply "disown" them (the page becomes an orphan). these |
77 | * pages should be freed when the last loan is dropped. in some cases |
78 | * an anon may "adopt" an orphaned page. |
79 | * |
80 | * locking: to read pg->loan_count either the owner or the page queues |
81 | * must be locked. to modify pg->loan_count, both the owner of the page |
82 | * and the PQs must be locked. pg->flags is (as always) locked by |
83 | * the owner of the page. |
84 | * |
85 | * note that locking from the "loaned" side is tricky since the object |
86 | * getting the loaned page has no reference to the page's owner and thus |
87 | * the owner could "die" at any time. in order to prevent the owner |
88 | * from dying the page queues should be locked. this forces us to sometimes |
89 | * use "try" locking. |
90 | * |
91 | * loans are typically broken by the following events: |
 *  1. user-level write fault to a loaned page
93 | * 2. pageout of clean+inactive O->A loaned page |
94 | * 3. owner frees page (e.g. pager flush) |
95 | * |
96 | * note that loaning a page causes all mappings of the page to become |
97 | * read-only (via pmap_page_protect). this could have an unexpected |
98 | * effect on normal "wired" pages if one is not careful (XXX). |
99 | */ |
100 | |
101 | /* |
102 | * local prototypes |
103 | */ |
104 | |
/* per-page loan helpers, dispatched from uvm_loanentry() */
static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
/* loan teardown, dispatched from uvm_unloan() and uvm_loan()'s fail path */
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
/* ->K loan of object pages that the caller has already busied */
static int	uvm_loanpage(struct vm_page **, int);
113 | |
114 | |
115 | /* |
116 | * inlines |
117 | */ |
118 | |
119 | /* |
120 | * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan()) |
121 | * |
122 | * => "ufi" is the result of a successful map lookup (meaning that |
123 | * on entry the map is locked by the caller) |
124 | * => we may unlock and then relock the map if needed (for I/O) |
125 | * => we put our output result in "output" |
126 | * => we always return with the map unlocked |
127 | * => possible return values: |
128 | * -1 == error, map is unlocked |
129 | * 0 == map relock error (try again!), map is unlocked |
130 | * >0 == number of pages we loaned, map is unlocked |
131 | * |
132 | * NOTE: We can live with this being an inline, because it is only called |
133 | * from one place. |
134 | */ |
135 | |
136 | static inline int |
137 | uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags) |
138 | { |
139 | vaddr_t curaddr = ufi->orig_rvaddr; |
140 | vsize_t togo = ufi->size; |
141 | struct vm_aref *aref = &ufi->entry->aref; |
142 | struct uvm_object *uobj = ufi->entry->object.uvm_obj; |
143 | struct vm_anon *anon; |
144 | int rv, result = 0; |
145 | |
146 | UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); |
147 | |
148 | /* |
149 | * lock us the rest of the way down (we unlock before return) |
150 | */ |
151 | if (aref->ar_amap) { |
152 | amap_lock(aref->ar_amap); |
153 | } |
154 | |
155 | /* |
156 | * loop until done |
157 | */ |
158 | while (togo) { |
159 | |
160 | /* |
161 | * find the page we want. check the anon layer first. |
162 | */ |
163 | |
164 | if (aref->ar_amap) { |
165 | anon = amap_lookup(aref, curaddr - ufi->entry->start); |
166 | } else { |
167 | anon = NULL; |
168 | } |
169 | |
170 | /* locked: map, amap, uobj */ |
171 | if (anon) { |
172 | rv = uvm_loananon(ufi, output, flags, anon); |
173 | } else if (uobj) { |
174 | rv = uvm_loanuobj(ufi, output, flags, curaddr); |
175 | } else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) { |
176 | rv = uvm_loanzero(ufi, output, flags); |
177 | } else { |
178 | uvmfault_unlockall(ufi, aref->ar_amap, uobj); |
179 | rv = -1; |
180 | } |
181 | /* locked: if (rv > 0) => map, amap, uobj [o.w. unlocked] */ |
182 | KASSERT(rv > 0 || aref->ar_amap == NULL || |
183 | !mutex_owned(aref->ar_amap->am_lock)); |
184 | KASSERT(rv > 0 || uobj == NULL || |
185 | !mutex_owned(uobj->vmobjlock)); |
186 | |
187 | /* total failure */ |
188 | if (rv < 0) { |
189 | UVMHIST_LOG(loanhist, "failure %d" , rv, 0,0,0); |
190 | return (-1); |
191 | } |
192 | |
193 | /* relock failed, need to do another lookup */ |
194 | if (rv == 0) { |
195 | UVMHIST_LOG(loanhist, "relock failure %d" , result |
196 | ,0,0,0); |
197 | return (result); |
198 | } |
199 | |
200 | /* |
201 | * got it... advance to next page |
202 | */ |
203 | |
204 | result++; |
205 | togo -= PAGE_SIZE; |
206 | curaddr += PAGE_SIZE; |
207 | } |
208 | |
209 | /* |
210 | * unlock what we locked, unlock the maps and return |
211 | */ |
212 | |
213 | if (aref->ar_amap) { |
214 | amap_unlock(aref->ar_amap); |
215 | } |
216 | uvmfault_unlockmaps(ufi, false); |
217 | UVMHIST_LOG(loanhist, "done %d" , result, 0,0,0); |
218 | return (result); |
219 | } |
220 | |
221 | /* |
222 | * normal functions |
223 | */ |
224 | |
225 | /* |
226 | * uvm_loan: loan pages in a map out to anons or to the kernel |
227 | * |
228 | * => map should be unlocked |
229 | * => start and len should be multiples of PAGE_SIZE |
230 | * => result is either an array of anon's or vm_pages (depending on flags) |
231 | * => flag values: UVM_LOAN_TOANON - loan to anons |
232 | * UVM_LOAN_TOPAGE - loan to wired kernel page |
233 | * one and only one of these flags must be set! |
234 | * => returns 0 (success), or an appropriate error number |
235 | */ |
236 | |
237 | int |
238 | uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags) |
239 | { |
240 | struct uvm_faultinfo ufi; |
241 | void **result, **output; |
242 | int rv, error; |
243 | |
244 | UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); |
245 | |
246 | /* |
247 | * ensure that one and only one of the flags is set |
248 | */ |
249 | |
250 | KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^ |
251 | ((flags & UVM_LOAN_TOPAGE) == 0)); |
252 | |
253 | /* |
254 | * "output" is a pointer to the current place to put the loaned page. |
255 | */ |
256 | |
257 | result = v; |
258 | output = &result[0]; /* start at the beginning ... */ |
259 | |
260 | /* |
261 | * while we've got pages to do |
262 | */ |
263 | |
264 | while (len > 0) { |
265 | |
266 | /* |
267 | * fill in params for a call to uvmfault_lookup |
268 | */ |
269 | |
270 | ufi.orig_map = map; |
271 | ufi.orig_rvaddr = start; |
272 | ufi.orig_size = len; |
273 | |
274 | /* |
275 | * do the lookup, the only time this will fail is if we hit on |
276 | * an unmapped region (an error) |
277 | */ |
278 | |
279 | if (!uvmfault_lookup(&ufi, false)) { |
280 | error = ENOENT; |
281 | goto fail; |
282 | } |
283 | |
284 | /* |
285 | * map now locked. now do the loanout... |
286 | */ |
287 | |
288 | rv = uvm_loanentry(&ufi, &output, flags); |
289 | if (rv < 0) { |
290 | /* all unlocked due to error */ |
291 | error = EINVAL; |
292 | goto fail; |
293 | } |
294 | |
295 | /* |
296 | * done! the map is unlocked. advance, if possible. |
297 | * |
298 | * XXXCDC: could be recoded to hold the map lock with |
299 | * smarter code (but it only happens on map entry |
300 | * boundaries, so it isn't that bad). |
301 | */ |
302 | |
303 | if (rv) { |
304 | rv <<= PAGE_SHIFT; |
305 | len -= rv; |
306 | start += rv; |
307 | } |
308 | } |
309 | UVMHIST_LOG(loanhist, "success" , 0,0,0,0); |
310 | return 0; |
311 | |
312 | fail: |
313 | /* |
314 | * failed to complete loans. drop any loans and return failure code. |
315 | * map is already unlocked. |
316 | */ |
317 | |
318 | if (output - result) { |
319 | if (flags & UVM_LOAN_TOANON) { |
320 | uvm_unloananon((struct vm_anon **)result, |
321 | output - result); |
322 | } else { |
323 | uvm_unloanpage((struct vm_page **)result, |
324 | output - result); |
325 | } |
326 | } |
327 | UVMHIST_LOG(loanhist, "error %d" , error,0,0,0); |
328 | return (error); |
329 | } |
330 | |
331 | /* |
332 | * uvm_loananon: loan a page from an anon out |
333 | * |
334 | * => called with map, amap, uobj locked |
335 | * => return value: |
336 | * -1 = fatal error, everything is unlocked, abort. |
337 | * 0 = lookup in ufi went stale, everything unlocked, relookup and |
338 | * try again |
339 | * 1 = got it, everything still locked |
340 | */ |
341 | |
342 | int |
343 | uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, |
344 | struct vm_anon *anon) |
345 | { |
346 | struct vm_page *pg; |
347 | int error; |
348 | |
349 | UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); |
350 | |
351 | /* |
352 | * if we are loaning to "another" anon then it is easy, we just |
353 | * bump the reference count on the current anon and return a |
354 | * pointer to it (it becomes copy-on-write shared). |
355 | */ |
356 | |
357 | if (flags & UVM_LOAN_TOANON) { |
358 | KASSERT(mutex_owned(anon->an_lock)); |
359 | pg = anon->an_page; |
360 | if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) { |
361 | if (pg->wire_count > 0) { |
362 | UVMHIST_LOG(loanhist, "->A wired %p" , pg,0,0,0); |
363 | uvmfault_unlockall(ufi, |
364 | ufi->entry->aref.ar_amap, |
365 | ufi->entry->object.uvm_obj); |
366 | return (-1); |
367 | } |
368 | pmap_page_protect(pg, VM_PROT_READ); |
369 | } |
370 | anon->an_ref++; |
371 | **output = anon; |
372 | (*output)++; |
373 | UVMHIST_LOG(loanhist, "->A done" , 0,0,0,0); |
374 | return (1); |
375 | } |
376 | |
377 | /* |
378 | * we are loaning to a kernel-page. we need to get the page |
379 | * resident so we can wire it. uvmfault_anonget will handle |
380 | * this for us. |
381 | */ |
382 | |
383 | KASSERT(mutex_owned(anon->an_lock)); |
384 | error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon); |
385 | |
386 | /* |
387 | * if we were unable to get the anon, then uvmfault_anonget has |
388 | * unlocked everything and returned an error code. |
389 | */ |
390 | |
391 | if (error) { |
392 | UVMHIST_LOG(loanhist, "error %d" , error,0,0,0); |
393 | |
394 | /* need to refault (i.e. refresh our lookup) ? */ |
395 | if (error == ERESTART) { |
396 | return (0); |
397 | } |
398 | |
399 | /* "try again"? sleep a bit and retry ... */ |
400 | if (error == EAGAIN) { |
401 | kpause("loanagain" , false, hz/2, NULL); |
402 | return (0); |
403 | } |
404 | |
405 | /* otherwise flag it as an error */ |
406 | return (-1); |
407 | } |
408 | |
409 | /* |
410 | * we have the page and its owner locked: do the loan now. |
411 | */ |
412 | |
413 | pg = anon->an_page; |
414 | mutex_enter(&uvm_pageqlock); |
415 | if (pg->wire_count > 0) { |
416 | mutex_exit(&uvm_pageqlock); |
417 | UVMHIST_LOG(loanhist, "->K wired %p" , pg,0,0,0); |
418 | KASSERT(pg->uobject == NULL); |
419 | uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL); |
420 | return (-1); |
421 | } |
422 | if (pg->loan_count == 0) { |
423 | pmap_page_protect(pg, VM_PROT_READ); |
424 | } |
425 | pg->loan_count++; |
426 | uvm_pageactivate(pg); |
427 | mutex_exit(&uvm_pageqlock); |
428 | **output = pg; |
429 | (*output)++; |
430 | |
431 | /* unlock and return success */ |
432 | if (pg->uobject) |
433 | mutex_exit(pg->uobject->vmobjlock); |
434 | UVMHIST_LOG(loanhist, "->K done" , 0,0,0,0); |
435 | return (1); |
436 | } |
437 | |
438 | /* |
439 | * uvm_loanpage: loan out pages to kernel (->K) |
440 | * |
441 | * => pages should be object-owned and the object should be locked. |
442 | * => in the case of error, the object might be unlocked and relocked. |
443 | * => caller should busy the pages beforehand. |
444 | * => pages will be unbusied. |
445 | * => fail with EBUSY if meet a wired page. |
446 | */ |
447 | static int |
448 | uvm_loanpage(struct vm_page **pgpp, int npages) |
449 | { |
450 | int i; |
451 | int error = 0; |
452 | |
453 | UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); |
454 | |
455 | for (i = 0; i < npages; i++) { |
456 | struct vm_page *pg = pgpp[i]; |
457 | |
458 | KASSERT(pg->uobject != NULL); |
459 | KASSERT(pg->uobject == pgpp[0]->uobject); |
460 | KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT))); |
461 | KASSERT(mutex_owned(pg->uobject->vmobjlock)); |
462 | KASSERT(pg->flags & PG_BUSY); |
463 | |
464 | mutex_enter(&uvm_pageqlock); |
465 | if (pg->wire_count > 0) { |
466 | mutex_exit(&uvm_pageqlock); |
467 | UVMHIST_LOG(loanhist, "wired %p" , pg,0,0,0); |
468 | error = EBUSY; |
469 | break; |
470 | } |
471 | if (pg->loan_count == 0) { |
472 | pmap_page_protect(pg, VM_PROT_READ); |
473 | } |
474 | pg->loan_count++; |
475 | uvm_pageactivate(pg); |
476 | mutex_exit(&uvm_pageqlock); |
477 | } |
478 | |
479 | uvm_page_unbusy(pgpp, npages); |
480 | |
481 | if (error) { |
482 | /* |
483 | * backout what we've done |
484 | */ |
485 | kmutex_t *slock = pgpp[0]->uobject->vmobjlock; |
486 | |
487 | mutex_exit(slock); |
488 | uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE); |
489 | mutex_enter(slock); |
490 | } |
491 | |
492 | UVMHIST_LOG(loanhist, "done %d" , error,0,0,0); |
493 | return error; |
494 | } |
495 | |
/*
 * XXX UBC temp limit
 * number of pages to get at once: uvm_loanuobjpages() never asks a
 * single pgo_get call for more than this many pages.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16
502 | |
503 | /* |
504 | * uvm_loanuobjpages: loan pages from a uobj out (O->K) |
505 | * |
506 | * => uobj shouldn't be locked. (we'll lock it) |
507 | * => fail with EBUSY if we meet a wired page. |
508 | */ |
509 | int |
510 | uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, |
511 | struct vm_page **origpgpp) |
512 | { |
513 | int ndone; /* # of pages loaned out */ |
514 | struct vm_page **pgpp; |
515 | int error; |
516 | int i; |
517 | kmutex_t *slock; |
518 | |
519 | pgpp = origpgpp; |
520 | for (ndone = 0; ndone < orignpages; ) { |
521 | int npages; |
522 | /* npendloan: # of pages busied but not loand out yet. */ |
523 | int npendloan = 0xdead; /* XXX gcc */ |
524 | reget: |
525 | npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone); |
526 | mutex_enter(uobj->vmobjlock); |
527 | error = (*uobj->pgops->pgo_get)(uobj, |
528 | pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0, |
529 | VM_PROT_READ, 0, PGO_SYNCIO); |
530 | if (error == EAGAIN) { |
531 | kpause("loanuopg" , false, hz/2, NULL); |
532 | continue; |
533 | } |
534 | if (error) |
535 | goto fail; |
536 | |
537 | KASSERT(npages > 0); |
538 | |
539 | /* loan and unbusy pages */ |
540 | slock = NULL; |
541 | for (i = 0; i < npages; i++) { |
542 | kmutex_t *nextslock; /* slock for next page */ |
543 | struct vm_page *pg = *pgpp; |
544 | |
545 | /* XXX assuming that the page is owned by uobj */ |
546 | KASSERT(pg->uobject != NULL); |
547 | nextslock = pg->uobject->vmobjlock; |
548 | |
549 | if (slock != nextslock) { |
550 | if (slock) { |
551 | KASSERT(npendloan > 0); |
552 | error = uvm_loanpage(pgpp - npendloan, |
553 | npendloan); |
554 | mutex_exit(slock); |
555 | if (error) |
556 | goto fail; |
557 | ndone += npendloan; |
558 | KASSERT(origpgpp + ndone == pgpp); |
559 | } |
560 | slock = nextslock; |
561 | npendloan = 0; |
562 | mutex_enter(slock); |
563 | } |
564 | |
565 | if ((pg->flags & PG_RELEASED) != 0) { |
566 | /* |
567 | * release pages and try again. |
568 | */ |
569 | mutex_exit(slock); |
570 | for (; i < npages; i++) { |
571 | pg = pgpp[i]; |
572 | slock = pg->uobject->vmobjlock; |
573 | |
574 | mutex_enter(slock); |
575 | mutex_enter(&uvm_pageqlock); |
576 | uvm_page_unbusy(&pg, 1); |
577 | mutex_exit(&uvm_pageqlock); |
578 | mutex_exit(slock); |
579 | } |
580 | goto reget; |
581 | } |
582 | |
583 | npendloan++; |
584 | pgpp++; |
585 | KASSERT(origpgpp + ndone + npendloan == pgpp); |
586 | } |
587 | KASSERT(slock != NULL); |
588 | KASSERT(npendloan > 0); |
589 | error = uvm_loanpage(pgpp - npendloan, npendloan); |
590 | mutex_exit(slock); |
591 | if (error) |
592 | goto fail; |
593 | ndone += npendloan; |
594 | KASSERT(origpgpp + ndone == pgpp); |
595 | } |
596 | |
597 | return 0; |
598 | |
599 | fail: |
600 | uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE); |
601 | |
602 | return error; |
603 | } |
604 | |
605 | /* |
606 | * uvm_loanuobj: loan a page from a uobj out |
607 | * |
608 | * => called with map, amap, uobj locked |
609 | * => return value: |
610 | * -1 = fatal error, everything is unlocked, abort. |
611 | * 0 = lookup in ufi went stale, everything unlocked, relookup and |
612 | * try again |
613 | * 1 = got it, everything still locked |
614 | */ |
615 | |
616 | static int |
617 | uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) |
618 | { |
619 | struct vm_amap *amap = ufi->entry->aref.ar_amap; |
620 | struct uvm_object *uobj = ufi->entry->object.uvm_obj; |
621 | struct vm_page *pg; |
622 | int error, npages; |
623 | bool locked; |
624 | |
625 | UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); |
626 | |
627 | /* |
628 | * first we must make sure the page is resident. |
629 | * |
630 | * XXXCDC: duplicate code with uvm_fault(). |
631 | */ |
632 | |
633 | /* locked: maps(read), amap(if there) */ |
634 | mutex_enter(uobj->vmobjlock); |
635 | /* locked: maps(read), amap(if there), uobj */ |
636 | |
637 | if (uobj->pgops->pgo_get) { /* try locked pgo_get */ |
638 | npages = 1; |
639 | pg = NULL; |
640 | error = (*uobj->pgops->pgo_get)(uobj, |
641 | va - ufi->entry->start + ufi->entry->offset, |
642 | &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED); |
643 | } else { |
644 | error = EIO; /* must have pgo_get op */ |
645 | } |
646 | |
647 | /* |
648 | * check the result of the locked pgo_get. if there is a problem, |
649 | * then we fail the loan. |
650 | */ |
651 | |
652 | if (error && error != EBUSY) { |
653 | uvmfault_unlockall(ufi, amap, uobj); |
654 | return (-1); |
655 | } |
656 | |
657 | /* |
658 | * if we need to unlock for I/O, do so now. |
659 | */ |
660 | |
661 | if (error == EBUSY) { |
662 | uvmfault_unlockall(ufi, amap, NULL); |
663 | |
664 | /* locked: uobj */ |
665 | npages = 1; |
666 | error = (*uobj->pgops->pgo_get)(uobj, |
667 | va - ufi->entry->start + ufi->entry->offset, |
668 | &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO); |
669 | /* locked: <nothing> */ |
670 | |
671 | if (error) { |
672 | if (error == EAGAIN) { |
673 | kpause("fltagain2" , false, hz/2, NULL); |
674 | return (0); |
675 | } |
676 | return (-1); |
677 | } |
678 | |
679 | /* |
680 | * pgo_get was a success. attempt to relock everything. |
681 | */ |
682 | |
683 | locked = uvmfault_relock(ufi); |
684 | if (locked && amap) |
685 | amap_lock(amap); |
686 | uobj = pg->uobject; |
687 | mutex_enter(uobj->vmobjlock); |
688 | |
689 | /* |
690 | * verify that the page has not be released and re-verify |
691 | * that amap slot is still free. if there is a problem we |
692 | * drop our lock (thus force a lookup refresh/retry). |
693 | */ |
694 | |
695 | if ((pg->flags & PG_RELEASED) != 0 || |
696 | (locked && amap && amap_lookup(&ufi->entry->aref, |
697 | ufi->orig_rvaddr - ufi->entry->start))) { |
698 | if (locked) |
699 | uvmfault_unlockall(ufi, amap, NULL); |
700 | locked = false; |
701 | } |
702 | |
703 | /* |
704 | * didn't get the lock? release the page and retry. |
705 | */ |
706 | |
707 | if (locked == false) { |
708 | if (pg->flags & PG_WANTED) { |
709 | wakeup(pg); |
710 | } |
711 | if (pg->flags & PG_RELEASED) { |
712 | mutex_enter(&uvm_pageqlock); |
713 | uvm_pagefree(pg); |
714 | mutex_exit(&uvm_pageqlock); |
715 | mutex_exit(uobj->vmobjlock); |
716 | return (0); |
717 | } |
718 | mutex_enter(&uvm_pageqlock); |
719 | uvm_pageactivate(pg); |
720 | mutex_exit(&uvm_pageqlock); |
721 | pg->flags &= ~(PG_BUSY|PG_WANTED); |
722 | UVM_PAGE_OWN(pg, NULL); |
723 | mutex_exit(uobj->vmobjlock); |
724 | return (0); |
725 | } |
726 | } |
727 | |
728 | KASSERT(uobj == pg->uobject); |
729 | |
730 | /* |
731 | * at this point we have the page we want ("pg") marked PG_BUSY for us |
732 | * and we have all data structures locked. do the loanout. page can |
733 | * not be PG_RELEASED (we caught this above). |
734 | */ |
735 | |
736 | if ((flags & UVM_LOAN_TOANON) == 0) { |
737 | if (uvm_loanpage(&pg, 1)) { |
738 | uvmfault_unlockall(ufi, amap, uobj); |
739 | return (-1); |
740 | } |
741 | mutex_exit(uobj->vmobjlock); |
742 | **output = pg; |
743 | (*output)++; |
744 | return (1); |
745 | } |
746 | |
747 | #ifdef notdef |
748 | /* |
749 | * must be a loan to an anon. check to see if there is already |
750 | * an anon associated with this page. if so, then just return |
751 | * a reference to this object. the page should already be |
752 | * mapped read-only because it is already on loan. |
753 | */ |
754 | |
755 | if (pg->uanon) { |
756 | /* XXX: locking */ |
757 | anon = pg->uanon; |
758 | anon->an_ref++; |
759 | if (pg->flags & PG_WANTED) { |
760 | wakeup(pg); |
761 | } |
762 | pg->flags &= ~(PG_WANTED|PG_BUSY); |
763 | UVM_PAGE_OWN(pg, NULL); |
764 | mutex_exit(uobj->vmobjlock); |
765 | **output = anon; |
766 | (*output)++; |
767 | return (1); |
768 | } |
769 | |
770 | /* |
771 | * need to allocate a new anon |
772 | */ |
773 | |
774 | anon = uvm_analloc(); |
775 | if (anon == NULL) { |
776 | goto fail; |
777 | } |
778 | mutex_enter(&uvm_pageqlock); |
779 | if (pg->wire_count > 0) { |
780 | mutex_exit(&uvm_pageqlock); |
781 | UVMHIST_LOG(loanhist, "wired %p" , pg,0,0,0); |
782 | goto fail; |
783 | } |
784 | if (pg->loan_count == 0) { |
785 | pmap_page_protect(pg, VM_PROT_READ); |
786 | } |
787 | pg->loan_count++; |
788 | pg->uanon = anon; |
789 | anon->an_page = pg; |
790 | anon->an_lock = /* TODO: share amap lock */ |
791 | uvm_pageactivate(pg); |
792 | mutex_exit(&uvm_pageqlock); |
793 | if (pg->flags & PG_WANTED) { |
794 | wakeup(pg); |
795 | } |
796 | pg->flags &= ~(PG_WANTED|PG_BUSY); |
797 | UVM_PAGE_OWN(pg, NULL); |
798 | mutex_exit(uobj->vmobjlock); |
799 | mutex_exit(&anon->an_lock); |
800 | **output = anon; |
801 | (*output)++; |
802 | return (1); |
803 | |
804 | fail: |
805 | UVMHIST_LOG(loanhist, "fail" , 0,0,0,0); |
806 | /* |
807 | * unlock everything and bail out. |
808 | */ |
809 | if (pg->flags & PG_WANTED) { |
810 | wakeup(pg); |
811 | } |
812 | pg->flags &= ~(PG_WANTED|PG_BUSY); |
813 | UVM_PAGE_OWN(pg, NULL); |
814 | uvmfault_unlockall(ufi, amap, uobj, NULL); |
815 | if (anon) { |
816 | anon->an_ref--; |
817 | uvm_anon_free(anon); |
818 | } |
819 | #endif /* notdef */ |
820 | return (-1); |
821 | } |
822 | |
823 | /* |
824 | * uvm_loanzero: loan a zero-fill page out |
825 | * |
826 | * => called with map, amap, uobj locked |
827 | * => return value: |
828 | * -1 = fatal error, everything is unlocked, abort. |
829 | * 0 = lookup in ufi went stale, everything unlocked, relookup and |
830 | * try again |
831 | * 1 = got it, everything still locked |
832 | */ |
833 | |
834 | static struct uvm_object uvm_loanzero_object; |
835 | static kmutex_t uvm_loanzero_lock; |
836 | |
837 | static int |
838 | uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags) |
839 | { |
840 | struct vm_page *pg; |
841 | struct vm_amap *amap = ufi->entry->aref.ar_amap; |
842 | |
843 | UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist); |
844 | again: |
845 | mutex_enter(uvm_loanzero_object.vmobjlock); |
846 | |
847 | /* |
848 | * first, get ahold of our single zero page. |
849 | */ |
850 | |
851 | if (__predict_false((pg = |
852 | TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) { |
853 | while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL, |
854 | UVM_PGA_ZERO)) == NULL) { |
855 | mutex_exit(uvm_loanzero_object.vmobjlock); |
856 | uvmfault_unlockall(ufi, amap, NULL); |
857 | uvm_wait("loanzero" ); |
858 | if (!uvmfault_relock(ufi)) { |
859 | return (0); |
860 | } |
861 | if (amap) { |
862 | amap_lock(amap); |
863 | } |
864 | goto again; |
865 | } |
866 | |
867 | /* got a zero'd page. */ |
868 | pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); |
869 | pg->flags |= PG_RDONLY; |
870 | mutex_enter(&uvm_pageqlock); |
871 | uvm_pageactivate(pg); |
872 | mutex_exit(&uvm_pageqlock); |
873 | UVM_PAGE_OWN(pg, NULL); |
874 | } |
875 | |
876 | if ((flags & UVM_LOAN_TOANON) == 0) { /* loaning to kernel-page */ |
877 | mutex_enter(&uvm_pageqlock); |
878 | pg->loan_count++; |
879 | mutex_exit(&uvm_pageqlock); |
880 | mutex_exit(uvm_loanzero_object.vmobjlock); |
881 | **output = pg; |
882 | (*output)++; |
883 | return (1); |
884 | } |
885 | |
886 | #ifdef notdef |
887 | /* |
888 | * loaning to an anon. check to see if there is already an anon |
889 | * associated with this page. if so, then just return a reference |
890 | * to this object. |
891 | */ |
892 | |
893 | if (pg->uanon) { |
894 | anon = pg->uanon; |
895 | mutex_enter(&anon->an_lock); |
896 | anon->an_ref++; |
897 | mutex_exit(&anon->an_lock); |
898 | mutex_exit(uvm_loanzero_object.vmobjlock); |
899 | **output = anon; |
900 | (*output)++; |
901 | return (1); |
902 | } |
903 | |
904 | /* |
905 | * need to allocate a new anon |
906 | */ |
907 | |
908 | anon = uvm_analloc(); |
909 | if (anon == NULL) { |
910 | /* out of swap causes us to fail */ |
911 | mutex_exit(uvm_loanzero_object.vmobjlock); |
912 | uvmfault_unlockall(ufi, amap, NULL, NULL); |
913 | return (-1); |
914 | } |
915 | anon->an_page = pg; |
916 | pg->uanon = anon; |
917 | mutex_enter(&uvm_pageqlock); |
918 | pg->loan_count++; |
919 | uvm_pageactivate(pg); |
920 | mutex_exit(&uvm_pageqlock); |
921 | mutex_exit(&anon->an_lock); |
922 | mutex_exit(uvm_loanzero_object.vmobjlock); |
923 | **output = anon; |
924 | (*output)++; |
925 | return (1); |
926 | #else |
927 | return (-1); |
928 | #endif |
929 | } |
930 | |
931 | |
/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 * => the whole body is compiled out (notdef), so as built this
 *	function does nothing.
 *
 * NOTE(review): the ->A path of uvm_loananon() takes an anon reference
 * (an_ref++) which therefore does not appear to be dropped here --
 * confirm whether any caller still requests UVM_LOAN_TOANON.
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
	struct vm_anon *anon, *to_free = NULL;

	/* TODO: locking */
	amap_lock(amap);
	/* drop one reference per anon; chain those that hit zero */
	while (nanons-- > 0) {
		anon = *aloans++;
		if (--anon->an_ref == 0) {
			anon->an_link = to_free;
			to_free = anon;
		}
	}
	/* hand the chained anons to uvm_anon_freelst() */
	uvm_anon_freelst(amap, to_free);
#endif	/* notdef */
}
956 | |
957 | /* |
958 | * uvm_unloanpage: kill loans on pages loaned out to the kernel |
959 | * |
960 | * => we expect all our resources to be unlocked |
961 | */ |
962 | |
963 | static void |
964 | uvm_unloanpage(struct vm_page **ploans, int npages) |
965 | { |
966 | struct vm_page *pg; |
967 | kmutex_t *slock; |
968 | |
969 | mutex_enter(&uvm_pageqlock); |
970 | while (npages-- > 0) { |
971 | pg = *ploans++; |
972 | |
973 | /* |
974 | * do a little dance to acquire the object or anon lock |
975 | * as appropriate. we are locking in the wrong order, |
976 | * so we have to do a try-lock here. |
977 | */ |
978 | |
979 | slock = NULL; |
980 | while (pg->uobject != NULL || pg->uanon != NULL) { |
981 | if (pg->uobject != NULL) { |
982 | slock = pg->uobject->vmobjlock; |
983 | } else { |
984 | slock = pg->uanon->an_lock; |
985 | } |
986 | if (mutex_tryenter(slock)) { |
987 | break; |
988 | } |
989 | /* XXX Better than yielding but inadequate. */ |
990 | kpause("livelock" , false, 1, &uvm_pageqlock); |
991 | slock = NULL; |
992 | } |
993 | |
994 | /* |
995 | * drop our loan. if page is owned by an anon but |
996 | * PQ_ANON is not set, the page was loaned to the anon |
997 | * from an object which dropped ownership, so resolve |
998 | * this by turning the anon's loan into real ownership |
999 | * (ie. decrement loan_count again and set PQ_ANON). |
1000 | * after all this, if there are no loans left, put the |
1001 | * page back a paging queue (if the page is owned by |
1002 | * an anon) or free it (if the page is now unowned). |
1003 | */ |
1004 | |
1005 | KASSERT(pg->loan_count > 0); |
1006 | pg->loan_count--; |
1007 | if (pg->uobject == NULL && pg->uanon != NULL && |
1008 | (pg->pqflags & PQ_ANON) == 0) { |
1009 | KASSERT(pg->loan_count > 0); |
1010 | pg->loan_count--; |
1011 | pg->pqflags |= PQ_ANON; |
1012 | } |
1013 | if (pg->loan_count == 0 && pg->uobject == NULL && |
1014 | pg->uanon == NULL) { |
1015 | KASSERT((pg->flags & PG_BUSY) == 0); |
1016 | uvm_pagefree(pg); |
1017 | } |
1018 | if (slock != NULL) { |
1019 | mutex_exit(slock); |
1020 | } |
1021 | } |
1022 | mutex_exit(&uvm_pageqlock); |
1023 | } |
1024 | |
1025 | /* |
1026 | * uvm_unloan: kill loans on pages or anons. |
1027 | */ |
1028 | |
1029 | void |
1030 | uvm_unloan(void *v, int npages, int flags) |
1031 | { |
1032 | if (flags & UVM_LOAN_TOANON) { |
1033 | uvm_unloananon(v, npages); |
1034 | } else { |
1035 | uvm_unloanpage(v, npages); |
1036 | } |
1037 | } |
1038 | |
1039 | /* |
1040 | * Minimal pager for uvm_loanzero_object. We need to provide a "put" |
1041 | * method, because the page can end up on a paging queue, and the |
1042 | * page daemon will want to call pgo_put when it encounters the page |
1043 | * on the inactive list. |
1044 | */ |
1045 | |
1046 | static int |
1047 | ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) |
1048 | { |
1049 | struct vm_page *pg; |
1050 | |
1051 | KDASSERT(uobj == &uvm_loanzero_object); |
1052 | |
1053 | /* |
1054 | * Don't need to do any work here if we're not freeing pages. |
1055 | */ |
1056 | |
1057 | if ((flags & PGO_FREE) == 0) { |
1058 | mutex_exit(uobj->vmobjlock); |
1059 | return 0; |
1060 | } |
1061 | |
1062 | /* |
1063 | * we don't actually want to ever free the uvm_loanzero_page, so |
1064 | * just reactivate or dequeue it. |
1065 | */ |
1066 | |
1067 | pg = TAILQ_FIRST(&uobj->memq); |
1068 | KASSERT(pg != NULL); |
1069 | KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL); |
1070 | |
1071 | mutex_enter(&uvm_pageqlock); |
1072 | if (pg->uanon) |
1073 | uvm_pageactivate(pg); |
1074 | else |
1075 | uvm_pagedequeue(pg); |
1076 | mutex_exit(&uvm_pageqlock); |
1077 | |
1078 | mutex_exit(uobj->vmobjlock); |
1079 | return 0; |
1080 | } |
1081 | |
1082 | static const struct uvm_pagerops = { |
1083 | .pgo_put = ulz_put, |
1084 | }; |
1085 | |
1086 | /* |
1087 | * uvm_loan_init(): initialize the uvm_loan() facility. |
1088 | */ |
1089 | |
1090 | void |
1091 | uvm_loan_init(void) |
1092 | { |
1093 | |
1094 | mutex_init(&uvm_loanzero_lock, MUTEX_DEFAULT, IPL_NONE); |
1095 | uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0); |
1096 | uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock); |
1097 | |
1098 | UVMHIST_INIT(loanhist, 300); |
1099 | } |
1100 | |
1101 | /* |
1102 | * uvm_loanbreak: break loan on a uobj page |
1103 | * |
1104 | * => called with uobj locked |
1105 | * => the page should be busy |
1106 | * => return value: |
1107 | * newly allocated page if succeeded |
1108 | */ |
1109 | struct vm_page * |
1110 | uvm_loanbreak(struct vm_page *uobjpage) |
1111 | { |
1112 | struct vm_page *pg; |
1113 | #ifdef DIAGNOSTIC |
1114 | struct uvm_object *uobj = uobjpage->uobject; |
1115 | #endif |
1116 | |
1117 | KASSERT(uobj != NULL); |
1118 | KASSERT(mutex_owned(uobj->vmobjlock)); |
1119 | KASSERT(uobjpage->flags & PG_BUSY); |
1120 | |
1121 | /* alloc new un-owned page */ |
1122 | pg = uvm_pagealloc(NULL, 0, NULL, 0); |
1123 | if (pg == NULL) |
1124 | return NULL; |
1125 | |
1126 | /* |
1127 | * copy the data from the old page to the new |
1128 | * one and clear the fake flags on the new page (keep it busy). |
1129 | * force a reload of the old page by clearing it from all |
1130 | * pmaps. |
1131 | * transfer dirtiness of the old page to the new page. |
1132 | * then lock the page queues to rename the pages. |
1133 | */ |
1134 | |
1135 | uvm_pagecopy(uobjpage, pg); /* old -> new */ |
1136 | pg->flags &= ~PG_FAKE; |
1137 | pmap_page_protect(uobjpage, VM_PROT_NONE); |
1138 | if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) { |
1139 | pmap_clear_modify(pg); |
1140 | pg->flags |= PG_CLEAN; |
1141 | } else { |
1142 | /* uvm_pagecopy marked it dirty */ |
1143 | KASSERT((pg->flags & PG_CLEAN) == 0); |
1144 | /* a object with a dirty page should be dirty. */ |
1145 | KASSERT(!UVM_OBJ_IS_CLEAN(uobj)); |
1146 | } |
1147 | if (uobjpage->flags & PG_WANTED) |
1148 | wakeup(uobjpage); |
1149 | /* uobj still locked */ |
1150 | uobjpage->flags &= ~(PG_WANTED|PG_BUSY); |
1151 | UVM_PAGE_OWN(uobjpage, NULL); |
1152 | |
1153 | mutex_enter(&uvm_pageqlock); |
1154 | |
1155 | /* |
1156 | * replace uobjpage with new page. |
1157 | */ |
1158 | |
1159 | uvm_pagereplace(uobjpage, pg); |
1160 | |
1161 | /* |
1162 | * if the page is no longer referenced by |
1163 | * an anon (i.e. we are breaking an O->K |
1164 | * loan), then remove it from any pageq's. |
1165 | */ |
1166 | if (uobjpage->uanon == NULL) |
1167 | uvm_pagedequeue(uobjpage); |
1168 | |
1169 | /* |
1170 | * at this point we have absolutely no |
1171 | * control over uobjpage |
1172 | */ |
1173 | |
1174 | /* install new page */ |
1175 | uvm_pageactivate(pg); |
1176 | mutex_exit(&uvm_pageqlock); |
1177 | |
1178 | /* |
1179 | * done! loan is broken and "pg" is |
1180 | * PG_BUSY. it can now replace uobjpage. |
1181 | */ |
1182 | |
1183 | return pg; |
1184 | } |
1185 | |
1186 | int |
1187 | uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj) |
1188 | { |
1189 | struct vm_page *pg; |
1190 | |
1191 | KASSERT(mutex_owned(anon->an_lock)); |
1192 | KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); |
1193 | |
1194 | /* get new un-owned replacement page */ |
1195 | pg = uvm_pagealloc(NULL, 0, NULL, 0); |
1196 | if (pg == NULL) { |
1197 | return ENOMEM; |
1198 | } |
1199 | |
1200 | /* copy old -> new */ |
1201 | uvm_pagecopy(anon->an_page, pg); |
1202 | |
1203 | /* force reload */ |
1204 | pmap_page_protect(anon->an_page, VM_PROT_NONE); |
1205 | mutex_enter(&uvm_pageqlock); /* KILL loan */ |
1206 | |
1207 | anon->an_page->uanon = NULL; |
1208 | /* in case we owned */ |
1209 | anon->an_page->pqflags &= ~PQ_ANON; |
1210 | |
1211 | if (uobj) { |
1212 | /* if we were receiver of loan */ |
1213 | anon->an_page->loan_count--; |
1214 | } else { |
1215 | /* |
1216 | * we were the lender (A->K); need to remove the page from |
1217 | * pageq's. |
1218 | */ |
1219 | uvm_pagedequeue(anon->an_page); |
1220 | } |
1221 | |
1222 | if (uobj) { |
1223 | mutex_exit(uobj->vmobjlock); |
1224 | } |
1225 | |
1226 | /* install new page in anon */ |
1227 | anon->an_page = pg; |
1228 | pg->uanon = anon; |
1229 | pg->pqflags |= PQ_ANON; |
1230 | |
1231 | uvm_pageactivate(pg); |
1232 | mutex_exit(&uvm_pageqlock); |
1233 | |
1234 | pg->flags &= ~(PG_BUSY|PG_FAKE); |
1235 | UVM_PAGE_OWN(pg, NULL); |
1236 | |
1237 | /* done! */ |
1238 | |
1239 | return 0; |
1240 | } |
1241 | |