/* $NetBSD: uvm_pager.c,v 1.110 2014/03/01 18:32:01 christos Exp $ */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
 */

/*
 * uvm_pager.c: generic functions used to assist the pagers.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.110 2014/03/01 18:32:01 christos Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"
#include "opt_pagermap.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/buf.h>

#include <uvm/uvm.h>

/*
 * XXX
 * this is needed until the device strategy interface
 * is changed to do physically-addressed i/o.
 */
#ifndef PAGER_MAP_DEFAULT_SIZE
#define PAGER_MAP_DEFAULT_SIZE	(16 * 1024 * 1024)
#endif

#ifndef PAGER_MAP_SIZE
#define PAGER_MAP_SIZE	PAGER_MAP_DEFAULT_SIZE
#endif

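/*
 * note: the pager map defaults to 16MB of KVA; PAGER_MAP_SIZE can be
 * overridden from the kernel config (via opt_pagermap.h, included
 * above), and the value actually used at run time is pager_map_size.
 */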
size_t pager_map_size = PAGER_MAP_SIZE;

/*
 * list of uvm pagers in the system
 */

const struct uvm_pagerops * const uvmpagerops[] = {
	&aobj_pager,
	&uvm_deviceops,
	&uvm_vnodeops,
	&ubc_pager,
};

/*
 * the pager map: provides KVA for I/O
 */

struct vm_map *pager_map;		/* XXX */
kmutex_t pager_map_wanted_lock;
bool pager_map_wanted;	/* locked by pager map */
static vaddr_t emergva;
static int emerg_ncolors;
static bool emerginuse;

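/*
 * uvm_pager_realloc_emerg: allocate (or, once the number of page colors
 * has grown, re-allocate) the emergency KVA that the pagedaemon falls
 * back on when pager_map is full.
 */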
void
uvm_pager_realloc_emerg(void)
{
	vaddr_t new_emergva, old_emergva;
	int old_emerg_ncolors;

	if (__predict_true(emergva != 0 && emerg_ncolors >= uvmexp.ncolors))
		return;

	KASSERT(!emerginuse);

	new_emergva = uvm_km_alloc(kernel_map,
	    round_page(MAXPHYS) + ptoa(uvmexp.ncolors), ptoa(uvmexp.ncolors),
	    UVM_KMF_VAONLY);

	KASSERT(new_emergva != 0);

	old_emergva = emergva;
	old_emerg_ncolors = emerg_ncolors;

	/*
	 * don't support re-color in late boot anyway.
	 */
	if (0) /* XXX */
		mutex_enter(&pager_map_wanted_lock);

	emergva = new_emergva;
	emerg_ncolors = uvmexp.ncolors;
	wakeup(&old_emergva);

	if (0) /* XXX */
		mutex_exit(&pager_map_wanted_lock);

	if (old_emergva)
		uvm_km_free(kernel_map, old_emergva,
		    round_page(MAXPHYS) + ptoa(old_emerg_ncolors),
		    UVM_KMF_VAONLY);
}

/*
 * uvm_pager_init: init pagers (at boot time)
 */

void
uvm_pager_init(void)
{
	u_int lcv;
	vaddr_t sva, eva;

	/*
	 * init pager map
	 */

	sva = 0;
	pager_map = uvm_km_suballoc(kernel_map, &sva, &eva, pager_map_size, 0,
	    false, NULL);
	mutex_init(&pager_map_wanted_lock, MUTEX_DEFAULT, IPL_NONE);
	pager_map_wanted = false;

	uvm_pager_realloc_emerg();

	/*
	 * init ASYNC I/O queue
	 */

	TAILQ_INIT(&uvm.aio_done);

	/*
	 * call pager init functions
	 */
	for (lcv = 0; lcv < __arraycount(uvmpagerops); lcv++) {
		if (uvmpagerops[lcv]->pgo_init)
			uvmpagerops[lcv]->pgo_init();
	}
}

/*
 * uvm_pagermapin: map pages into KVA (pager_map) for I/O that needs mappings
 *
 * we basically just map in a blank map entry to reserve the space in the
 * map and then use pmap_kenter_pa() to put the mappings in by hand.
 */

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{
	vsize_t size;
	vaddr_t kva;
	vaddr_t cva;
	struct vm_page *pp;
	vm_prot_t prot;
	const bool pdaemon = (curlwp == uvm.pagedaemon_lwp);
	const u_int first_color = VM_PGCOLOR_BUCKET(*pps);
	UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(pps=0x%x, npages=%d, first_color=%u)",
	    pps, npages, first_color, 0);

	/*
	 * compute protection. outgoing I/O only needs read
	 * access to the page, whereas incoming needs read/write.
	 */

	prot = VM_PROT_READ;
	if (flags & UVMPAGER_MAPIN_READ)
		prot |= VM_PROT_WRITE;

ReStart:
	size = ptoa(npages);
	kva = 0;			/* let system choose VA */

	if (uvm_map(pager_map, &kva, size, NULL, UVM_UNKNOWN_OFFSET,
	    first_color, UVM_FLAG_COLORMATCH | UVM_FLAG_NOMERGE
	    | (pdaemon ? UVM_FLAG_NOWAIT : 0)) != 0) {
		if (pdaemon) {
			mutex_enter(&pager_map_wanted_lock);
			if (emerginuse) {
				UVM_UNLOCK_AND_WAIT(&emergva,
				    &pager_map_wanted_lock, false,
				    "emergva", 0);
				goto ReStart;
			}
			emerginuse = true;
			mutex_exit(&pager_map_wanted_lock);
			kva = emergva + ptoa(first_color);
			/* The shift implicitly truncates to PAGE_SIZE */
			KASSERT(npages <= (MAXPHYS >> PAGE_SHIFT));
			goto enter;
		}
		if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
			UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0);
			return(0);
		}
		mutex_enter(&pager_map_wanted_lock);
		pager_map_wanted = true;
		UVMHIST_LOG(maphist, "  SLEEPING on pager_map",0,0,0,0);
		UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, false,
		    "pager_map", 0);
		goto ReStart;
	}

enter:
	/* got it */
	for (cva = kva; npages != 0; npages--, cva += PAGE_SIZE) {
		pp = *pps++;
		KASSERT(pp);
		// KASSERT(!((VM_PAGE_TO_PHYS(pp) ^ cva) & uvmexp.colormask));
		KASSERT(pp->flags & PG_BUSY);
		pmap_kenter_pa(cva, VM_PAGE_TO_PHYS(pp), prot, 0);
	}
	pmap_update(vm_map_pmap(pager_map));

	UVMHIST_LOG(maphist, "<- done (KVA=0x%x)", kva,0,0,0);
	return(kva);
}
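
/*
 * example (a hypothetical caller, for illustration only): map a set of
 * busy pages for i/o, then unmap them once the transfer completes:
 *
 *	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK);
 *	... do device i/o on the range [kva, kva + ptoa(npages)) ...
 *	uvm_pagermapout(kva, npages);
 */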

/*
 * uvm_pagermapout: remove pager_map mapping
 *
 * we remove the pmap-level mappings by hand and then remove the map
 * entry itself (waking up anyone waiting for space).
 */

void
uvm_pagermapout(vaddr_t kva, int npages)
{
	vsize_t size = ptoa(npages);
	struct vm_map_entry *entries;
	UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, " (kva=0x%x, npages=%d)", kva, npages,0,0);

	/*
	 * duplicate uvm_unmap, but add in pager_map_wanted handling.
	 */

	pmap_kremove(kva, size);
	pmap_update(pmap_kernel());

	if ((kva & ~ptoa(uvmexp.colormask)) == emergva) {
		mutex_enter(&pager_map_wanted_lock);
		KASSERT(emerginuse);
		emerginuse = false;
		wakeup(&emergva);
		mutex_exit(&pager_map_wanted_lock);
		return;
	}

	vm_map_lock(pager_map);
	uvm_unmap_remove(pager_map, kva, kva + size, &entries, 0);
	mutex_enter(&pager_map_wanted_lock);
	if (pager_map_wanted) {
		pager_map_wanted = false;
		wakeup(pager_map);
	}
	mutex_exit(&pager_map_wanted_lock);
	vm_map_unlock(pager_map);
	if (entries)
		uvm_unmap_detach(entries, 0);
	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
}

/*
 * interrupt-context iodone handler for single-buf i/os
 * or the top-level buf of a nested-buf i/o.
 *
 * the real work is deferred to the aiodone workqueue so that it runs
 * in thread context (see uvm_aio_aiodone below).
 */

void
uvm_aio_biodone(struct buf *bp)
{
	/* reset b_iodone for when this is a single-buf i/o. */
	bp->b_iodone = uvm_aio_aiodone;

	workqueue_enqueue(uvm.aiodone_queue, &bp->b_work, NULL);
}

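/*
 * uvm_aio_aiodone_pages: finish async i/o on a set of pages: process
 * any error, clear PG_FAKE on freshly-read pages, account for pagedaemon
 * i/o, and unbusy (or free) the pages.  swap-backed pages are locked
 * and unlocked per-page, since each may belong to a different object
 * or anon.
 */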
void
uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, bool write, int error)
{
	struct uvm_object *uobj;
	struct vm_page *pg;
	kmutex_t *slock;
	int pageout_done;	/* number of PG_PAGEOUT pages processed */
	int swslot;
	int i;
	bool swap;
	UVMHIST_FUNC("uvm_aio_aiodone_pages"); UVMHIST_CALLED(ubchist);

	swslot = 0;
	pageout_done = 0;
	slock = NULL;
	uobj = NULL;
	pg = pgs[0];
	swap = (pg->uanon != NULL && pg->uobject == NULL) ||
		(pg->pqflags & PQ_AOBJ) != 0;
	if (!swap) {
		uobj = pg->uobject;
		slock = uobj->vmobjlock;
		mutex_enter(slock);
		mutex_enter(&uvm_pageqlock);
	} else {
#if defined(VMSWAP)
		if (error) {
			if (pg->uobject != NULL) {
				swslot = uao_find_swslot(pg->uobject,
				    pg->offset >> PAGE_SHIFT);
			} else {
				KASSERT(pg->uanon != NULL);
				swslot = pg->uanon->an_swslot;
			}
			KASSERT(swslot);
		}
#else /* defined(VMSWAP) */
		panic("%s: swap", __func__);
#endif /* defined(VMSWAP) */
	}
	for (i = 0; i < npages; i++) {
#if defined(VMSWAP)
		bool anon_disposed = false; /* XXX gcc */
#endif /* defined(VMSWAP) */

		pg = pgs[i];
		KASSERT(swap || pg->uobject == uobj);
		UVMHIST_LOG(ubchist, "pg %p", pg, 0,0,0);

#if defined(VMSWAP)
		/*
		 * for swap i/os, lock each page's object (or anon)
		 * individually since each page may need a different lock.
		 */

		if (swap) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			mutex_enter(slock);
			mutex_enter(&uvm_pageqlock);
			anon_disposed = (pg->flags & PG_RELEASED) != 0;
			KASSERT(!anon_disposed || pg->uobject != NULL ||
			    pg->uanon->an_ref == 0);
		}
#endif /* defined(VMSWAP) */

		/*
		 * process errors. for reads, just mark the page to be freed.
		 * for writes, if the error was ENOMEM, we assume this was
		 * a transient failure so we mark the page dirty so that
		 * we'll try to write it again later. for all other write
		 * errors, we assume the error is permanent, thus the data
		 * in the page is lost. bummer.
		 */

		if (error) {
			int slot;
			if (!write) {
				pg->flags |= PG_RELEASED;
				continue;
			} else if (error == ENOMEM) {
				if (pg->flags & PG_PAGEOUT) {
					pg->flags &= ~PG_PAGEOUT;
					pageout_done++;
				}
				pg->flags &= ~PG_CLEAN;
				uvm_pageactivate(pg);
				slot = 0;
			} else
				slot = SWSLOT_BAD;

#if defined(VMSWAP)
			if (swap) {
				if (pg->uobject != NULL) {
					int oldslot __diagused;
					oldslot = uao_set_swslot(pg->uobject,
					    pg->offset >> PAGE_SHIFT, slot);
					KASSERT(oldslot == swslot + i);
				} else {
					KASSERT(pg->uanon->an_swslot ==
					    swslot + i);
					pg->uanon->an_swslot = slot;
				}
			}
#endif /* defined(VMSWAP) */
		}

		/*
		 * if the page is PG_FAKE, this must have been a read to
		 * initialize the page. clear PG_FAKE and activate the page.
		 * we must also clear the pmap "modified" flag since it may
		 * still be set from the page's previous identity.
		 */

		if (pg->flags & PG_FAKE) {
			KASSERT(!write);
			pg->flags &= ~PG_FAKE;
#if defined(READAHEAD_STATS)
			pg->pqflags |= PQ_READAHEAD;
			uvm_ra_total.ev_count++;
#endif /* defined(READAHEAD_STATS) */
			KASSERT((pg->flags & PG_CLEAN) != 0);
			uvm_pageenqueue(pg);
			pmap_clear_modify(pg);
		}

		/*
		 * do accounting for pagedaemon i/o and arrange to free
		 * the pages instead of just unbusying them.
		 */

		if (pg->flags & PG_PAGEOUT) {
			pg->flags &= ~PG_PAGEOUT;
			pageout_done++;
			uvmexp.pdfreed++;
			pg->flags |= PG_RELEASED;
		}

#if defined(VMSWAP)
		/*
		 * for swap pages, unlock everything for this page now.
		 */

		if (swap) {
			if (pg->uobject == NULL && anon_disposed) {
				mutex_exit(&uvm_pageqlock);
				uvm_anon_release(pg->uanon);
			} else {
				uvm_page_unbusy(&pg, 1);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(slock);
			}
		}
#endif /* defined(VMSWAP) */
	}
	uvm_pageout_done(pageout_done);
	if (!swap) {
		uvm_page_unbusy(pgs, npages);
		mutex_exit(&uvm_pageqlock);
		mutex_exit(slock);
	} else {
#if defined(VMSWAP)
		KASSERT(write);

		/* these pages are now only in swap. */
		mutex_enter(&uvm_swap_data_lock);
		if (error != ENOMEM) {
			KASSERT(uvmexp.swpgonly + npages <= uvmexp.swpginuse);
			uvmexp.swpgonly += npages;
		}
		mutex_exit(&uvm_swap_data_lock);
		if (error) {
			if (error != ENOMEM)
				uvm_swap_markbad(swslot, npages);
			else
				uvm_swap_free(swslot, npages);
		}
		uvmexp.pdpending--;
#endif /* defined(VMSWAP) */
	}
}

/*
 * uvm_aio_aiodone: do iodone processing for async i/os.
 * this should be called in thread context, not interrupt context.
 */

void
uvm_aio_aiodone(struct buf *bp)
{
	int npages = bp->b_bufsize >> PAGE_SHIFT;
	struct vm_page *pgs[npages];
	int i, error;
	bool write;
	UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);

	error = bp->b_error;
	write = (bp->b_flags & B_READ) == 0;

	for (i = 0; i < npages; i++) {
		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
		UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
	}
	uvm_pagermapout((vaddr_t)bp->b_data, npages);

	uvm_aio_aiodone_pages(pgs, npages, write, error);

	if (write && (bp->b_cflags & BC_AGE) != 0) {
		mutex_enter(bp->b_objlock);
		vwakeup(bp);
		mutex_exit(bp->b_objlock);
	}
	putiobuf(bp);
}

/*
 * uvm_pageratop: convert KVAs in the pager map back to their page
 * structures.
 */

struct vm_page *
uvm_pageratop(vaddr_t kva)
{
	struct vm_page *pg;
	paddr_t pa;
	bool rv __diagused;

	rv = pmap_extract(pmap_kernel(), kva, &pa);
	KASSERT(rv);
	pg = PHYS_TO_VM_PAGE(pa);
	KASSERT(pg != NULL);
	return (pg);
}