1 | /* $NetBSD: union_vfsops.c,v 1.75 2015/07/23 09:45:21 hannken Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1994 The Regents of the University of California. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software donated to Berkeley by |
8 | * Jan-Simon Pendry. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * 3. Neither the name of the University nor the names of its contributors |
19 | * may be used to endorse or promote products derived from this software |
20 | * without specific prior written permission. |
21 | * |
22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 | * SUCH DAMAGE. |
33 | * |
34 | * @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95 |
35 | */ |
36 | |
37 | /* |
38 | * Copyright (c) 1994 Jan-Simon Pendry. |
39 | * All rights reserved. |
40 | * |
41 | * This code is derived from software donated to Berkeley by |
42 | * Jan-Simon Pendry. |
43 | * |
44 | * Redistribution and use in source and binary forms, with or without |
45 | * modification, are permitted provided that the following conditions |
46 | * are met: |
47 | * 1. Redistributions of source code must retain the above copyright |
48 | * notice, this list of conditions and the following disclaimer. |
49 | * 2. Redistributions in binary form must reproduce the above copyright |
50 | * notice, this list of conditions and the following disclaimer in the |
51 | * documentation and/or other materials provided with the distribution. |
52 | * 3. All advertising materials mentioning features or use of this software |
53 | * must display the following acknowledgement: |
54 | * This product includes software developed by the University of |
55 | * California, Berkeley and its contributors. |
56 | * 4. Neither the name of the University nor the names of its contributors |
57 | * may be used to endorse or promote products derived from this software |
58 | * without specific prior written permission. |
59 | * |
60 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
61 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
62 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
63 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
64 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
65 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
66 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
67 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
68 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
69 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
70 | * SUCH DAMAGE. |
71 | * |
72 | * @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95 |
73 | */ |
74 | |
75 | /* |
76 | * Union Layer |
77 | */ |
78 | |
79 | #include <sys/cdefs.h> |
80 | __KERNEL_RCSID(0, "$NetBSD: union_vfsops.c,v 1.75 2015/07/23 09:45:21 hannken Exp $" ); |
81 | |
82 | #include <sys/param.h> |
83 | #include <sys/systm.h> |
84 | #include <sys/sysctl.h> |
85 | #include <sys/time.h> |
86 | #include <sys/proc.h> |
87 | #include <sys/vnode.h> |
88 | #include <sys/mount.h> |
89 | #include <sys/namei.h> |
90 | #include <sys/malloc.h> |
91 | #include <sys/filedesc.h> |
92 | #include <sys/queue.h> |
93 | #include <sys/stat.h> |
94 | #include <sys/kauth.h> |
95 | #include <sys/module.h> |
96 | |
97 | #include <fs/union/union.h> |
98 | |
/*
 * Declare this file as the "union" kernel module of class
 * MODULE_CLASS_VFS.  The third argument is NULL.
 * NOTE(review): presumably that slot lists required modules - confirm
 * against module(9).
 */
MODULE(MODULE_CLASS_VFS, union, NULL);

/*
 * sysctl log handle for the node created by sysctl_createv() in
 * union_modcmd() and released there by sysctl_teardown().
 */
static struct sysctllog *union_sysctl_log;
102 | |
103 | /* |
104 | * Mount union filesystem |
105 | */ |
106 | int |
107 | union_mount(struct mount *mp, const char *path, void *data, size_t *data_len) |
108 | { |
109 | struct lwp *l = curlwp; |
110 | int error = 0; |
111 | struct union_args *args = data; |
112 | struct vnode *lowerrootvp = NULLVP; |
113 | struct vnode *upperrootvp = NULLVP; |
114 | struct union_mount *um = 0; |
115 | const char *cp; |
116 | char *xp; |
117 | int len; |
118 | size_t size; |
119 | |
120 | if (args == NULL) |
121 | return EINVAL; |
122 | if (*data_len < sizeof *args) |
123 | return EINVAL; |
124 | |
125 | #ifdef UNION_DIAGNOSTIC |
126 | printf("union_mount(mp = %p)\n" , mp); |
127 | #endif |
128 | |
129 | if (mp->mnt_flag & MNT_GETARGS) { |
130 | um = MOUNTTOUNIONMOUNT(mp); |
131 | if (um == NULL) |
132 | return EIO; |
133 | args->target = NULL; |
134 | args->mntflags = um->um_op; |
135 | *data_len = sizeof *args; |
136 | return 0; |
137 | } |
138 | /* |
139 | * Update is a no-op |
140 | */ |
141 | if (mp->mnt_flag & MNT_UPDATE) { |
142 | /* |
143 | * Need to provide. |
144 | * 1. a way to convert between rdonly and rdwr mounts. |
145 | * 2. support for nfs exports. |
146 | */ |
147 | error = EOPNOTSUPP; |
148 | goto bad; |
149 | } |
150 | |
151 | lowerrootvp = mp->mnt_vnodecovered; |
152 | vref(lowerrootvp); |
153 | |
154 | /* |
155 | * Find upper node. |
156 | */ |
157 | error = namei_simple_user(args->target, |
158 | NSM_FOLLOW_NOEMULROOT, &upperrootvp); |
159 | if (error != 0) |
160 | goto bad; |
161 | |
162 | if (upperrootvp->v_type != VDIR) { |
163 | error = EINVAL; |
164 | goto bad; |
165 | } |
166 | |
167 | um = kmem_zalloc(sizeof(struct union_mount), KM_SLEEP); |
168 | |
169 | /* |
170 | * Keep a held reference to the target vnodes. |
171 | * They are vrele'd in union_unmount. |
172 | * |
173 | * Depending on the _BELOW flag, the filesystems are |
174 | * viewed in a different order. In effect, this is the |
175 | * same as providing a mount under option to the mount syscall. |
176 | */ |
177 | |
178 | um->um_op = args->mntflags & UNMNT_OPMASK; |
179 | switch (um->um_op) { |
180 | case UNMNT_ABOVE: |
181 | um->um_lowervp = lowerrootvp; |
182 | um->um_uppervp = upperrootvp; |
183 | break; |
184 | |
185 | case UNMNT_BELOW: |
186 | um->um_lowervp = upperrootvp; |
187 | um->um_uppervp = lowerrootvp; |
188 | break; |
189 | |
190 | case UNMNT_REPLACE: |
191 | vrele(lowerrootvp); |
192 | lowerrootvp = NULLVP; |
193 | um->um_uppervp = upperrootvp; |
194 | um->um_lowervp = lowerrootvp; |
195 | break; |
196 | |
197 | default: |
198 | error = EINVAL; |
199 | goto bad; |
200 | } |
201 | |
202 | mp->mnt_iflag |= IMNT_MPSAFE; |
203 | |
204 | /* |
205 | * Unless the mount is readonly, ensure that the top layer |
206 | * supports whiteout operations |
207 | */ |
208 | if ((mp->mnt_flag & MNT_RDONLY) == 0) { |
209 | vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY); |
210 | error = VOP_WHITEOUT(um->um_uppervp, |
211 | (struct componentname *) 0, LOOKUP); |
212 | VOP_UNLOCK(um->um_uppervp); |
213 | if (error) |
214 | goto bad; |
215 | } |
216 | |
217 | um->um_cred = l->l_cred; |
218 | kauth_cred_hold(um->um_cred); |
219 | um->um_cmode = UN_DIRMODE &~ l->l_proc->p_cwdi->cwdi_cmask; |
220 | |
221 | /* |
222 | * Depending on what you think the MNT_LOCAL flag might mean, |
223 | * you may want the && to be || on the conditional below. |
224 | * At the moment it has been defined that the filesystem is |
225 | * only local if it is all local, ie the MNT_LOCAL flag implies |
226 | * that the entire namespace is local. If you think the MNT_LOCAL |
227 | * flag implies that some of the files might be stored locally |
228 | * then you will want to change the conditional. |
229 | */ |
230 | if (um->um_op == UNMNT_ABOVE) { |
231 | if (((um->um_lowervp == NULLVP) || |
232 | (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) && |
233 | (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL)) |
234 | mp->mnt_flag |= MNT_LOCAL; |
235 | } |
236 | |
237 | /* |
238 | * Copy in the upper layer's RDONLY flag. This is for the benefit |
239 | * of lookup() which explicitly checks the flag, rather than asking |
240 | * the filesystem for its own opinion. This means, that an update |
241 | * mount of the underlying filesystem to go from rdonly to rdwr |
242 | * will leave the unioned view as read-only. |
243 | */ |
244 | mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY); |
245 | |
246 | mp->mnt_data = um; |
247 | vfs_getnewfsid(mp); |
248 | |
249 | error = set_statvfs_info( path, UIO_USERSPACE, NULL, UIO_USERSPACE, |
250 | mp->mnt_op->vfs_name, mp, l); |
251 | if (error) |
252 | goto bad; |
253 | |
254 | switch (um->um_op) { |
255 | case UNMNT_ABOVE: |
256 | cp = "<above>:" ; |
257 | break; |
258 | case UNMNT_BELOW: |
259 | cp = "<below>:" ; |
260 | break; |
261 | case UNMNT_REPLACE: |
262 | cp = "" ; |
263 | break; |
264 | default: |
265 | cp = "<invalid>:" ; |
266 | #ifdef DIAGNOSTIC |
267 | panic("union_mount: bad um_op" ); |
268 | #endif |
269 | break; |
270 | } |
271 | len = strlen(cp); |
272 | memcpy(mp->mnt_stat.f_mntfromname, cp, len); |
273 | |
274 | xp = mp->mnt_stat.f_mntfromname + len; |
275 | len = MNAMELEN - len; |
276 | |
277 | (void) copyinstr(args->target, xp, len - 1, &size); |
278 | memset(xp + size, 0, len - size); |
279 | |
280 | #ifdef UNION_DIAGNOSTIC |
281 | printf("union_mount: from %s, on %s\n" , |
282 | mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); |
283 | #endif |
284 | |
285 | /* Setup the readdir hook if it's not set already */ |
286 | if (!vn_union_readdir_hook) |
287 | vn_union_readdir_hook = union_readdirhook; |
288 | |
289 | return (0); |
290 | |
291 | bad: |
292 | if (um) |
293 | kmem_free(um, sizeof(struct union_mount)); |
294 | if (upperrootvp) |
295 | vrele(upperrootvp); |
296 | if (lowerrootvp) |
297 | vrele(lowerrootvp); |
298 | return (error); |
299 | } |
300 | |
/*
 * VFS start hook.  The union layer has nothing of its own to start:
 * the start routine of each underlying filesystem already ran when
 * that filesystem was mounted.  Always succeeds.
 */
/*ARGSUSED*/
int
union_start(struct mount *mp, int flags)
{
	return 0;
}
313 | |
/*
 * Vnode iterator selector used by union_unmount() to count vnodes.
 *
 * cl points at an int counter which is bumped once per call; vp is
 * not examined.  Always returns false, so no vnode is ever selected.
 */
static bool
union_unmount_selector(void *cl, struct vnode *vp)
{
	int *counter = cl;

	(*counter)++;
	return false;
}
325 | |
326 | int |
327 | union_unmount(struct mount *mp, int mntflags) |
328 | { |
329 | struct union_mount *um = MOUNTTOUNIONMOUNT(mp); |
330 | int freeing; |
331 | int error; |
332 | |
333 | #ifdef UNION_DIAGNOSTIC |
334 | printf("union_unmount(mp = %p)\n" , mp); |
335 | #endif |
336 | |
337 | /* |
338 | * Keep flushing vnodes from the mount list. |
339 | * This is needed because of the un_pvp held |
340 | * reference to the parent vnode. |
341 | * If more vnodes have been freed on a given pass, |
342 | * the try again. The loop will iterate at most |
343 | * (d) times, where (d) is the maximum tree depth |
344 | * in the filesystem. |
345 | */ |
346 | for (freeing = 0; (error = vflush(mp, NULL, 0)) != 0;) { |
347 | struct vnode_iterator *marker; |
348 | int n; |
349 | |
350 | /* count #vnodes held on mount list */ |
351 | n = 0; |
352 | vfs_vnode_iterator_init(mp, &marker); |
353 | vfs_vnode_iterator_next(marker, union_unmount_selector, &n); |
354 | vfs_vnode_iterator_destroy(marker); |
355 | |
356 | /* if this is unchanged then stop */ |
357 | if (n == freeing) |
358 | break; |
359 | |
360 | /* otherwise try once more time */ |
361 | freeing = n; |
362 | } |
363 | |
364 | /* |
365 | * Ok, now that we've tried doing it gently, get out the hammer. |
366 | */ |
367 | |
368 | if (mntflags & MNT_FORCE) |
369 | error = vflush(mp, NULL, FORCECLOSE); |
370 | |
371 | if (error) |
372 | return error; |
373 | |
374 | /* |
375 | * Discard references to upper and lower target vnodes. |
376 | */ |
377 | if (um->um_lowervp) |
378 | vrele(um->um_lowervp); |
379 | vrele(um->um_uppervp); |
380 | kauth_cred_free(um->um_cred); |
381 | /* |
382 | * Finally, throw away the union_mount structure |
383 | */ |
384 | kmem_free(um, sizeof(struct union_mount)); |
385 | mp->mnt_data = NULL; |
386 | return 0; |
387 | } |
388 | |
389 | int |
390 | union_root(struct mount *mp, struct vnode **vpp) |
391 | { |
392 | struct union_mount *um = MOUNTTOUNIONMOUNT(mp); |
393 | int error; |
394 | |
395 | /* |
396 | * Return locked reference to root. |
397 | */ |
398 | vref(um->um_uppervp); |
399 | if (um->um_lowervp) |
400 | vref(um->um_lowervp); |
401 | error = union_allocvp(vpp, mp, NULL, NULL, NULL, |
402 | um->um_uppervp, um->um_lowervp, 1); |
403 | |
404 | if (error) { |
405 | vrele(um->um_uppervp); |
406 | if (um->um_lowervp) |
407 | vrele(um->um_lowervp); |
408 | return error; |
409 | } |
410 | |
411 | vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); |
412 | |
413 | return 0; |
414 | } |
415 | |
416 | int |
417 | union_statvfs(struct mount *mp, struct statvfs *sbp) |
418 | { |
419 | int error; |
420 | struct union_mount *um = MOUNTTOUNIONMOUNT(mp); |
421 | struct statvfs *sbuf = malloc(sizeof(*sbuf), M_TEMP, M_WAITOK | M_ZERO); |
422 | unsigned long lbsize; |
423 | |
424 | #ifdef UNION_DIAGNOSTIC |
425 | printf("union_statvfs(mp = %p, lvp = %p, uvp = %p)\n" , mp, |
426 | um->um_lowervp, um->um_uppervp); |
427 | #endif |
428 | |
429 | if (um->um_lowervp) { |
430 | error = VFS_STATVFS(um->um_lowervp->v_mount, sbuf); |
431 | if (error) |
432 | goto done; |
433 | } |
434 | |
435 | /* now copy across the "interesting" information and fake the rest */ |
436 | lbsize = sbuf->f_bsize; |
437 | sbp->f_blocks = sbuf->f_blocks - sbuf->f_bfree; |
438 | sbp->f_files = sbuf->f_files - sbuf->f_ffree; |
439 | |
440 | error = VFS_STATVFS(um->um_uppervp->v_mount, sbuf); |
441 | if (error) |
442 | goto done; |
443 | |
444 | sbp->f_flag = sbuf->f_flag; |
445 | sbp->f_bsize = sbuf->f_bsize; |
446 | sbp->f_frsize = sbuf->f_frsize; |
447 | sbp->f_iosize = sbuf->f_iosize; |
448 | |
449 | /* |
450 | * The "total" fields count total resources in all layers, |
451 | * the "free" fields count only those resources which are |
452 | * free in the upper layer (since only the upper layer |
453 | * is writable). |
454 | */ |
455 | |
456 | if (sbuf->f_bsize != lbsize) |
457 | sbp->f_blocks = sbp->f_blocks * lbsize / sbuf->f_bsize; |
458 | sbp->f_blocks += sbuf->f_blocks; |
459 | sbp->f_bfree = sbuf->f_bfree; |
460 | sbp->f_bavail = sbuf->f_bavail; |
461 | sbp->f_bresvd = sbuf->f_bresvd; |
462 | sbp->f_files += sbuf->f_files; |
463 | sbp->f_ffree = sbuf->f_ffree; |
464 | sbp->f_favail = sbuf->f_favail; |
465 | sbp->f_fresvd = sbuf->f_fresvd; |
466 | |
467 | copy_statvfs_info(sbp, mp); |
468 | done: |
469 | free(sbuf, M_TEMP); |
470 | return error; |
471 | } |
472 | |
473 | /*ARGSUSED*/ |
474 | int |
475 | union_sync(struct mount *mp, int waitfor, |
476 | kauth_cred_t cred) |
477 | { |
478 | |
479 | /* |
480 | * XXX - Assumes no data cached at union layer. |
481 | */ |
482 | return (0); |
483 | } |
484 | |
/*
 * Lookup of a vnode by inode number is not supported on a union
 * mount; always fails with EOPNOTSUPP and leaves *vpp untouched.
 */
/*ARGSUSED*/
int
union_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	return EOPNOTSUPP;
}
493 | |
494 | static int |
495 | union_renamelock_enter(struct mount *mp) |
496 | { |
497 | struct union_mount *um = MOUNTTOUNIONMOUNT(mp); |
498 | |
499 | /* Lock just the upper fs, where the action happens. */ |
500 | return VFS_RENAMELOCK_ENTER(um->um_uppervp->v_mount); |
501 | } |
502 | |
503 | static void |
504 | union_renamelock_exit(struct mount *mp) |
505 | { |
506 | struct union_mount *um = MOUNTTOUNIONMOUNT(mp); |
507 | |
508 | VFS_RENAMELOCK_EXIT(um->um_uppervp->v_mount); |
509 | } |
510 | |
/* Vnode operations vector description for union vnodes; defined outside this file. */
extern const struct vnodeopv_desc union_vnodeop_opv_desc;

/*
 * NULL-terminated list of the vnode operations vector descriptions of
 * this filesystem; referenced from union_vfsops.vfs_opv_descs.
 */
const struct vnodeopv_desc * const union_vnodeopv_descs[] = {
	&union_vnodeop_opv_desc,
	NULL,
};
517 | |
/*
 * VFS operations table for the union filesystem; attached and detached
 * by union_modcmd().
 *
 * Entries set to (void *)eopnotsupp are operations this filesystem does
 * not implement.
 * NOTE(review): eopnotsupp presumably just returns EOPNOTSUPP - confirm.
 * Handlers not defined in this file (union_loadvnode, union_init,
 * union_reinit, union_done, vfs_stdextattrctl) come from elsewhere.
 */
struct vfsops union_vfsops = {
	.vfs_name = MOUNT_UNION,
	/* union_mount() also rejects argument blocks shorter than this. */
	.vfs_min_mount_data = sizeof (struct union_args),
	.vfs_mount = union_mount,
	.vfs_start = union_start,
	.vfs_unmount = union_unmount,
	.vfs_root = union_root,
	.vfs_quotactl = (void *)eopnotsupp,
	.vfs_statvfs = union_statvfs,
	.vfs_sync = union_sync,
	.vfs_vget = union_vget,
	.vfs_loadvnode = union_loadvnode,
	.vfs_fhtovp = (void *)eopnotsupp,
	.vfs_vptofh = (void *)eopnotsupp,
	.vfs_init = union_init,
	.vfs_reinit = union_reinit,
	.vfs_done = union_done,
	.vfs_snapshot = (void *)eopnotsupp,
	.vfs_extattrctl = vfs_stdextattrctl,
	.vfs_suspendctl = (void *)eopnotsupp,
	/* Rename locking is forwarded to the upper layer's mount. */
	.vfs_renamelock_enter = union_renamelock_enter,
	.vfs_renamelock_exit = union_renamelock_exit,
	.vfs_fsync = (void *)eopnotsupp,
	.vfs_opv_descs = union_vnodeopv_descs
};
543 | |
544 | static int |
545 | union_modcmd(modcmd_t cmd, void *arg) |
546 | { |
547 | int error; |
548 | |
549 | switch (cmd) { |
550 | case MODULE_CMD_INIT: |
551 | error = vfs_attach(&union_vfsops); |
552 | if (error != 0) |
553 | break; |
554 | sysctl_createv(&union_sysctl_log, 0, NULL, NULL, |
555 | CTLFLAG_PERMANENT, |
556 | CTLTYPE_NODE, "union" , |
557 | SYSCTL_DESCR("Union file system" ), |
558 | NULL, 0, NULL, 0, |
559 | CTL_VFS, 15, CTL_EOL); |
560 | /* |
561 | * XXX the "15" above could be dynamic, thereby eliminating |
562 | * one more instance of the "number to vfs" mapping problem, |
563 | * but "15" is the order as taken from sys/mount.h |
564 | */ |
565 | break; |
566 | case MODULE_CMD_FINI: |
567 | error = vfs_detach(&union_vfsops); |
568 | if (error != 0) |
569 | break; |
570 | sysctl_teardown(&union_sysctl_log); |
571 | break; |
572 | default: |
573 | error = ENOTTY; |
574 | break; |
575 | } |
576 | |
577 | return (error); |
578 | } |
579 | |