1 | /* $NetBSD: tmpfs_vfsops.c,v 1.68 2016/08/26 21:44:24 dholland Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Julio M. Merino Vidal, developed as part of Google's Summer of Code |
9 | * 2005 program. |
10 | * |
11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions |
13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. |
19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ |
32 | |
33 | /* |
34 | * Efficient memory file system. |
35 | * |
36 | * tmpfs is a file system that uses NetBSD's virtual memory sub-system |
37 | * (the well-known UVM) to store file data and metadata in an efficient |
38 | * way. This means that it does not follow the structure of an on-disk |
39 | * file system because it simply does not need to. Instead, it uses |
40 | * memory-specific data structures and algorithms to automatically |
41 | * allocate and release resources. |
42 | */ |
43 | |
44 | #include <sys/cdefs.h> |
45 | __KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.68 2016/08/26 21:44:24 dholland Exp $" ); |
46 | |
47 | #include <sys/param.h> |
48 | #include <sys/atomic.h> |
49 | #include <sys/types.h> |
50 | #include <sys/kmem.h> |
51 | #include <sys/mount.h> |
52 | #include <sys/stat.h> |
53 | #include <sys/systm.h> |
54 | #include <sys/vnode.h> |
55 | #include <sys/kauth.h> |
56 | #include <sys/module.h> |
57 | |
58 | #include <miscfs/genfs/genfs.h> |
59 | #include <fs/tmpfs/tmpfs.h> |
60 | #include <fs/tmpfs/tmpfs_args.h> |
61 | |
62 | MODULE(MODULE_CLASS_VFS, tmpfs, NULL); |
63 | |
64 | struct pool tmpfs_dirent_pool; |
65 | struct pool tmpfs_node_pool; |
66 | |
67 | void |
68 | tmpfs_init(void) |
69 | { |
70 | |
71 | pool_init(&tmpfs_dirent_pool, sizeof(tmpfs_dirent_t), 0, 0, 0, |
72 | "tmpfs_dirent" , &pool_allocator_nointr, IPL_NONE); |
73 | pool_init(&tmpfs_node_pool, sizeof(tmpfs_node_t), 0, 0, 0, |
74 | "tmpfs_node" , &pool_allocator_nointr, IPL_NONE); |
75 | } |
76 | |
77 | void |
78 | tmpfs_done(void) |
79 | { |
80 | |
81 | pool_destroy(&tmpfs_dirent_pool); |
82 | pool_destroy(&tmpfs_node_pool); |
83 | } |
84 | |
85 | int |
86 | tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) |
87 | { |
88 | struct tmpfs_args *args = data; |
89 | tmpfs_mount_t *tmp; |
90 | tmpfs_node_t *root; |
91 | struct vattr va; |
92 | struct vnode *vp; |
93 | uint64_t memlimit; |
94 | ino_t nodes; |
95 | int error; |
96 | bool set_memlimit; |
97 | bool set_nodes; |
98 | |
99 | if (args == NULL) |
100 | return EINVAL; |
101 | |
102 | /* Validate the version. */ |
103 | if (*data_len < sizeof(*args) || |
104 | args->ta_version != TMPFS_ARGS_VERSION) |
105 | return EINVAL; |
106 | |
107 | /* Handle retrieval of mount point arguments. */ |
108 | if (mp->mnt_flag & MNT_GETARGS) { |
109 | if (mp->mnt_data == NULL) |
110 | return EIO; |
111 | tmp = VFS_TO_TMPFS(mp); |
112 | |
113 | args->ta_version = TMPFS_ARGS_VERSION; |
114 | args->ta_nodes_max = tmp->tm_nodes_max; |
115 | args->ta_size_max = tmp->tm_mem_limit; |
116 | |
117 | root = tmp->tm_root; |
118 | args->ta_root_uid = root->tn_uid; |
119 | args->ta_root_gid = root->tn_gid; |
120 | args->ta_root_mode = root->tn_mode; |
121 | |
122 | *data_len = sizeof(*args); |
123 | return 0; |
124 | } |
125 | |
126 | |
127 | /* Prohibit mounts if there is not enough memory. */ |
128 | if (tmpfs_mem_info(true) < uvmexp.freetarg) |
129 | return EINVAL; |
130 | |
131 | /* Check for invalid uid and gid arguments */ |
132 | if (args->ta_root_uid == VNOVAL || args->ta_root_gid == VNOVAL) |
133 | return EINVAL; |
134 | |
135 | /* This can never happen? */ |
136 | if ((args->ta_root_mode & ALLPERMS) == VNOVAL) |
137 | return EINVAL; |
138 | |
139 | /* Get the memory usage limit for this file-system. */ |
140 | if (args->ta_size_max < PAGE_SIZE) { |
141 | memlimit = UINT64_MAX; |
142 | set_memlimit = false; |
143 | } else { |
144 | memlimit = args->ta_size_max; |
145 | set_memlimit = true; |
146 | } |
147 | KASSERT(memlimit > 0); |
148 | |
149 | if (args->ta_nodes_max <= 3) { |
150 | nodes = 3 + (memlimit / 1024); |
151 | set_nodes = false; |
152 | } else { |
153 | nodes = args->ta_nodes_max; |
154 | set_nodes = true; |
155 | } |
156 | nodes = MIN(nodes, INT_MAX); |
157 | KASSERT(nodes >= 3); |
158 | |
159 | if (mp->mnt_flag & MNT_UPDATE) { |
160 | tmp = VFS_TO_TMPFS(mp); |
161 | if (set_nodes && nodes < tmp->tm_nodes_cnt) |
162 | return EBUSY; |
163 | if (set_memlimit) { |
164 | if ((error = tmpfs_mntmem_set(tmp, memlimit)) != 0) |
165 | return error; |
166 | } |
167 | if (set_nodes) |
168 | tmp->tm_nodes_max = nodes; |
169 | root = tmp->tm_root; |
170 | root->tn_uid = args->ta_root_uid; |
171 | root->tn_gid = args->ta_root_gid; |
172 | root->tn_mode = args->ta_root_mode; |
173 | return 0; |
174 | } |
175 | |
176 | /* Allocate the tmpfs mount structure and fill it. */ |
177 | tmp = kmem_zalloc(sizeof(tmpfs_mount_t), KM_SLEEP); |
178 | if (tmp == NULL) |
179 | return ENOMEM; |
180 | |
181 | tmp->tm_nodes_max = nodes; |
182 | tmp->tm_nodes_cnt = 0; |
183 | LIST_INIT(&tmp->tm_nodes); |
184 | |
185 | mutex_init(&tmp->tm_lock, MUTEX_DEFAULT, IPL_NONE); |
186 | tmpfs_mntmem_init(tmp, memlimit); |
187 | mp->mnt_data = tmp; |
188 | |
189 | /* Allocate the root node. */ |
190 | vattr_null(&va); |
191 | va.va_type = VDIR; |
192 | va.va_mode = args->ta_root_mode & ALLPERMS; |
193 | va.va_uid = args->ta_root_uid; |
194 | va.va_gid = args->ta_root_gid; |
195 | error = vcache_new(mp, NULL, &va, NOCRED, &vp); |
196 | if (error) { |
197 | mp->mnt_data = NULL; |
198 | tmpfs_mntmem_destroy(tmp); |
199 | mutex_destroy(&tmp->tm_lock); |
200 | kmem_free(tmp, sizeof(*tmp)); |
201 | return error; |
202 | } |
203 | KASSERT(vp != NULL); |
204 | root = VP_TO_TMPFS_NODE(vp); |
205 | KASSERT(root != NULL); |
206 | |
207 | /* |
208 | * Parent of the root inode is itself. Also, root inode has no |
209 | * directory entry (i.e. is never attached), thus hold an extra |
210 | * reference (link) for it. |
211 | */ |
212 | root->tn_links++; |
213 | root->tn_spec.tn_dir.tn_parent = root; |
214 | tmp->tm_root = root; |
215 | vrele(vp); |
216 | |
217 | mp->mnt_flag |= MNT_LOCAL; |
218 | mp->mnt_stat.f_namemax = TMPFS_MAXNAMLEN; |
219 | mp->mnt_fs_bshift = PAGE_SHIFT; |
220 | mp->mnt_dev_bshift = DEV_BSHIFT; |
221 | mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO; |
222 | vfs_getnewfsid(mp); |
223 | |
224 | error = set_statvfs_info(path, UIO_USERSPACE, "tmpfs" , UIO_SYSSPACE, |
225 | mp->mnt_op->vfs_name, mp, curlwp); |
226 | if (error) { |
227 | (void)tmpfs_unmount(mp, MNT_FORCE); |
228 | } |
229 | return error; |
230 | } |
231 | |
/*
 * tmpfs_start: vfs_start hook.  There is nothing to do for tmpfs, so
 * this always reports success.
 */
int
tmpfs_start(struct mount *mp, int flags)
{

	(void)mp;
	(void)flags;

	return 0;
}
238 | |
239 | int |
240 | tmpfs_unmount(struct mount *mp, int mntflags) |
241 | { |
242 | tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp); |
243 | tmpfs_node_t *node, *cnode; |
244 | int error, flags = 0; |
245 | |
246 | /* Handle forced unmounts. */ |
247 | if (mntflags & MNT_FORCE) |
248 | flags |= FORCECLOSE; |
249 | |
250 | /* Finalize all pending I/O. */ |
251 | error = vflush(mp, NULL, flags); |
252 | if (error != 0) |
253 | return error; |
254 | |
255 | /* |
256 | * First round, detach and destroy all directory entries. |
257 | * Also, clear the pointers to the vnodes - they are gone. |
258 | */ |
259 | LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) { |
260 | tmpfs_dirent_t *de; |
261 | |
262 | node->tn_vnode = NULL; |
263 | if (node->tn_type != VDIR) { |
264 | continue; |
265 | } |
266 | while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) { |
267 | cnode = de->td_node; |
268 | if (cnode && cnode != TMPFS_NODE_WHITEOUT) { |
269 | cnode->tn_vnode = NULL; |
270 | } |
271 | tmpfs_dir_detach(node, de); |
272 | tmpfs_free_dirent(tmp, de); |
273 | } |
274 | /* Extra virtual entry (itself for the root). */ |
275 | node->tn_links--; |
276 | } |
277 | |
278 | /* Release the reference on root (diagnostic). */ |
279 | node = tmp->tm_root; |
280 | node->tn_links--; |
281 | |
282 | /* Second round, destroy all inodes. */ |
283 | while ((node = LIST_FIRST(&tmp->tm_nodes)) != NULL) { |
284 | tmpfs_free_node(tmp, node); |
285 | } |
286 | |
287 | /* Throw away the tmpfs_mount structure. */ |
288 | tmpfs_mntmem_destroy(tmp); |
289 | mutex_destroy(&tmp->tm_lock); |
290 | kmem_free(tmp, sizeof(*tmp)); |
291 | mp->mnt_data = NULL; |
292 | |
293 | return 0; |
294 | } |
295 | |
296 | int |
297 | tmpfs_root(struct mount *mp, vnode_t **vpp) |
298 | { |
299 | tmpfs_node_t *node = VFS_TO_TMPFS(mp)->tm_root; |
300 | int error; |
301 | |
302 | error = vcache_get(mp, &node, sizeof(node), vpp); |
303 | if (error) |
304 | return error; |
305 | error = vn_lock(*vpp, LK_EXCLUSIVE); |
306 | if (error) { |
307 | vrele(*vpp); |
308 | *vpp = NULL; |
309 | return error; |
310 | } |
311 | |
312 | return 0; |
313 | } |
314 | |
315 | int |
316 | tmpfs_vget(struct mount *mp, ino_t ino, vnode_t **vpp) |
317 | { |
318 | |
319 | return EOPNOTSUPP; |
320 | } |
321 | |
322 | int |
323 | tmpfs_fhtovp(struct mount *mp, struct fid *fhp, vnode_t **vpp) |
324 | { |
325 | tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp); |
326 | tmpfs_node_t *node; |
327 | tmpfs_fid_t tfh; |
328 | int error; |
329 | |
330 | if (fhp->fid_len != sizeof(tmpfs_fid_t)) { |
331 | return EINVAL; |
332 | } |
333 | memcpy(&tfh, fhp, sizeof(tmpfs_fid_t)); |
334 | |
335 | mutex_enter(&tmp->tm_lock); |
336 | LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) { |
337 | if (node->tn_id == tfh.tf_id) { |
338 | /* Prevent this node from disappearing. */ |
339 | atomic_inc_32(&node->tn_holdcount); |
340 | break; |
341 | } |
342 | } |
343 | mutex_exit(&tmp->tm_lock); |
344 | if (node == NULL) |
345 | return ESTALE; |
346 | |
347 | error = vcache_get(mp, &node, sizeof(node), vpp); |
348 | /* If this node has been reclaimed free it now. */ |
349 | if (atomic_dec_32_nv(&node->tn_holdcount) == TMPFS_NODE_RECLAIMED) { |
350 | KASSERT(error != 0); |
351 | tmpfs_free_node(tmp, node); |
352 | } |
353 | if (error) |
354 | return (error == ENOENT ? ESTALE : error); |
355 | error = vn_lock(*vpp, LK_EXCLUSIVE); |
356 | if (error) { |
357 | vrele(*vpp); |
358 | *vpp = NULL; |
359 | return error; |
360 | } |
361 | if (TMPFS_NODE_GEN(node) != tfh.tf_gen) { |
362 | vput(*vpp); |
363 | *vpp = NULL; |
364 | return ESTALE; |
365 | } |
366 | |
367 | return 0; |
368 | } |
369 | |
370 | int |
371 | tmpfs_vptofh(vnode_t *vp, struct fid *fhp, size_t *fh_size) |
372 | { |
373 | tmpfs_fid_t tfh; |
374 | tmpfs_node_t *node; |
375 | |
376 | if (*fh_size < sizeof(tmpfs_fid_t)) { |
377 | *fh_size = sizeof(tmpfs_fid_t); |
378 | return E2BIG; |
379 | } |
380 | *fh_size = sizeof(tmpfs_fid_t); |
381 | node = VP_TO_TMPFS_NODE(vp); |
382 | |
383 | memset(&tfh, 0, sizeof(tfh)); |
384 | tfh.tf_len = sizeof(tmpfs_fid_t); |
385 | tfh.tf_gen = TMPFS_NODE_GEN(node); |
386 | tfh.tf_id = node->tn_id; |
387 | memcpy(fhp, &tfh, sizeof(tfh)); |
388 | |
389 | return 0; |
390 | } |
391 | |
392 | int |
393 | tmpfs_statvfs(struct mount *mp, struct statvfs *sbp) |
394 | { |
395 | tmpfs_mount_t *tmp; |
396 | fsfilcnt_t freenodes; |
397 | size_t avail; |
398 | |
399 | tmp = VFS_TO_TMPFS(mp); |
400 | |
401 | sbp->f_iosize = sbp->f_frsize = sbp->f_bsize = PAGE_SIZE; |
402 | |
403 | mutex_enter(&tmp->tm_acc_lock); |
404 | avail = tmpfs_pages_avail(tmp); |
405 | sbp->f_blocks = (tmpfs_bytes_max(tmp) >> PAGE_SHIFT); |
406 | sbp->f_bavail = sbp->f_bfree = avail; |
407 | sbp->f_bresvd = 0; |
408 | |
409 | freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_cnt, |
410 | avail * PAGE_SIZE / sizeof(tmpfs_node_t)); |
411 | |
412 | sbp->f_files = tmp->tm_nodes_cnt + freenodes; |
413 | sbp->f_favail = sbp->f_ffree = freenodes; |
414 | sbp->f_fresvd = 0; |
415 | mutex_exit(&tmp->tm_acc_lock); |
416 | |
417 | copy_statvfs_info(sbp, mp); |
418 | |
419 | return 0; |
420 | } |
421 | |
422 | int |
423 | tmpfs_sync(struct mount *mp, int waitfor, kauth_cred_t uc) |
424 | { |
425 | |
426 | return 0; |
427 | } |
428 | |
429 | int |
430 | tmpfs_snapshot(struct mount *mp, vnode_t *vp, struct timespec *ctime) |
431 | { |
432 | |
433 | return EOPNOTSUPP; |
434 | } |
435 | |
436 | /* |
437 | * tmpfs vfs operations. |
438 | */ |
439 | |
440 | extern const struct vnodeopv_desc tmpfs_fifoop_opv_desc; |
441 | extern const struct vnodeopv_desc tmpfs_specop_opv_desc; |
442 | extern const struct vnodeopv_desc tmpfs_vnodeop_opv_desc; |
443 | |
444 | const struct vnodeopv_desc * const tmpfs_vnodeopv_descs[] = { |
445 | &tmpfs_fifoop_opv_desc, |
446 | &tmpfs_specop_opv_desc, |
447 | &tmpfs_vnodeop_opv_desc, |
448 | NULL, |
449 | }; |
450 | |
451 | struct vfsops tmpfs_vfsops = { |
452 | .vfs_name = MOUNT_TMPFS, |
453 | .vfs_min_mount_data = sizeof (struct tmpfs_args), |
454 | .vfs_mount = tmpfs_mount, |
455 | .vfs_start = tmpfs_start, |
456 | .vfs_unmount = tmpfs_unmount, |
457 | .vfs_root = tmpfs_root, |
458 | .vfs_quotactl = (void *)eopnotsupp, |
459 | .vfs_statvfs = tmpfs_statvfs, |
460 | .vfs_sync = tmpfs_sync, |
461 | .vfs_vget = tmpfs_vget, |
462 | .vfs_loadvnode = tmpfs_loadvnode, |
463 | .vfs_newvnode = tmpfs_newvnode, |
464 | .vfs_fhtovp = tmpfs_fhtovp, |
465 | .vfs_vptofh = tmpfs_vptofh, |
466 | .vfs_init = tmpfs_init, |
467 | .vfs_done = tmpfs_done, |
468 | .vfs_snapshot = tmpfs_snapshot, |
469 | .vfs_extattrctl = vfs_stdextattrctl, |
470 | .vfs_suspendctl = (void *)eopnotsupp, |
471 | .vfs_renamelock_enter = genfs_renamelock_enter, |
472 | .vfs_renamelock_exit = genfs_renamelock_exit, |
473 | .vfs_fsync = (void *)eopnotsupp, |
474 | .vfs_opv_descs = tmpfs_vnodeopv_descs |
475 | }; |
476 | |
477 | static int |
478 | tmpfs_modcmd(modcmd_t cmd, void *arg) |
479 | { |
480 | |
481 | switch (cmd) { |
482 | case MODULE_CMD_INIT: |
483 | return vfs_attach(&tmpfs_vfsops); |
484 | case MODULE_CMD_FINI: |
485 | return vfs_detach(&tmpfs_vfsops); |
486 | default: |
487 | return ENOTTY; |
488 | } |
489 | } |
490 | |