1 | /* $NetBSD: vfs_subr.c,v 1.451 2016/11/03 11:04:21 hannken Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
9 | * NASA Ames Research Center, by Charles M. Hannum, by Andrew Doran, |
10 | * by Marshall Kirk McKusick and Greg Ganger at the University of Michigan. |
11 | * |
12 | * Redistribution and use in source and binary forms, with or without |
13 | * modification, are permitted provided that the following conditions |
14 | * are met: |
15 | * 1. Redistributions of source code must retain the above copyright |
16 | * notice, this list of conditions and the following disclaimer. |
17 | * 2. Redistributions in binary form must reproduce the above copyright |
18 | * notice, this list of conditions and the following disclaimer in the |
19 | * documentation and/or other materials provided with the distribution. |
20 | * |
21 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
28 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
29 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
31 | * POSSIBILITY OF SUCH DAMAGE. |
32 | */ |
33 | |
34 | /* |
35 | * Copyright (c) 1989, 1993 |
36 | * The Regents of the University of California. All rights reserved. |
37 | * (c) UNIX System Laboratories, Inc. |
38 | * All or some portions of this file are derived from material licensed |
39 | * to the University of California by American Telephone and Telegraph |
40 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
41 | * the permission of UNIX System Laboratories, Inc. |
42 | * |
43 | * Redistribution and use in source and binary forms, with or without |
44 | * modification, are permitted provided that the following conditions |
45 | * are met: |
46 | * 1. Redistributions of source code must retain the above copyright |
47 | * notice, this list of conditions and the following disclaimer. |
48 | * 2. Redistributions in binary form must reproduce the above copyright |
49 | * notice, this list of conditions and the following disclaimer in the |
50 | * documentation and/or other materials provided with the distribution. |
51 | * 3. Neither the name of the University nor the names of its contributors |
52 | * may be used to endorse or promote products derived from this software |
53 | * without specific prior written permission. |
54 | * |
55 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
56 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
57 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
58 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
59 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
60 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
61 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
62 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
63 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
64 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
65 | * SUCH DAMAGE. |
66 | * |
67 | * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 |
68 | */ |
69 | |
70 | #include <sys/cdefs.h> |
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.451 2016/11/03 11:04:21 hannken Exp $");
72 | |
73 | #ifdef _KERNEL_OPT |
74 | #include "opt_ddb.h" |
75 | #include "opt_compat_netbsd.h" |
76 | #include "opt_compat_43.h" |
77 | #endif |
78 | |
79 | #include <sys/param.h> |
80 | #include <sys/systm.h> |
81 | #include <sys/conf.h> |
82 | #include <sys/dirent.h> |
83 | #include <sys/filedesc.h> |
84 | #include <sys/kernel.h> |
85 | #include <sys/mount.h> |
86 | #include <sys/vnode_impl.h> |
87 | #include <sys/stat.h> |
88 | #include <sys/sysctl.h> |
89 | #include <sys/namei.h> |
90 | #include <sys/buf.h> |
91 | #include <sys/errno.h> |
92 | #include <sys/kmem.h> |
93 | #include <sys/syscallargs.h> |
94 | #include <sys/kauth.h> |
95 | #include <sys/module.h> |
96 | |
97 | #include <miscfs/genfs/genfs.h> |
98 | #include <miscfs/specfs/specdev.h> |
99 | #include <uvm/uvm_ddb.h> |
100 | |
101 | const enum vtype iftovt_tab[16] = { |
102 | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, |
103 | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, |
104 | }; |
105 | const int vttoif_tab[9] = { |
106 | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, |
107 | S_IFSOCK, S_IFIFO, S_IFMT, |
108 | }; |
109 | |
110 | /* |
111 | * Insq/Remq for the vnode usage lists. |
112 | */ |
113 | #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) |
114 | #define bufremvn(bp) { \ |
115 | LIST_REMOVE(bp, b_vnbufs); \ |
116 | (bp)->b_vnbufs.le_next = NOLIST; \ |
117 | } |
118 | |
119 | int doforce = 1; /* 1 => permit forcible unmounting */ |
120 | int prtactive = 0; /* 1 => print out reclaim of active vnodes */ |
121 | |
122 | extern struct mount *dead_rootmount; |
123 | |
124 | /* |
125 | * Local declarations. |
126 | */ |
127 | |
128 | static void vn_initialize_syncerd(void); |
129 | |
130 | /* |
131 | * Initialize the vnode management data structures. |
132 | */ |
133 | void |
134 | vntblinit(void) |
135 | { |
136 | |
137 | vn_initialize_syncerd(); |
138 | vfs_mount_sysinit(); |
139 | vfs_vnode_sysinit(); |
140 | } |
141 | |
142 | /* |
143 | * Flush out and invalidate all buffers associated with a vnode. |
144 | * Called with the underlying vnode locked, which should prevent new dirty |
145 | * buffers from being queued. |
146 | */ |
147 | int |
148 | vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, |
149 | bool catch_p, int slptimeo) |
150 | { |
151 | struct buf *bp, *nbp; |
152 | int error; |
153 | int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | |
154 | (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); |
155 | |
156 | /* XXXUBC this doesn't look at flags or slp* */ |
157 | mutex_enter(vp->v_interlock); |
158 | error = VOP_PUTPAGES(vp, 0, 0, flushflags); |
159 | if (error) { |
160 | return error; |
161 | } |
162 | |
163 | if (flags & V_SAVE) { |
164 | error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0); |
165 | if (error) |
166 | return (error); |
167 | KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd)); |
168 | } |
169 | |
170 | mutex_enter(&bufcache_lock); |
171 | restart: |
172 | for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { |
173 | KASSERT(bp->b_vp == vp); |
174 | nbp = LIST_NEXT(bp, b_vnbufs); |
175 | error = bbusy(bp, catch_p, slptimeo, NULL); |
176 | if (error != 0) { |
177 | if (error == EPASSTHROUGH) |
178 | goto restart; |
179 | mutex_exit(&bufcache_lock); |
180 | return (error); |
181 | } |
182 | brelsel(bp, BC_INVAL | BC_VFLUSH); |
183 | } |
184 | |
185 | for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { |
186 | KASSERT(bp->b_vp == vp); |
187 | nbp = LIST_NEXT(bp, b_vnbufs); |
188 | error = bbusy(bp, catch_p, slptimeo, NULL); |
189 | if (error != 0) { |
190 | if (error == EPASSTHROUGH) |
191 | goto restart; |
192 | mutex_exit(&bufcache_lock); |
193 | return (error); |
194 | } |
195 | /* |
196 | * XXX Since there are no node locks for NFS, I believe |
197 | * there is a slight chance that a delayed write will |
198 | * occur while sleeping just above, so check for it. |
199 | */ |
200 | if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) { |
201 | #ifdef DEBUG |
202 | printf("buffer still DELWRI\n" ); |
203 | #endif |
204 | bp->b_cflags |= BC_BUSY | BC_VFLUSH; |
205 | mutex_exit(&bufcache_lock); |
206 | VOP_BWRITE(bp->b_vp, bp); |
207 | mutex_enter(&bufcache_lock); |
208 | goto restart; |
209 | } |
210 | brelsel(bp, BC_INVAL | BC_VFLUSH); |
211 | } |
212 | |
213 | #ifdef DIAGNOSTIC |
214 | if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd)) |
215 | panic("vinvalbuf: flush failed, vp %p" , vp); |
216 | #endif |
217 | |
218 | mutex_exit(&bufcache_lock); |
219 | |
220 | return (0); |
221 | } |
222 | |
223 | /* |
224 | * Destroy any in core blocks past the truncation length. |
225 | * Called with the underlying vnode locked, which should prevent new dirty |
226 | * buffers from being queued. |
227 | */ |
228 | int |
229 | vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch_p, int slptimeo) |
230 | { |
231 | struct buf *bp, *nbp; |
232 | int error; |
233 | voff_t off; |
234 | |
235 | off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); |
236 | mutex_enter(vp->v_interlock); |
237 | error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); |
238 | if (error) { |
239 | return error; |
240 | } |
241 | |
242 | mutex_enter(&bufcache_lock); |
243 | restart: |
244 | for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { |
245 | KASSERT(bp->b_vp == vp); |
246 | nbp = LIST_NEXT(bp, b_vnbufs); |
247 | if (bp->b_lblkno < lbn) |
248 | continue; |
249 | error = bbusy(bp, catch_p, slptimeo, NULL); |
250 | if (error != 0) { |
251 | if (error == EPASSTHROUGH) |
252 | goto restart; |
253 | mutex_exit(&bufcache_lock); |
254 | return (error); |
255 | } |
256 | brelsel(bp, BC_INVAL | BC_VFLUSH); |
257 | } |
258 | |
259 | for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { |
260 | KASSERT(bp->b_vp == vp); |
261 | nbp = LIST_NEXT(bp, b_vnbufs); |
262 | if (bp->b_lblkno < lbn) |
263 | continue; |
264 | error = bbusy(bp, catch_p, slptimeo, NULL); |
265 | if (error != 0) { |
266 | if (error == EPASSTHROUGH) |
267 | goto restart; |
268 | mutex_exit(&bufcache_lock); |
269 | return (error); |
270 | } |
271 | brelsel(bp, BC_INVAL | BC_VFLUSH); |
272 | } |
273 | mutex_exit(&bufcache_lock); |
274 | |
275 | return (0); |
276 | } |
277 | |
278 | /* |
279 | * Flush all dirty buffers from a vnode. |
280 | * Called with the underlying vnode locked, which should prevent new dirty |
281 | * buffers from being queued. |
282 | */ |
283 | int |
284 | vflushbuf(struct vnode *vp, int flags) |
285 | { |
286 | struct buf *bp, *nbp; |
287 | int error, pflags; |
288 | bool dirty, sync; |
289 | |
290 | sync = (flags & FSYNC_WAIT) != 0; |
291 | pflags = PGO_CLEANIT | PGO_ALLPAGES | |
292 | (sync ? PGO_SYNCIO : 0) | |
293 | ((flags & FSYNC_LAZY) ? PGO_LAZY : 0); |
294 | mutex_enter(vp->v_interlock); |
295 | (void) VOP_PUTPAGES(vp, 0, 0, pflags); |
296 | |
297 | loop: |
298 | mutex_enter(&bufcache_lock); |
299 | for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { |
300 | KASSERT(bp->b_vp == vp); |
301 | nbp = LIST_NEXT(bp, b_vnbufs); |
302 | if ((bp->b_cflags & BC_BUSY)) |
303 | continue; |
304 | if ((bp->b_oflags & BO_DELWRI) == 0) |
305 | panic("vflushbuf: not dirty, bp %p" , bp); |
306 | bp->b_cflags |= BC_BUSY | BC_VFLUSH; |
307 | mutex_exit(&bufcache_lock); |
308 | /* |
309 | * Wait for I/O associated with indirect blocks to complete, |
310 | * since there is no way to quickly wait for them below. |
311 | */ |
312 | if (bp->b_vp == vp || !sync) |
313 | (void) bawrite(bp); |
314 | else { |
315 | error = bwrite(bp); |
316 | if (error) |
317 | return error; |
318 | } |
319 | goto loop; |
320 | } |
321 | mutex_exit(&bufcache_lock); |
322 | |
323 | if (!sync) |
324 | return 0; |
325 | |
326 | mutex_enter(vp->v_interlock); |
327 | while (vp->v_numoutput != 0) |
328 | cv_wait(&vp->v_cv, vp->v_interlock); |
329 | dirty = !LIST_EMPTY(&vp->v_dirtyblkhd); |
330 | mutex_exit(vp->v_interlock); |
331 | |
332 | if (dirty) { |
333 | vprint("vflushbuf: dirty" , vp); |
334 | goto loop; |
335 | } |
336 | |
337 | return 0; |
338 | } |
339 | |
340 | /* |
341 | * Create a vnode for a block device. |
342 | * Used for root filesystem and swap areas. |
343 | * Also used for memory file system special devices. |
344 | */ |
345 | int |
346 | bdevvp(dev_t dev, vnode_t **vpp) |
347 | { |
348 | struct vattr va; |
349 | |
350 | vattr_null(&va); |
351 | va.va_type = VBLK; |
352 | va.va_rdev = dev; |
353 | |
354 | return vcache_new(dead_rootmount, NULL, &va, NOCRED, vpp); |
355 | } |
356 | |
357 | /* |
358 | * Create a vnode for a character device. |
359 | * Used for kernfs and some console handling. |
360 | */ |
361 | int |
362 | cdevvp(dev_t dev, vnode_t **vpp) |
363 | { |
364 | struct vattr va; |
365 | |
366 | vattr_null(&va); |
367 | va.va_type = VCHR; |
368 | va.va_rdev = dev; |
369 | |
370 | return vcache_new(dead_rootmount, NULL, &va, NOCRED, vpp); |
371 | } |
372 | |
373 | /* |
374 | * Associate a buffer with a vnode. There must already be a hold on |
375 | * the vnode. |
376 | */ |
377 | void |
378 | bgetvp(struct vnode *vp, struct buf *bp) |
379 | { |
380 | |
381 | KASSERT(bp->b_vp == NULL); |
382 | KASSERT(bp->b_objlock == &buffer_lock); |
383 | KASSERT(mutex_owned(vp->v_interlock)); |
384 | KASSERT(mutex_owned(&bufcache_lock)); |
385 | KASSERT((bp->b_cflags & BC_BUSY) != 0); |
386 | KASSERT(!cv_has_waiters(&bp->b_done)); |
387 | |
388 | vholdl(vp); |
389 | bp->b_vp = vp; |
390 | if (vp->v_type == VBLK || vp->v_type == VCHR) |
391 | bp->b_dev = vp->v_rdev; |
392 | else |
393 | bp->b_dev = NODEV; |
394 | |
395 | /* |
396 | * Insert onto list for new vnode. |
397 | */ |
398 | bufinsvn(bp, &vp->v_cleanblkhd); |
399 | bp->b_objlock = vp->v_interlock; |
400 | } |
401 | |
402 | /* |
403 | * Disassociate a buffer from a vnode. |
404 | */ |
405 | void |
406 | brelvp(struct buf *bp) |
407 | { |
408 | struct vnode *vp = bp->b_vp; |
409 | |
410 | KASSERT(vp != NULL); |
411 | KASSERT(bp->b_objlock == vp->v_interlock); |
412 | KASSERT(mutex_owned(vp->v_interlock)); |
413 | KASSERT(mutex_owned(&bufcache_lock)); |
414 | KASSERT((bp->b_cflags & BC_BUSY) != 0); |
415 | KASSERT(!cv_has_waiters(&bp->b_done)); |
416 | |
417 | /* |
418 | * Delete from old vnode list, if on one. |
419 | */ |
420 | if (LIST_NEXT(bp, b_vnbufs) != NOLIST) |
421 | bufremvn(bp); |
422 | |
423 | if (vp->v_uobj.uo_npages == 0 && (vp->v_iflag & VI_ONWORKLST) && |
424 | LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { |
425 | vp->v_iflag &= ~VI_WRMAPDIRTY; |
426 | vn_syncer_remove_from_worklist(vp); |
427 | } |
428 | |
429 | bp->b_objlock = &buffer_lock; |
430 | bp->b_vp = NULL; |
431 | holdrelel(vp); |
432 | } |
433 | |
434 | /* |
435 | * Reassign a buffer from one vnode list to another. |
436 | * The list reassignment must be within the same vnode. |
437 | * Used to assign file specific control information |
438 | * (indirect blocks) to the list to which they belong. |
439 | */ |
440 | void |
441 | reassignbuf(struct buf *bp, struct vnode *vp) |
442 | { |
443 | struct buflists *listheadp; |
444 | int delayx; |
445 | |
446 | KASSERT(mutex_owned(&bufcache_lock)); |
447 | KASSERT(bp->b_objlock == vp->v_interlock); |
448 | KASSERT(mutex_owned(vp->v_interlock)); |
449 | KASSERT((bp->b_cflags & BC_BUSY) != 0); |
450 | |
451 | /* |
452 | * Delete from old vnode list, if on one. |
453 | */ |
454 | if (LIST_NEXT(bp, b_vnbufs) != NOLIST) |
455 | bufremvn(bp); |
456 | |
457 | /* |
458 | * If dirty, put on list of dirty buffers; |
459 | * otherwise insert onto list of clean buffers. |
460 | */ |
461 | if ((bp->b_oflags & BO_DELWRI) == 0) { |
462 | listheadp = &vp->v_cleanblkhd; |
463 | if (vp->v_uobj.uo_npages == 0 && |
464 | (vp->v_iflag & VI_ONWORKLST) && |
465 | LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { |
466 | vp->v_iflag &= ~VI_WRMAPDIRTY; |
467 | vn_syncer_remove_from_worklist(vp); |
468 | } |
469 | } else { |
470 | listheadp = &vp->v_dirtyblkhd; |
471 | if ((vp->v_iflag & VI_ONWORKLST) == 0) { |
472 | switch (vp->v_type) { |
473 | case VDIR: |
474 | delayx = dirdelay; |
475 | break; |
476 | case VBLK: |
477 | if (spec_node_getmountedfs(vp) != NULL) { |
478 | delayx = metadelay; |
479 | break; |
480 | } |
481 | /* fall through */ |
482 | default: |
483 | delayx = filedelay; |
484 | break; |
485 | } |
486 | if (!vp->v_mount || |
487 | (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) |
488 | vn_syncer_add_to_worklist(vp, delayx); |
489 | } |
490 | } |
491 | bufinsvn(bp, listheadp); |
492 | } |
493 | |
494 | /* |
495 | * Lookup a vnode by device number and return it referenced. |
496 | */ |
497 | int |
498 | vfinddev(dev_t dev, enum vtype type, vnode_t **vpp) |
499 | { |
500 | |
501 | return (spec_node_lookup_by_dev(type, dev, vpp) == 0); |
502 | } |
503 | |
504 | /* |
505 | * Revoke all the vnodes corresponding to the specified minor number |
506 | * range (endpoints inclusive) of the specified major. |
507 | */ |
508 | void |
509 | vdevgone(int maj, int minl, int minh, enum vtype type) |
510 | { |
511 | vnode_t *vp; |
512 | dev_t dev; |
513 | int mn; |
514 | |
515 | for (mn = minl; mn <= minh; mn++) { |
516 | dev = makedev(maj, mn); |
517 | while (spec_node_lookup_by_dev(type, dev, &vp) == 0) { |
518 | VOP_REVOKE(vp, REVOKEALL); |
519 | vrele(vp); |
520 | } |
521 | } |
522 | } |
523 | |
524 | /* |
525 | * The filesystem synchronizer mechanism - syncer. |
526 | * |
527 | * It is useful to delay writes of file data and filesystem metadata for |
528 | * a certain amount of time so that quickly created and deleted files need |
529 | * not waste disk bandwidth being created and removed. To implement this, |
530 | * vnodes are appended to a "workitem" queue. |
531 | * |
 * Most pending metadata should not wait for more than ten seconds. Thus,
 * buffers for mounted block devices are delayed only about half the time
 * that file data is delayed. Similarly, directory updates are more
 * critical, so they are delayed only about a third of that time.
536 | * |
537 | * There are SYNCER_MAXDELAY queues that are processed in a round-robin |
 * manner at a rate of one each second (driven off the filesystem syncer
539 | * thread). The syncer_delayno variable indicates the next queue that is |
540 | * to be processed. Items that need to be processed soon are placed in |
541 | * this queue: |
542 | * |
543 | * syncer_workitem_pending[syncer_delayno] |
544 | * |
545 | * A delay of e.g. fifteen seconds is done by placing the request fifteen |
546 | * entries later in the queue: |
547 | * |
548 | * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] |
549 | * |
 * The VI_ONWORKLST flag indicates that the vnode is on one of these queues.
551 | */ |
552 | |
553 | #define SYNCER_MAXDELAY 32 |
554 | |
555 | typedef TAILQ_HEAD(synclist, vnode) synclist_t; |
556 | |
557 | static void vn_syncer_add1(struct vnode *, int); |
558 | static void sysctl_vfs_syncfs_setup(struct sysctllog **); |
559 | |
560 | /* |
561 | * Defines and variables for the syncer process. |
562 | */ |
563 | int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ |
564 | time_t syncdelay = 30; /* max time to delay syncing data */ |
565 | time_t filedelay = 30; /* time to delay syncing files */ |
566 | time_t dirdelay = 15; /* time to delay syncing directories */ |
567 | time_t metadelay = 10; /* time to delay syncing metadata */ |
568 | time_t lockdelay = 1; /* time to delay if locking fails */ |
569 | |
570 | kmutex_t syncer_mutex; /* used to freeze syncer, long term */ |
571 | static kmutex_t syncer_data_lock; /* short term lock on data structs */ |
572 | |
573 | static int syncer_delayno = 0; |
574 | static long syncer_last; |
575 | static synclist_t * syncer_workitem_pending; |
576 | |
577 | static void |
578 | vn_initialize_syncerd(void) |
579 | { |
580 | int i; |
581 | |
582 | syncer_last = SYNCER_MAXDELAY + 2; |
583 | |
584 | sysctl_vfs_syncfs_setup(NULL); |
585 | |
586 | syncer_workitem_pending = |
587 | kmem_alloc(syncer_last * sizeof (struct synclist), KM_SLEEP); |
588 | |
589 | for (i = 0; i < syncer_last; i++) |
590 | TAILQ_INIT(&syncer_workitem_pending[i]); |
591 | |
592 | mutex_init(&syncer_mutex, MUTEX_DEFAULT, IPL_NONE); |
593 | mutex_init(&syncer_data_lock, MUTEX_DEFAULT, IPL_NONE); |
594 | } |
595 | |
596 | /* |
597 | * Return delay factor appropriate for the given file system. For |
598 | * WAPBL we use the sync vnode to burst out metadata updates: sync |
599 | * those file systems more frequently. |
600 | */ |
601 | static inline int |
602 | sync_delay(struct mount *mp) |
603 | { |
604 | |
605 | return mp->mnt_wapbl != NULL ? metadelay : syncdelay; |
606 | } |
607 | |
608 | /* |
609 | * Compute the next slot index from delay. |
610 | */ |
611 | static inline int |
612 | sync_delay_slot(int delayx) |
613 | { |
614 | |
615 | if (delayx > syncer_maxdelay - 2) |
616 | delayx = syncer_maxdelay - 2; |
617 | return (syncer_delayno + delayx) % syncer_last; |
618 | } |
619 | |
620 | /* |
621 | * Add an item to the syncer work queue. |
622 | */ |
623 | static void |
624 | vn_syncer_add1(struct vnode *vp, int delayx) |
625 | { |
626 | synclist_t *slp; |
627 | |
628 | KASSERT(mutex_owned(&syncer_data_lock)); |
629 | |
630 | if (vp->v_iflag & VI_ONWORKLST) { |
631 | /* |
632 | * Remove in order to adjust the position of the vnode. |
633 | * Note: called from sched_sync(), which will not hold |
634 | * interlock, therefore we cannot modify v_iflag here. |
635 | */ |
636 | slp = &syncer_workitem_pending[vp->v_synclist_slot]; |
637 | TAILQ_REMOVE(slp, vp, v_synclist); |
638 | } else { |
639 | KASSERT(mutex_owned(vp->v_interlock)); |
640 | vp->v_iflag |= VI_ONWORKLST; |
641 | } |
642 | |
643 | vp->v_synclist_slot = sync_delay_slot(delayx); |
644 | |
645 | slp = &syncer_workitem_pending[vp->v_synclist_slot]; |
646 | TAILQ_INSERT_TAIL(slp, vp, v_synclist); |
647 | } |
648 | |
649 | void |
650 | vn_syncer_add_to_worklist(struct vnode *vp, int delayx) |
651 | { |
652 | |
653 | KASSERT(mutex_owned(vp->v_interlock)); |
654 | |
655 | mutex_enter(&syncer_data_lock); |
656 | vn_syncer_add1(vp, delayx); |
657 | mutex_exit(&syncer_data_lock); |
658 | } |
659 | |
660 | /* |
661 | * Remove an item from the syncer work queue. |
662 | */ |
663 | void |
664 | vn_syncer_remove_from_worklist(struct vnode *vp) |
665 | { |
666 | synclist_t *slp; |
667 | |
668 | KASSERT(mutex_owned(vp->v_interlock)); |
669 | |
670 | mutex_enter(&syncer_data_lock); |
671 | if (vp->v_iflag & VI_ONWORKLST) { |
672 | vp->v_iflag &= ~VI_ONWORKLST; |
673 | slp = &syncer_workitem_pending[vp->v_synclist_slot]; |
674 | TAILQ_REMOVE(slp, vp, v_synclist); |
675 | } |
676 | mutex_exit(&syncer_data_lock); |
677 | } |
678 | |
679 | /* |
680 | * Add this mount point to the syncer. |
681 | */ |
682 | void |
683 | vfs_syncer_add_to_worklist(struct mount *mp) |
684 | { |
685 | static int start, incr, next; |
686 | int vdelay; |
687 | |
688 | KASSERT(mutex_owned(&mp->mnt_updating)); |
689 | KASSERT((mp->mnt_iflag & IMNT_ONWORKLIST) == 0); |
690 | |
691 | /* |
692 | * We attempt to scatter the mount points on the list |
693 | * so that they will go off at evenly distributed times |
694 | * even if all the filesystems are mounted at once. |
695 | */ |
696 | |
697 | next += incr; |
698 | if (next == 0 || next > syncer_maxdelay) { |
699 | start /= 2; |
700 | incr /= 2; |
701 | if (start == 0) { |
702 | start = syncer_maxdelay / 2; |
703 | incr = syncer_maxdelay; |
704 | } |
705 | next = start; |
706 | } |
707 | mp->mnt_iflag |= IMNT_ONWORKLIST; |
708 | vdelay = sync_delay(mp); |
709 | mp->mnt_synclist_slot = vdelay > 0 ? next % vdelay : 0; |
710 | } |
711 | |
712 | /* |
713 | * Remove the mount point from the syncer. |
714 | */ |
715 | void |
716 | vfs_syncer_remove_from_worklist(struct mount *mp) |
717 | { |
718 | |
719 | KASSERT(mutex_owned(&mp->mnt_updating)); |
720 | KASSERT((mp->mnt_iflag & IMNT_ONWORKLIST) != 0); |
721 | |
722 | mp->mnt_iflag &= ~IMNT_ONWORKLIST; |
723 | } |
724 | |
725 | /* |
726 | * Try lazy sync, return true on success. |
727 | */ |
728 | static bool |
729 | lazy_sync_vnode(struct vnode *vp) |
730 | { |
731 | bool synced; |
732 | |
733 | KASSERT(mutex_owned(&syncer_data_lock)); |
734 | |
735 | synced = false; |
736 | /* We are locking in the wrong direction. */ |
737 | if (mutex_tryenter(vp->v_interlock)) { |
738 | mutex_exit(&syncer_data_lock); |
739 | if (vget(vp, LK_NOWAIT, false /* !wait */) == 0) { |
740 | if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) { |
741 | synced = true; |
742 | (void) VOP_FSYNC(vp, curlwp->l_cred, |
743 | FSYNC_LAZY, 0, 0); |
744 | vput(vp); |
745 | } else |
746 | vrele(vp); |
747 | } |
748 | mutex_enter(&syncer_data_lock); |
749 | } |
750 | return synced; |
751 | } |
752 | |
753 | /* |
754 | * System filesystem synchronizer daemon. |
755 | */ |
756 | void |
757 | sched_sync(void *arg) |
758 | { |
759 | synclist_t *slp; |
760 | struct vnode *vp; |
761 | struct mount *mp, *nmp; |
762 | time_t starttime; |
763 | bool synced; |
764 | |
765 | for (;;) { |
766 | mutex_enter(&syncer_mutex); |
767 | |
768 | starttime = time_second; |
769 | |
770 | /* |
771 | * Sync mounts whose dirty time has expired. |
772 | */ |
773 | mutex_enter(&mountlist_lock); |
774 | for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { |
775 | if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0 || |
776 | mp->mnt_synclist_slot != syncer_delayno) { |
777 | nmp = TAILQ_NEXT(mp, mnt_list); |
778 | continue; |
779 | } |
780 | mp->mnt_synclist_slot = sync_delay_slot(sync_delay(mp)); |
781 | if (vfs_busy(mp, &nmp)) |
782 | continue; |
783 | VFS_SYNC(mp, MNT_LAZY, curlwp->l_cred); |
784 | vfs_unbusy(mp, false, &nmp); |
785 | } |
786 | mutex_exit(&mountlist_lock); |
787 | |
788 | mutex_enter(&syncer_data_lock); |
789 | |
790 | /* |
791 | * Push files whose dirty time has expired. |
792 | */ |
793 | slp = &syncer_workitem_pending[syncer_delayno]; |
794 | syncer_delayno += 1; |
795 | if (syncer_delayno >= syncer_last) |
796 | syncer_delayno = 0; |
797 | |
798 | while ((vp = TAILQ_FIRST(slp)) != NULL) { |
799 | synced = lazy_sync_vnode(vp); |
800 | |
801 | /* |
802 | * XXX The vnode may have been recycled, in which |
803 | * case it may have a new identity. |
804 | */ |
805 | if (TAILQ_FIRST(slp) == vp) { |
806 | /* |
807 | * Put us back on the worklist. The worklist |
808 | * routine will remove us from our current |
809 | * position and then add us back in at a later |
810 | * position. |
811 | * |
812 | * Try again sooner rather than later if |
813 | * we were unable to lock the vnode. Lock |
814 | * failure should not prevent us from doing |
815 | * the sync "soon". |
816 | * |
				 * If we did lock it yet still arrive
				 * here, it's likely that lazy sync is
				 * in progress and so the vnode still
				 * has dirty metadata.
820 | * syncdelay is mainly to get this vnode out |
821 | * of the way so we do not consider it again |
822 | * "soon" in this loop, so the delay time is |
823 | * not critical as long as it is not "soon". |
824 | * While write-back strategy is the file |
825 | * system's domain, we expect write-back to |
826 | * occur no later than syncdelay seconds |
827 | * into the future. |
828 | */ |
829 | vn_syncer_add1(vp, |
830 | synced ? syncdelay : lockdelay); |
831 | } |
832 | } |
833 | mutex_exit(&syncer_mutex); |
834 | |
835 | /* |
836 | * If it has taken us less than a second to process the |
837 | * current work, then wait. Otherwise start right over |
838 | * again. We can still lose time if any single round |
839 | * takes more than two seconds, but it does not really |
840 | * matter as we are just trying to generally pace the |
841 | * filesystem activity. |
842 | */ |
843 | if (time_second == starttime) { |
844 | kpause("syncer" , false, hz, &syncer_data_lock); |
845 | } |
846 | mutex_exit(&syncer_data_lock); |
847 | } |
848 | } |
849 | |
850 | static void |
851 | sysctl_vfs_syncfs_setup(struct sysctllog **clog) |
852 | { |
853 | const struct sysctlnode *rnode, *cnode; |
854 | |
	sysctl_createv(clog, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "sync",
	    SYSCTL_DESCR("syncer options"),
	    NULL, 0, NULL, 0,
	    CTL_VFS, CTL_CREATE, CTL_EOL);

	sysctl_createv(clog, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_QUAD, "delay",
	    SYSCTL_DESCR("max time to delay syncing data"),
	    NULL, 0, &syncdelay, 0,
	    CTL_CREATE, CTL_EOL);

	sysctl_createv(clog, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_QUAD, "filedelay",
	    SYSCTL_DESCR("time to delay syncing files"),
	    NULL, 0, &filedelay, 0,
	    CTL_CREATE, CTL_EOL);

	sysctl_createv(clog, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_QUAD, "dirdelay",
	    SYSCTL_DESCR("time to delay syncing directories"),
	    NULL, 0, &dirdelay, 0,
	    CTL_CREATE, CTL_EOL);

	sysctl_createv(clog, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_QUAD, "metadelay",
	    SYSCTL_DESCR("time to delay syncing metadata"),
	    NULL, 0, &metadelay, 0,
	    CTL_CREATE, CTL_EOL);
889 | } |
890 | |
891 | /* |
892 | * sysctl helper routine to return list of supported fstypes |
893 | */ |
894 | int |
895 | sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS) |
896 | { |
897 | char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)]; |
898 | char *where = oldp; |
899 | struct vfsops *v; |
900 | size_t needed, left, slen; |
901 | int error, first; |
902 | |
903 | if (newp != NULL) |
904 | return (EPERM); |
905 | if (namelen != 0) |
906 | return (EINVAL); |
907 | |
908 | first = 1; |
909 | error = 0; |
910 | needed = 0; |
911 | left = *oldlenp; |
912 | |
913 | sysctl_unlock(); |
914 | mutex_enter(&vfs_list_lock); |
915 | LIST_FOREACH(v, &vfs_list, vfs_list) { |
916 | if (where == NULL) |
917 | needed += strlen(v->vfs_name) + 1; |
918 | else { |
919 | memset(bf, 0, sizeof(bf)); |
920 | if (first) { |
921 | strncpy(bf, v->vfs_name, sizeof(bf)); |
922 | first = 0; |
923 | } else { |
924 | bf[0] = ' '; |
925 | strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1); |
926 | } |
927 | bf[sizeof(bf)-1] = '\0'; |
928 | slen = strlen(bf); |
929 | if (left < slen + 1) |
930 | break; |
931 | v->vfs_refcount++; |
932 | mutex_exit(&vfs_list_lock); |
933 | /* +1 to copy out the trailing NUL byte */ |
934 | error = copyout(bf, where, slen + 1); |
935 | mutex_enter(&vfs_list_lock); |
936 | v->vfs_refcount--; |
937 | if (error) |
938 | break; |
939 | where += slen; |
940 | needed += slen; |
941 | left -= slen; |
942 | } |
943 | } |
944 | mutex_exit(&vfs_list_lock); |
945 | sysctl_relock(); |
946 | *oldlenp = needed; |
947 | return (error); |
948 | } |
949 | |
950 | int kinfo_vdebug = 1; |
951 | int kinfo_vgetfailed; |
952 | |
953 | #define KINFO_VNODESLOP 10 |
954 | |
955 | /* |
956 | * Dump vnode list (via sysctl). |
957 | * Copyout address of vnode followed by vnode. |
958 | */ |
959 | int |
960 | sysctl_kern_vnode(SYSCTLFN_ARGS) |
961 | { |
962 | char *where = oldp; |
963 | size_t *sizep = oldlenp; |
964 | struct mount *mp, *nmp; |
965 | vnode_t *vp, vbuf; |
966 | struct vnode_iterator *marker; |
967 | char *bp = where; |
968 | char *ewhere; |
969 | int error; |
970 | |
971 | if (namelen != 0) |
972 | return (EOPNOTSUPP); |
973 | if (newp != NULL) |
974 | return (EPERM); |
975 | |
976 | #define VPTRSZ sizeof(vnode_t *) |
977 | #define VNODESZ sizeof(vnode_t) |
978 | if (where == NULL) { |
979 | *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); |
980 | return (0); |
981 | } |
982 | ewhere = where + *sizep; |
983 | |
984 | sysctl_unlock(); |
985 | mutex_enter(&mountlist_lock); |
986 | for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { |
987 | if (vfs_busy(mp, &nmp)) { |
988 | continue; |
989 | } |
990 | vfs_vnode_iterator_init(mp, &marker); |
991 | while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) { |
992 | if (bp + VPTRSZ + VNODESZ > ewhere) { |
993 | vrele(vp); |
994 | vfs_vnode_iterator_destroy(marker); |
995 | vfs_unbusy(mp, false, NULL); |
996 | sysctl_relock(); |
997 | *sizep = bp - where; |
998 | return (ENOMEM); |
999 | } |
1000 | memcpy(&vbuf, vp, VNODESZ); |
1001 | if ((error = copyout(&vp, bp, VPTRSZ)) || |
1002 | (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) { |
1003 | vrele(vp); |
1004 | vfs_vnode_iterator_destroy(marker); |
1005 | vfs_unbusy(mp, false, NULL); |
1006 | sysctl_relock(); |
1007 | return (error); |
1008 | } |
1009 | vrele(vp); |
1010 | bp += VPTRSZ + VNODESZ; |
1011 | } |
1012 | vfs_vnode_iterator_destroy(marker); |
1013 | vfs_unbusy(mp, false, &nmp); |
1014 | } |
1015 | mutex_exit(&mountlist_lock); |
1016 | sysctl_relock(); |
1017 | |
1018 | *sizep = bp - where; |
1019 | return (0); |
1020 | } |
1021 | |
1022 | /* |
1023 | * Set vnode attributes to VNOVAL |
1024 | */ |
1025 | void |
1026 | vattr_null(struct vattr *vap) |
1027 | { |
1028 | |
1029 | memset(vap, 0, sizeof(*vap)); |
1030 | |
1031 | vap->va_type = VNON; |
1032 | |
1033 | /* |
	 * Assign individually so that it is safe even if the size and
	 * sign of each member vary.
1036 | */ |
1037 | vap->va_mode = VNOVAL; |
1038 | vap->va_nlink = VNOVAL; |
1039 | vap->va_uid = VNOVAL; |
1040 | vap->va_gid = VNOVAL; |
1041 | vap->va_fsid = VNOVAL; |
1042 | vap->va_fileid = VNOVAL; |
1043 | vap->va_size = VNOVAL; |
1044 | vap->va_blocksize = VNOVAL; |
1045 | vap->va_atime.tv_sec = |
1046 | vap->va_mtime.tv_sec = |
1047 | vap->va_ctime.tv_sec = |
1048 | vap->va_birthtime.tv_sec = VNOVAL; |
1049 | vap->va_atime.tv_nsec = |
1050 | vap->va_mtime.tv_nsec = |
1051 | vap->va_ctime.tv_nsec = |
1052 | vap->va_birthtime.tv_nsec = VNOVAL; |
1053 | vap->va_gen = VNOVAL; |
1054 | vap->va_flags = VNOVAL; |
1055 | vap->va_rdev = VNOVAL; |
1056 | vap->va_bytes = VNOVAL; |
1057 | } |
1058 | |
1059 | /* |
1060 | * Vnode state to string. |
1061 | */ |
1062 | const char * |
1063 | vstate_name(enum vnode_state state) |
1064 | { |
1065 | |
1066 | switch (state) { |
	case VS_MARKER:
		return "MARKER";
	case VS_LOADING:
		return "LOADING";
	case VS_ACTIVE:
		return "ACTIVE";
	case VS_BLOCKED:
		return "BLOCKED";
	case VS_RECLAIMING:
		return "RECLAIMING";
	case VS_RECLAIMED:
		return "RECLAIMED";
	default:
		return "ILLEGAL";
1081 | } |
1082 | } |
1083 | |
1084 | /* |
1085 | * Print a description of a vnode (common part). |
1086 | */ |
1087 | static void |
1088 | vprint_common(struct vnode *vp, const char *prefix, |
1089 | void (*pr)(const char *, ...) __printflike(1, 2)) |
1090 | { |
1091 | int n; |
1092 | char bf[96]; |
1093 | const uint8_t *cp; |
1094 | vnode_impl_t *node; |
1095 | const char * const vnode_tags[] = { VNODE_TAGS }; |
1096 | const char * const vnode_types[] = { VNODE_TYPES }; |
1097 | const char vnode_flagbits[] = VNODE_FLAGBITS; |
1098 | |
1099 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) |
1100 | #define ARRAY_PRINT(idx, arr) \ |
1101 | ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN") |
1102 | |
1103 | node = VNODE_TO_VIMPL(vp); |
1104 | |
1105 | snprintb(bf, sizeof(bf), |
1106 | vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag); |
1107 | |
1108 | (*pr)("vnode %p flags %s\n" , vp, bf); |
1109 | (*pr)("%stag %s(%d) type %s(%d) mount %p typedata %p\n" , prefix, |
1110 | ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, |
1111 | ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, |
1112 | vp->v_mount, vp->v_mountedhere); |
1113 | (*pr)("%susecount %d writecount %d holdcount %d\n" , prefix, |
1114 | vp->v_usecount, vp->v_writecount, vp->v_holdcnt); |
1115 | (*pr)("%ssize %" PRIx64 " writesize %" PRIx64 " numoutput %d\n" , |
1116 | prefix, vp->v_size, vp->v_writesize, vp->v_numoutput); |
1117 | (*pr)("%sfreelisthd %p data %p lock %p\n" , prefix, |
1118 | vp->v_freelisthd, vp->v_data, &vp->v_lock); |
1119 | |
1120 | (*pr)("%sstate %s key(%p %zd)" , prefix, vstate_name(node->vi_state), |
1121 | node->vi_key.vk_mount, node->vi_key.vk_key_len); |
1122 | n = node->vi_key.vk_key_len; |
1123 | cp = node->vi_key.vk_key; |
1124 | while (n-- > 0) |
1125 | (*pr)(" %02x" , *cp++); |
1126 | (*pr)("\n" ); |
1127 | |
1128 | #undef ARRAY_PRINT |
1129 | #undef ARRAY_SIZE |
1130 | } |
1131 | |
1132 | /* |
1133 | * Print out a description of a vnode. |
1134 | */ |
1135 | void |
1136 | vprint(const char *label, struct vnode *vp) |
1137 | { |
1138 | |
1139 | if (label != NULL) |
1140 | printf("%s: " , label); |
1141 | vprint_common(vp, "\t" , printf); |
1142 | if (vp->v_data != NULL) { |
1143 | printf("\t" ); |
1144 | VOP_PRINT(vp); |
1145 | } |
1146 | } |
1147 | |
1148 | /* Deprecated. Kept for KPI compatibility. */ |
1149 | int |
1150 | vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid, |
1151 | mode_t acc_mode, kauth_cred_t cred) |
1152 | { |
1153 | |
1154 | #ifdef DIAGNOSTIC |
1155 | printf("vaccess: deprecated interface used.\n" ); |
1156 | #endif /* DIAGNOSTIC */ |
1157 | |
1158 | return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(acc_mode, |
1159 | type, file_mode), NULL /* This may panic. */, NULL, |
1160 | genfs_can_access(type, file_mode, uid, gid, acc_mode, cred)); |
1161 | } |
1162 | |
1163 | /* |
1164 | * Given a file system name, look up the vfsops for that |
 * file system, or return NULL if the file system isn't present
1166 | * in the kernel. |
1167 | */ |
1168 | struct vfsops * |
1169 | vfs_getopsbyname(const char *name) |
1170 | { |
1171 | struct vfsops *v; |
1172 | |
1173 | mutex_enter(&vfs_list_lock); |
1174 | LIST_FOREACH(v, &vfs_list, vfs_list) { |
1175 | if (strcmp(v->vfs_name, name) == 0) |
1176 | break; |
1177 | } |
1178 | if (v != NULL) |
1179 | v->vfs_refcount++; |
1180 | mutex_exit(&vfs_list_lock); |
1181 | |
1182 | return (v); |
1183 | } |
1184 | |
1185 | void |
1186 | copy_statvfs_info(struct statvfs *sbp, const struct mount *mp) |
1187 | { |
1188 | const struct statvfs *mbp; |
1189 | |
1190 | if (sbp == (mbp = &mp->mnt_stat)) |
1191 | return; |
1192 | |
1193 | (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx)); |
1194 | sbp->f_fsid = mbp->f_fsid; |
1195 | sbp->f_owner = mbp->f_owner; |
1196 | sbp->f_flag = mbp->f_flag; |
1197 | sbp->f_syncwrites = mbp->f_syncwrites; |
1198 | sbp->f_asyncwrites = mbp->f_asyncwrites; |
1199 | sbp->f_syncreads = mbp->f_syncreads; |
1200 | sbp->f_asyncreads = mbp->f_asyncreads; |
1201 | (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare)); |
1202 | (void)memcpy(sbp->f_fstypename, mbp->f_fstypename, |
1203 | sizeof(sbp->f_fstypename)); |
1204 | (void)memcpy(sbp->f_mntonname, mbp->f_mntonname, |
1205 | sizeof(sbp->f_mntonname)); |
1206 | (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, |
1207 | sizeof(sbp->f_mntfromname)); |
1208 | sbp->f_namemax = mbp->f_namemax; |
1209 | } |
1210 | |
1211 | int |
1212 | set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom, |
1213 | const char *vfsname, struct mount *mp, struct lwp *l) |
1214 | { |
1215 | int error; |
1216 | size_t size; |
1217 | struct statvfs *sfs = &mp->mnt_stat; |
1218 | int (*fun)(const void *, void *, size_t, size_t *); |
1219 | |
1220 | (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname, |
1221 | sizeof(mp->mnt_stat.f_fstypename)); |
1222 | |
1223 | if (onp) { |
1224 | struct cwdinfo *cwdi = l->l_proc->p_cwdi; |
1225 | fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr; |
1226 | if (cwdi->cwdi_rdir != NULL) { |
1227 | size_t len; |
1228 | char *bp; |
1229 | char *path = PNBUF_GET(); |
1230 | |
1231 | bp = path + MAXPATHLEN; |
1232 | *--bp = '\0'; |
1233 | rw_enter(&cwdi->cwdi_lock, RW_READER); |
1234 | error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, |
1235 | path, MAXPATHLEN / 2, 0, l); |
1236 | rw_exit(&cwdi->cwdi_lock); |
1237 | if (error) { |
1238 | PNBUF_PUT(path); |
1239 | return error; |
1240 | } |
1241 | |
1242 | len = strlen(bp); |
1243 | if (len > sizeof(sfs->f_mntonname) - 1) |
1244 | len = sizeof(sfs->f_mntonname) - 1; |
1245 | (void)strncpy(sfs->f_mntonname, bp, len); |
1246 | PNBUF_PUT(path); |
1247 | |
1248 | if (len < sizeof(sfs->f_mntonname) - 1) { |
1249 | error = (*fun)(onp, &sfs->f_mntonname[len], |
1250 | sizeof(sfs->f_mntonname) - len - 1, &size); |
1251 | if (error) |
1252 | return error; |
1253 | size += len; |
1254 | } else { |
1255 | size = len; |
1256 | } |
1257 | } else { |
1258 | error = (*fun)(onp, &sfs->f_mntonname, |
1259 | sizeof(sfs->f_mntonname) - 1, &size); |
1260 | if (error) |
1261 | return error; |
1262 | } |
1263 | (void)memset(sfs->f_mntonname + size, 0, |
1264 | sizeof(sfs->f_mntonname) - size); |
1265 | } |
1266 | |
1267 | if (fromp) { |
1268 | fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr; |
1269 | error = (*fun)(fromp, sfs->f_mntfromname, |
1270 | sizeof(sfs->f_mntfromname) - 1, &size); |
1271 | if (error) |
1272 | return error; |
1273 | (void)memset(sfs->f_mntfromname + size, 0, |
1274 | sizeof(sfs->f_mntfromname) - size); |
1275 | } |
1276 | return 0; |
1277 | } |
1278 | |
1279 | void |
1280 | vfs_timestamp(struct timespec *ts) |
1281 | { |
1282 | |
1283 | nanotime(ts); |
1284 | } |
1285 | |
1286 | time_t rootfstime; /* recorded root fs time, if known */ |
1287 | void |
1288 | setrootfstime(time_t t) |
1289 | { |
1290 | rootfstime = t; |
1291 | } |
1292 | |
static const uint8_t vttodt_tab[] = {
1294 | [VNON] = DT_UNKNOWN, |
1295 | [VREG] = DT_REG, |
1296 | [VDIR] = DT_DIR, |
1297 | [VBLK] = DT_BLK, |
1298 | [VCHR] = DT_CHR, |
1299 | [VLNK] = DT_LNK, |
1300 | [VSOCK] = DT_SOCK, |
1301 | [VFIFO] = DT_FIFO, |
1302 | [VBAD] = DT_UNKNOWN |
1303 | }; |
1304 | |
1305 | uint8_t |
1306 | vtype2dt(enum vtype vt) |
1307 | { |
1308 | |
1309 | CTASSERT(VBAD == __arraycount(vttodt_tab) - 1); |
1310 | return vttodt_tab[vt]; |
1311 | } |
1312 | |
1313 | int |
1314 | VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c) |
1315 | { |
1316 | int error; |
1317 | |
1318 | KERNEL_LOCK(1, NULL); |
1319 | error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c); |
1320 | KERNEL_UNLOCK_ONE(NULL); |
1321 | |
1322 | return error; |
1323 | } |
1324 | |
1325 | int |
1326 | VFS_START(struct mount *mp, int a) |
1327 | { |
1328 | int error; |
1329 | |
1330 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1331 | KERNEL_LOCK(1, NULL); |
1332 | } |
1333 | error = (*(mp->mnt_op->vfs_start))(mp, a); |
1334 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1335 | KERNEL_UNLOCK_ONE(NULL); |
1336 | } |
1337 | |
1338 | return error; |
1339 | } |
1340 | |
1341 | int |
1342 | VFS_UNMOUNT(struct mount *mp, int a) |
1343 | { |
1344 | int error; |
1345 | |
1346 | KERNEL_LOCK(1, NULL); |
1347 | error = (*(mp->mnt_op->vfs_unmount))(mp, a); |
1348 | KERNEL_UNLOCK_ONE(NULL); |
1349 | |
1350 | return error; |
1351 | } |
1352 | |
1353 | int |
1354 | VFS_ROOT(struct mount *mp, struct vnode **a) |
1355 | { |
1356 | int error; |
1357 | |
1358 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1359 | KERNEL_LOCK(1, NULL); |
1360 | } |
1361 | error = (*(mp->mnt_op->vfs_root))(mp, a); |
1362 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1363 | KERNEL_UNLOCK_ONE(NULL); |
1364 | } |
1365 | |
1366 | return error; |
1367 | } |
1368 | |
1369 | int |
1370 | VFS_QUOTACTL(struct mount *mp, struct quotactl_args *args) |
1371 | { |
1372 | int error; |
1373 | |
1374 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1375 | KERNEL_LOCK(1, NULL); |
1376 | } |
1377 | error = (*(mp->mnt_op->vfs_quotactl))(mp, args); |
1378 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1379 | KERNEL_UNLOCK_ONE(NULL); |
1380 | } |
1381 | |
1382 | return error; |
1383 | } |
1384 | |
1385 | int |
1386 | VFS_STATVFS(struct mount *mp, struct statvfs *a) |
1387 | { |
1388 | int error; |
1389 | |
1390 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1391 | KERNEL_LOCK(1, NULL); |
1392 | } |
1393 | error = (*(mp->mnt_op->vfs_statvfs))(mp, a); |
1394 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1395 | KERNEL_UNLOCK_ONE(NULL); |
1396 | } |
1397 | |
1398 | return error; |
1399 | } |
1400 | |
1401 | int |
1402 | VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b) |
1403 | { |
1404 | int error; |
1405 | |
1406 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1407 | KERNEL_LOCK(1, NULL); |
1408 | } |
1409 | error = (*(mp->mnt_op->vfs_sync))(mp, a, b); |
1410 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1411 | KERNEL_UNLOCK_ONE(NULL); |
1412 | } |
1413 | |
1414 | return error; |
1415 | } |
1416 | |
1417 | int |
1418 | VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b) |
1419 | { |
1420 | int error; |
1421 | |
1422 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1423 | KERNEL_LOCK(1, NULL); |
1424 | } |
1425 | error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b); |
1426 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1427 | KERNEL_UNLOCK_ONE(NULL); |
1428 | } |
1429 | |
1430 | return error; |
1431 | } |
1432 | |
1433 | int |
1434 | VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b) |
1435 | { |
1436 | int error; |
1437 | |
1438 | if ((vp->v_vflag & VV_MPSAFE) == 0) { |
1439 | KERNEL_LOCK(1, NULL); |
1440 | } |
1441 | error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b); |
1442 | if ((vp->v_vflag & VV_MPSAFE) == 0) { |
1443 | KERNEL_UNLOCK_ONE(NULL); |
1444 | } |
1445 | |
1446 | return error; |
1447 | } |
1448 | |
1449 | int |
1450 | VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b) |
1451 | { |
1452 | int error; |
1453 | |
1454 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1455 | KERNEL_LOCK(1, NULL); |
1456 | } |
1457 | error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b); |
1458 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1459 | KERNEL_UNLOCK_ONE(NULL); |
1460 | } |
1461 | |
1462 | return error; |
1463 | } |
1464 | |
1465 | int |
1466 | VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d) |
1467 | { |
1468 | int error; |
1469 | |
1470 | KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */ |
1471 | error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d); |
1472 | KERNEL_UNLOCK_ONE(NULL); /* XXX */ |
1473 | |
1474 | return error; |
1475 | } |
1476 | |
1477 | int |
1478 | VFS_SUSPENDCTL(struct mount *mp, int a) |
1479 | { |
1480 | int error; |
1481 | |
1482 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1483 | KERNEL_LOCK(1, NULL); |
1484 | } |
1485 | error = (*(mp->mnt_op->vfs_suspendctl))(mp, a); |
1486 | if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { |
1487 | KERNEL_UNLOCK_ONE(NULL); |
1488 | } |
1489 | |
1490 | return error; |
1491 | } |
1492 | |
1493 | #if defined(DDB) || defined(DEBUGPRINT) |
1494 | static const char buf_flagbits[] = BUF_FLAGBITS; |
1495 | |
1496 | void |
1497 | vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...)) |
1498 | { |
1499 | char bf[1024]; |
1500 | |
1501 | (*pr)(" vp %p lblkno 0x%" PRIx64" blkno 0x%" PRIx64" rawblkno 0x%" |
1502 | PRIx64 " dev 0x%x\n" , |
1503 | bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev); |
1504 | |
1505 | snprintb(bf, sizeof(bf), |
1506 | buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags); |
1507 | (*pr)(" error %d flags 0x%s\n" , bp->b_error, bf); |
1508 | |
1509 | (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n" , |
1510 | bp->b_bufsize, bp->b_bcount, bp->b_resid); |
1511 | (*pr)(" data %p saveaddr %p\n" , |
1512 | bp->b_data, bp->b_saveaddr); |
1513 | (*pr)(" iodone %p objlock %p\n" , bp->b_iodone, bp->b_objlock); |
1514 | } |
1515 | |
1516 | void |
1517 | vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...)) |
1518 | { |
1519 | |
1520 | uvm_object_printit(&vp->v_uobj, full, pr); |
1521 | (*pr)("\n" ); |
1522 | vprint_common(vp, "" , printf); |
1523 | if (full) { |
1524 | struct buf *bp; |
1525 | |
1526 | (*pr)("clean bufs:\n" ); |
1527 | LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { |
1528 | (*pr)(" bp %p\n" , bp); |
1529 | vfs_buf_print(bp, full, pr); |
1530 | } |
1531 | |
1532 | (*pr)("dirty bufs:\n" ); |
1533 | LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { |
1534 | (*pr)(" bp %p\n" , bp); |
1535 | vfs_buf_print(bp, full, pr); |
1536 | } |
1537 | } |
1538 | } |
1539 | |
1540 | void |
1541 | vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...)) |
1542 | { |
1543 | char sbuf[256]; |
1544 | |
1545 | (*pr)("vnodecovered = %p data = %p\n" , |
1546 | mp->mnt_vnodecovered,mp->mnt_data); |
1547 | |
1548 | (*pr)("fs_bshift %d dev_bshift = %d\n" , |
1549 | mp->mnt_fs_bshift,mp->mnt_dev_bshift); |
1550 | |
1551 | snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag); |
1552 | (*pr)("flag = %s\n" , sbuf); |
1553 | |
1554 | snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag); |
1555 | (*pr)("iflag = %s\n" , sbuf); |
1556 | |
1557 | (*pr)("refcnt = %d unmounting @ %p updating @ %p\n" , mp->mnt_refcnt, |
1558 | &mp->mnt_unmounting, &mp->mnt_updating); |
1559 | |
1560 | (*pr)("statvfs cache:\n" ); |
1561 | (*pr)("\tbsize = %lu\n" ,mp->mnt_stat.f_bsize); |
1562 | (*pr)("\tfrsize = %lu\n" ,mp->mnt_stat.f_frsize); |
1563 | (*pr)("\tiosize = %lu\n" ,mp->mnt_stat.f_iosize); |
1564 | |
1565 | (*pr)("\tblocks = %" PRIu64"\n" ,mp->mnt_stat.f_blocks); |
1566 | (*pr)("\tbfree = %" PRIu64"\n" ,mp->mnt_stat.f_bfree); |
1567 | (*pr)("\tbavail = %" PRIu64"\n" ,mp->mnt_stat.f_bavail); |
1568 | (*pr)("\tbresvd = %" PRIu64"\n" ,mp->mnt_stat.f_bresvd); |
1569 | |
1570 | (*pr)("\tfiles = %" PRIu64"\n" ,mp->mnt_stat.f_files); |
1571 | (*pr)("\tffree = %" PRIu64"\n" ,mp->mnt_stat.f_ffree); |
1572 | (*pr)("\tfavail = %" PRIu64"\n" ,mp->mnt_stat.f_favail); |
1573 | (*pr)("\tfresvd = %" PRIu64"\n" ,mp->mnt_stat.f_fresvd); |
1574 | |
1575 | (*pr)("\tf_fsidx = { 0x%" PRIx32", 0x%" PRIx32" }\n" , |
1576 | mp->mnt_stat.f_fsidx.__fsid_val[0], |
1577 | mp->mnt_stat.f_fsidx.__fsid_val[1]); |
1578 | |
1579 | (*pr)("\towner = %" PRIu32"\n" ,mp->mnt_stat.f_owner); |
1580 | (*pr)("\tnamemax = %lu\n" ,mp->mnt_stat.f_namemax); |
1581 | |
1582 | snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag); |
1583 | |
1584 | (*pr)("\tflag = %s\n" ,sbuf); |
1585 | (*pr)("\tsyncwrites = %" PRIu64 "\n" ,mp->mnt_stat.f_syncwrites); |
1586 | (*pr)("\tasyncwrites = %" PRIu64 "\n" ,mp->mnt_stat.f_asyncwrites); |
1587 | (*pr)("\tsyncreads = %" PRIu64 "\n" ,mp->mnt_stat.f_syncreads); |
1588 | (*pr)("\tasyncreads = %" PRIu64 "\n" ,mp->mnt_stat.f_asyncreads); |
1589 | (*pr)("\tfstypename = %s\n" ,mp->mnt_stat.f_fstypename); |
1590 | (*pr)("\tmntonname = %s\n" ,mp->mnt_stat.f_mntonname); |
1591 | (*pr)("\tmntfromname = %s\n" ,mp->mnt_stat.f_mntfromname); |
1592 | |
1593 | { |
1594 | int cnt = 0; |
1595 | struct vnode *vp; |
1596 | (*pr)("locked vnodes =" ); |
1597 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { |
1598 | if (VOP_ISLOCKED(vp)) { |
1599 | if ((++cnt % 6) == 0) { |
1600 | (*pr)(" %p,\n\t" , vp); |
1601 | } else { |
1602 | (*pr)(" %p," , vp); |
1603 | } |
1604 | } |
1605 | } |
1606 | (*pr)("\n" ); |
1607 | } |
1608 | |
1609 | if (full) { |
1610 | int cnt = 0; |
1611 | struct vnode *vp; |
1612 | (*pr)("all vnodes =" ); |
1613 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { |
1614 | if (!TAILQ_NEXT(vp, v_mntvnodes)) { |
1615 | (*pr)(" %p" , vp); |
1616 | } else if ((++cnt % 6) == 0) { |
1617 | (*pr)(" %p,\n\t" , vp); |
1618 | } else { |
1619 | (*pr)(" %p," , vp); |
1620 | } |
1621 | } |
1622 | (*pr)("\n" , vp); |
1623 | } |
1624 | } |
1625 | |
1626 | /* |
1627 | * List all of the locked vnodes in the system. |
1628 | */ |
1629 | void printlockedvnodes(void); |
1630 | |
1631 | void |
1632 | printlockedvnodes(void) |
1633 | { |
1634 | struct mount *mp, *nmp; |
1635 | struct vnode *vp; |
1636 | |
1637 | printf("Locked vnodes\n" ); |
1638 | mutex_enter(&mountlist_lock); |
1639 | for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { |
1640 | if (vfs_busy(mp, &nmp)) { |
1641 | continue; |
1642 | } |
1643 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { |
1644 | if (VOP_ISLOCKED(vp)) |
1645 | vprint(NULL, vp); |
1646 | } |
1647 | mutex_enter(&mountlist_lock); |
1648 | vfs_unbusy(mp, false, &nmp); |
1649 | } |
1650 | mutex_exit(&mountlist_lock); |
1651 | } |
1652 | |
1653 | #endif /* DDB || DEBUGPRINT */ |
1654 | |