1/* $NetBSD: socketvar.h,v 1.141 2016/09/13 07:01:08 martin Exp $ */
2
3/*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)socketvar.h 8.3 (Berkeley) 2/19/95
61 */
62
63#ifndef _SYS_SOCKETVAR_H_
64#define _SYS_SOCKETVAR_H_
65
66#include <sys/select.h>
67#include <sys/selinfo.h> /* for struct selinfo */
68#include <sys/queue.h>
69#include <sys/mutex.h>
70#include <sys/condvar.h>
71
72#if !defined(_KERNEL)
73struct uio;
74struct lwp;
75struct uidinfo;
76#else
77#include <sys/uidinfo.h>
78#endif
79
/* Tail-queue head type for lists of sockets (see so_q0 and so_q below). */
TAILQ_HEAD(soqhead, socket);
81
82/*
83 * Variables for socket buffering.
84 */
85struct sockbuf {
86 struct selinfo sb_sel; /* process selecting read/write */
87 struct mowner *sb_mowner; /* who owns data for this sockbuf */
88 struct socket *sb_so; /* back pointer to socket */
89 kcondvar_t sb_cv; /* notifier */
90 /* When re-zeroing this struct, we zero from sb_startzero to the end */
91#define sb_startzero sb_cc
92 u_long sb_cc; /* actual chars in buffer */
93 u_long sb_hiwat; /* max actual char count */
94 u_long sb_mbcnt; /* chars of mbufs used */
95 u_long sb_mbmax; /* max chars of mbufs to use */
96 long sb_lowat; /* low water mark */
97 struct mbuf *sb_mb; /* the mbuf chain */
98 struct mbuf *sb_mbtail; /* the last mbuf in the chain */
99 struct mbuf *sb_lastrecord; /* first mbuf of last record in
100 socket buffer */
101 int sb_flags; /* flags, see below */
102 int sb_timeo; /* timeout for read/write */
103 u_long sb_overflowed; /* # of drops due to full buffer */
104};
105
/* Default for the maximum number of chars in a sockbuf; may be pre-defined. */
#ifndef SB_MAX
#define	SB_MAX		(256*1024)
#endif

/* Bits for sockbuf.sb_flags. */
#define	SB_LOCK		0x01	/* lock on data queue */
#define	SB_NOTIFY	0x04	/* someone is waiting for data/space */
#define	SB_ASYNC	0x10	/* ASYNC I/O, need signals */
#define	SB_UPCALL	0x20	/* someone wants an upcall */
#define	SB_NOINTR	0x40	/* operations not interruptible */
#define	SB_KNOTE	0x100	/* kernel note attached */
#define	SB_AUTOSIZE	0x800	/* automatically size socket buffer */
117
118/*
119 * Kernel structure per socket.
120 * Contains send and receive buffer queues,
121 * handle on protocol and pointer to protocol
122 * private data and error information.
123 */
124struct so_accf {
125 struct accept_filter *so_accept_filter;
126 void *so_accept_filter_arg; /* saved filter args */
127 char *so_accept_filter_str; /* saved user args */
128};
129
130struct sockaddr;
131
132struct socket {
133 kmutex_t * volatile so_lock; /* pointer to lock on structure */
134 kcondvar_t so_cv; /* notifier */
135 short so_type; /* generic type, see socket.h */
136 short so_options; /* from socket call, see socket.h */
137 u_short so_linger; /* time to linger while closing */
138 short so_state; /* internal state flags SS_*, below */
139 int so_unused; /* used to be so_nbio */
140 void *so_pcb; /* protocol control block */
141 const struct protosw *so_proto; /* protocol handle */
142/*
143 * Variables for connection queueing.
144 * Socket where accepts occur is so_head in all subsidiary sockets.
145 * If so_head is 0, socket is not related to an accept.
146 * For head socket so_q0 queues partially completed connections,
147 * while so_q is a queue of connections ready to be accepted.
148 * If a connection is aborted and it has so_head set, then
149 * it has to be pulled out of either so_q0 or so_q.
150 * We allow connections to queue up based on current queue lengths
151 * and limit on number of queued connections for this socket.
152 */
153 struct socket *so_head; /* back pointer to accept socket */
154 struct soqhead *so_onq; /* queue (q or q0) that we're on */
155 struct soqhead so_q0; /* queue of partial connections */
156 struct soqhead so_q; /* queue of incoming connections */
157 TAILQ_ENTRY(socket) so_qe; /* our queue entry (q or q0) */
158 short so_q0len; /* partials on so_q0 */
159 short so_qlen; /* number of connections on so_q */
160 short so_qlimit; /* max number queued connections */
161 short so_timeo; /* connection timeout */
162 u_short so_error; /* error affecting connection */
163 u_short so_aborting; /* references from soabort() */
164 pid_t so_pgid; /* pgid for signals */
165 u_long so_oobmark; /* chars to oob mark */
166 struct sockbuf so_snd; /* send buffer */
167 struct sockbuf so_rcv; /* receive buffer */
168
169 void *so_internal; /* Space for svr4 stream data */
170 void (*so_upcall) (struct socket *, void *, int, int);
171 void * so_upcallarg; /* Arg for above */
172 int (*so_send) (struct socket *, struct sockaddr *,
173 struct uio *, struct mbuf *,
174 struct mbuf *, int, struct lwp *);
175 int (*so_receive) (struct socket *,
176 struct mbuf **,
177 struct uio *, struct mbuf **,
178 struct mbuf **, int *);
179 struct mowner *so_mowner; /* who owns mbufs for this socket */
180 struct uidinfo *so_uidinfo; /* who opened the socket */
181 gid_t so_egid; /* creator effective gid */
182 pid_t so_cpid; /* creator pid */
183 struct so_accf *so_accf;
184 kauth_cred_t so_cred; /* socket credentials */
185};
186
/*
 * Socket state bits, kept in socket.so_state.
 */
#define	SS_NOFDREF		0x001	/* no file table ref any more */
#define	SS_ISCONNECTED		0x002	/* socket connected to a peer */
#define	SS_ISCONNECTING		0x004	/* in process of connecting to peer */
#define	SS_ISDISCONNECTING	0x008	/* in process of disconnecting */
#define	SS_CANTSENDMORE		0x010	/* can't send more data to peer */
#define	SS_CANTRCVMORE		0x020	/* can't receive more data from peer */
#define	SS_RCVATMARK		0x040	/* at mark on input */
#define	SS_ISABORTING		0x080	/* aborting fd references - close() */
#define	SS_RESTARTSYS		0x100	/* restart blocked system calls */
#define	SS_ISDISCONNECTED	0x800	/* socket disconnected from peer */

/*
 * NOTE(review): SS_ASYNC below has the same value (0x100) as
 * SS_RESTARTSYS above, so the two cannot be told apart in so_state.
 * Values are left untouched here; confirm whether the overlap is
 * intentional before relying on either bit.
 */
#define	SS_ASYNC		0x100	/* async i/o notify */
#define	SS_MORETOCOME		0x400	/*
					 * hint from sosend to lower layer;
					 * more data coming
					 */
#define	SS_ISAPIPE		0x1000	/* socket is implementing a pipe */
#define	SS_NBIO			0x2000	/* socket is in non blocking I/O */
208
209#ifdef _KERNEL
210
211struct accept_filter {
212 char accf_name[16];
213 void (*accf_callback)
214 (struct socket *, void *, int, int);
215 void * (*accf_create)
216 (struct socket *, char *);
217 void (*accf_destroy)
218 (struct socket *);
219 LIST_ENTRY(accept_filter) accf_next;
220 u_int accf_refcnt;
221};
222
223struct sockopt {
224 int sopt_level; /* option level */
225 int sopt_name; /* option name */
226 size_t sopt_size; /* data length */
227 void * sopt_data; /* data pointer */
228 uint8_t sopt_buf[sizeof(int)]; /* internal storage */
229};
230
/*
 * If the mbuf chain of (sb) is empty, also clear the cached tail and
 * last-record pointers.  Asserts that the owning socket is locked.
 * The argument is evaluated more than once.
 */
#define	SB_EMPTY_FIXUP(sb)						\
do {									\
	KASSERT(solocked((sb)->sb_so));					\
	if ((sb)->sb_mb == NULL) {					\
		(sb)->sb_lastrecord = NULL;				\
		(sb)->sb_mbtail = NULL;					\
	}								\
} while (/*CONSTCOND*/0)
239
240extern u_long sb_max;
241extern int somaxkva;
242extern int sock_loan_thresh;
243extern kmutex_t *softnet_lock;
244
245struct mbuf;
246struct lwp;
247struct msghdr;
248struct stat;
249struct knote;
250
251struct mbuf *getsombuf(struct socket *, int);
252
253/*
254 * File operations on sockets.
255 */
256int soo_read(file_t *, off_t *, struct uio *, kauth_cred_t, int);
257int soo_write(file_t *, off_t *, struct uio *, kauth_cred_t, int);
258int soo_fcntl(file_t *, u_int cmd, void *);
259int soo_ioctl(file_t *, u_long cmd, void *);
260int soo_poll(file_t *, int);
261int soo_kqfilter(file_t *, struct knote *);
262int soo_close(file_t *);
263int soo_stat(file_t *, struct stat *);
264void soo_restart(file_t *);
265void sbappend(struct sockbuf *, struct mbuf *);
266void sbappendstream(struct sockbuf *, struct mbuf *);
267int sbappendaddr(struct sockbuf *, const struct sockaddr *, struct mbuf *,
268 struct mbuf *);
269int sbappendaddrchain(struct sockbuf *, const struct sockaddr *,
270 struct mbuf *, int);
271int sbappendcontrol(struct sockbuf *, struct mbuf *, struct mbuf *);
272void sbappendrecord(struct sockbuf *, struct mbuf *);
273void sbcheck(struct sockbuf *);
274void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
275struct mbuf *
276 sbcreatecontrol(void *, int, int, int);
277struct mbuf *
278 sbcreatecontrol1(void **, int, int, int, int);
279void sbdrop(struct sockbuf *, int);
280void sbdroprecord(struct sockbuf *);
281void sbflush(struct sockbuf *);
282void sbinsertoob(struct sockbuf *, struct mbuf *);
283void sbrelease(struct sockbuf *, struct socket *);
284int sbreserve(struct sockbuf *, u_long, struct socket *);
285int sbwait(struct sockbuf *);
286int sb_max_set(u_long);
287void soinit(void);
288void soinit1(void);
289void soinit2(void);
290int soabort(struct socket *);
291int soaccept(struct socket *, struct sockaddr *);
292int sofamily(const struct socket *);
293int sobind(struct socket *, struct sockaddr *, struct lwp *);
294void socantrcvmore(struct socket *);
295void socantsendmore(struct socket *);
296int soclose(struct socket *);
297int soconnect(struct socket *, struct sockaddr *, struct lwp *);
298int soconnect2(struct socket *, struct socket *);
299int socreate(int, struct socket **, int, int, struct lwp *,
300 struct socket *);
301int fsocreate(int, struct socket **, int, int, int *);
302int sodisconnect(struct socket *);
303void sofree(struct socket *);
304int sogetopt(struct socket *, struct sockopt *);
305void sohasoutofband(struct socket *);
306void soisconnected(struct socket *);
307void soisconnecting(struct socket *);
308void soisdisconnected(struct socket *);
309void soisdisconnecting(struct socket *);
310int solisten(struct socket *, int, struct lwp *);
311struct socket *
312 sonewconn(struct socket *, bool);
313void soqinsque(struct socket *, struct socket *, int);
314bool soqremque(struct socket *, int);
315int soreceive(struct socket *, struct mbuf **, struct uio *,
316 struct mbuf **, struct mbuf **, int *);
317int soreserve(struct socket *, u_long, u_long);
318void sorflush(struct socket *);
319int sosend(struct socket *, struct sockaddr *, struct uio *,
320 struct mbuf *, struct mbuf *, int, struct lwp *);
321int sosetopt(struct socket *, struct sockopt *);
322int so_setsockopt(struct lwp *, struct socket *, int, int, const void *, size_t);
323int soshutdown(struct socket *, int);
324void sorestart(struct socket *);
325void sowakeup(struct socket *, struct sockbuf *, int);
326int sockargs(struct mbuf **, const void *, size_t, int);
327int sopoll(struct socket *, int);
328struct socket *soget(bool);
329void soput(struct socket *);
330bool solocked(struct socket *);
331bool solocked2(struct socket *, struct socket *);
332int sblock(struct sockbuf *, int);
333void sbunlock(struct sockbuf *);
334int sowait(struct socket *, bool, int);
335void solockretry(struct socket *, kmutex_t *);
336void sosetlock(struct socket *);
337void solockreset(struct socket *, kmutex_t *);
338
339void sockopt_init(struct sockopt *, int, int, size_t);
340void sockopt_destroy(struct sockopt *);
341int sockopt_set(struct sockopt *, const void *, size_t);
342int sockopt_setint(struct sockopt *, int);
343int sockopt_get(const struct sockopt *, void *, size_t);
344int sockopt_getint(const struct sockopt *, int *);
345int sockopt_setmbuf(struct sockopt *, struct mbuf *);
346struct mbuf *sockopt_getmbuf(const struct sockopt *);
347
348int copyout_sockname(struct sockaddr *, unsigned int *, int, struct mbuf *);
349int copyout_msg_control(struct lwp *, struct msghdr *, struct mbuf *);
350void free_control_mbuf(struct lwp *, struct mbuf *, struct mbuf *);
351
352int do_sys_getpeername(int, struct sockaddr *);
353int do_sys_getsockname(int, struct sockaddr *);
354int do_sys_sendmsg(struct lwp *, int, struct msghdr *, int,
355 const void *, size_t, register_t *);
356int do_sys_recvmsg(struct lwp *, int, struct msghdr *,
357 const void *, size_t,
358 struct mbuf **, struct mbuf **, register_t *);
359
360int do_sys_bind(struct lwp *, int, struct sockaddr *);
361int do_sys_connect(struct lwp *, int, struct sockaddr *);
362int do_sys_accept(struct lwp *, int, struct sockaddr *, register_t *,
363 const sigset_t *, int, int);
364
365/*
366 * Inline functions for sockets and socket buffering.
367 */
368
369#include <sys/protosw.h>
370#include <sys/mbuf.h>
371
372/*
373 * Do we need to notify the other side when I/O is possible?
374 */
375static inline int
376sb_notify(struct sockbuf *sb)
377{
378
379 KASSERT(solocked(sb->sb_so));
380
381 return sb->sb_flags & (SB_NOTIFY | SB_ASYNC | SB_UPCALL | SB_KNOTE);
382}
383
384/*
385 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
386 * This is problematical if the fields are unsigned, as the space might
387 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
388 * overflow and return 0.
389 */
390static inline long
391sbspace(struct sockbuf *sb)
392{
393
394 KASSERT(solocked(sb->sb_so));
395
396 return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
397}
398
399/* do we have to send all at once on a socket? */
400static inline int
401sosendallatonce(struct socket *so)
402{
403
404 return so->so_proto->pr_flags & PR_ATOMIC;
405}
406
407/* can we read something from so? */
408static inline int
409soreadable(struct socket *so)
410{
411
412 KASSERT(solocked(so));
413
414 return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
415 (so->so_state & SS_CANTRCVMORE) != 0 ||
416 so->so_qlen != 0 || so->so_error != 0;
417}
418
419/* can we write something to so? */
420static inline int
421sowritable(struct socket *so)
422{
423
424 KASSERT(solocked(so));
425
426 return (sbspace(&so->so_snd) >= so->so_snd.sb_lowat &&
427 ((so->so_state & SS_ISCONNECTED) != 0 ||
428 (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
429 (so->so_state & SS_CANTSENDMORE) != 0 ||
430 so->so_error != 0;
431}
432
433/* adjust counters in sb reflecting allocation of m */
434static inline void
435sballoc(struct sockbuf *sb, struct mbuf *m)
436{
437
438 KASSERT(solocked(sb->sb_so));
439
440 sb->sb_cc += m->m_len;
441 sb->sb_mbcnt += MSIZE;
442 if (m->m_flags & M_EXT)
443 sb->sb_mbcnt += m->m_ext.ext_size;
444}
445
446/* adjust counters in sb reflecting freeing of m */
447static inline void
448sbfree(struct sockbuf *sb, struct mbuf *m)
449{
450
451 KASSERT(solocked(sb->sb_so));
452
453 sb->sb_cc -= m->m_len;
454 sb->sb_mbcnt -= MSIZE;
455 if (m->m_flags & M_EXT)
456 sb->sb_mbcnt -= m->m_ext.ext_size;
457}
458
459static inline void
460sorwakeup(struct socket *so)
461{
462
463 KASSERT(solocked(so));
464
465 if (sb_notify(&so->so_rcv))
466 sowakeup(so, &so->so_rcv, POLL_IN);
467}
468
469static inline void
470sowwakeup(struct socket *so)
471{
472
473 KASSERT(solocked(so));
474
475 if (sb_notify(&so->so_snd))
476 sowakeup(so, &so->so_snd, POLL_OUT);
477}
478
479static inline void
480solock(struct socket *so)
481{
482 kmutex_t *lock;
483
484 lock = so->so_lock;
485 mutex_enter(lock);
486 if (__predict_false(lock != so->so_lock))
487 solockretry(so, lock);
488}
489
490static inline void
491sounlock(struct socket *so)
492{
493
494 mutex_exit(so->so_lock);
495}
496
/*
 * Socket buffer consistency checks.
 *
 * SBLASTRECORDCHK: check sb->sb_lastrecord is maintained correctly.
 * SBLASTMBUFCHK: check sb->sb_mbtail is maintained correctly.
 * SBCHECK: run sbcheck() on the buffer.
 *
 * => panic if the socket buffer is inconsistent.
 * => 'where' is used for a panic message.
 *
 * Without SOCKBUF_DEBUG all three macros expand to nothing.
 */
#ifdef SOCKBUF_DEBUG

void	sblastrecordchk(struct sockbuf *, const char *);
void	sblastmbufchk(struct sockbuf *, const char *);

#define	SBLASTRECORDCHK(sb, where)	sblastrecordchk((sb), (where))
#define	SBLASTMBUFCHK(sb, where)	sblastmbufchk((sb), (where))
#define	SBCHECK(sb)			sbcheck(sb)

#else /* !SOCKBUF_DEBUG */

#define	SBLASTRECORDCHK(sb, where)	/* nothing */
#define	SBLASTMBUFCHK(sb, where)	/* nothing */
#define	SBCHECK(sb)			/* nothing */

#endif /* SOCKBUF_DEBUG */
516
517/* sosend loan */
518vaddr_t sokvaalloc(vaddr_t, vsize_t, struct socket *);
519void sokvafree(vaddr_t, vsize_t);
520void soloanfree(struct mbuf *, void *, size_t, void *);
521
/*
 * Values for socket-buffer-append priority argument to sbappendaddrchain().
 * The following flags are reserved for future implementation:
 *
 *  SB_PRIO_NONE:  honour normal socket-buffer limits.
 *
 *  SB_PRIO_ONESHOT_OVERFLOW:  if the socket has any space,
 *	deliver the entire chain. Intended for large requests
 *	that should be delivered in their entirety, or not at all.
 *
 *  SB_PRIO_OVERDRAFT:  allow a small (2*MLEN) overflow, over and
 *	above normal socket limits. Intended for messages indicating
 *	buffer overflow in earlier normal/lower-priority messages.
 *
 *  SB_PRIO_BESTEFFORT:  Ignore limits entirely.  Intended only for
 *	kernel-generated messages to specially-marked sockets which
 *	require "reliable" delivery, and where the source socket/protocol
 *	message generator enforces some hard limit (but possibly well
 *	above kern.sbmax). It is entirely up to the in-kernel source to
 *	avoid complete mbuf exhaustion or DoS scenarios.
 */
/* Priority values accepted by sbappendaddrchain(). */
#define	SB_PRIO_NONE			0
#define	SB_PRIO_ONESHOT_OVERFLOW	1
#define	SB_PRIO_OVERDRAFT		2
#define	SB_PRIO_BESTEFFORT		3
547
/*
 * Accept filter functions.
 */
int	accept_filt_getopt(struct socket *, struct sockopt *);
int	accept_filt_setopt(struct socket *, const struct sockopt *);
int	accept_filt_clear(struct socket *);
int	accept_filt_add(struct accept_filter *);
int	accept_filt_del(struct accept_filter *);
struct accept_filter	*accept_filt_get(char *);

/* Only visible when ACCEPT_FILTER_MOD is defined. */
#ifdef ACCEPT_FILTER_MOD
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_accf);
#endif
void	accept_filter_init(void);
#endif /* ACCEPT_FILTER_MOD */
563
564#endif /* _KERNEL */
565
566#endif /* !_SYS_SOCKETVAR_H_ */
567