1 | /* $NetBSD: kern_syscall.c,v 1.14 2015/11/30 23:34:47 pgoyette Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software developed for The NetBSD Foundation |
8 | * by Andrew Doran. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ |
31 | |
32 | #include <sys/cdefs.h> |
33 | __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.14 2015/11/30 23:34:47 pgoyette Exp $" ); |
34 | |
35 | #ifdef _KERNEL_OPT |
36 | #include "opt_modular.h" |
37 | #include "opt_syscall_debug.h" |
38 | #include "opt_ktrace.h" |
39 | #include "opt_ptrace.h" |
40 | #include "opt_dtrace.h" |
41 | #endif |
42 | |
43 | /* XXX To get syscall prototypes. */ |
44 | #define SYSVSHM |
45 | #define SYSVSEM |
46 | #define SYSVMSG |
47 | |
48 | #include <sys/param.h> |
49 | #include <sys/module.h> |
50 | #include <sys/sched.h> |
51 | #include <sys/syscall.h> |
52 | #include <sys/syscallargs.h> |
53 | #include <sys/syscallvar.h> |
54 | #include <sys/systm.h> |
55 | #include <sys/xcall.h> |
56 | #include <sys/ktrace.h> |
57 | #include <sys/ptrace.h> |
58 | |
59 | int |
60 | sys_nomodule(struct lwp *l, const void *v, register_t *retval) |
61 | { |
62 | #ifdef MODULAR |
63 | |
64 | const struct sysent *sy; |
65 | const struct emul *em; |
66 | const struct sc_autoload *auto_list; |
67 | u_int code; |
68 | |
69 | /* |
70 | * Restart the syscall if we interrupted a module unload that |
71 | * failed. Acquiring kernconfig_lock delays us until any unload |
72 | * has been completed or rolled back. |
73 | */ |
74 | kernconfig_lock(); |
75 | sy = l->l_sysent; |
76 | if (sy->sy_call != sys_nomodule) { |
77 | kernconfig_unlock(); |
78 | return ERESTART; |
79 | } |
80 | /* |
81 | * Try to autoload a module to satisfy the request. If it |
82 | * works, retry the request. |
83 | */ |
84 | em = l->l_proc->p_emul; |
85 | code = sy - em->e_sysent; |
86 | |
87 | if ((auto_list = em->e_sc_autoload) != NULL) |
88 | for (; auto_list->al_code > 0; auto_list++) { |
89 | if (auto_list->al_code != code) { |
90 | continue; |
91 | } |
92 | if (module_autoload(auto_list->al_module, |
93 | MODULE_CLASS_ANY) != 0 || |
94 | sy->sy_call == sys_nomodule) { |
95 | break; |
96 | } |
97 | kernconfig_unlock(); |
98 | return ERESTART; |
99 | } |
100 | kernconfig_unlock(); |
101 | #endif /* MODULAR */ |
102 | |
103 | return sys_nosys(l, v, retval); |
104 | } |
105 | |
106 | int |
107 | syscall_establish(const struct emul *em, const struct syscall_package *sp) |
108 | { |
109 | struct sysent *sy; |
110 | int i; |
111 | |
112 | KASSERT(kernconfig_is_held()); |
113 | |
114 | if (em == NULL) { |
115 | em = &emul_netbsd; |
116 | } |
117 | sy = em->e_sysent; |
118 | |
119 | /* |
120 | * Ensure that all preconditions are valid, since this is |
121 | * an all or nothing deal. Once a system call is entered, |
122 | * it can become busy and we could be unable to remove it |
123 | * on error. |
124 | */ |
125 | for (i = 0; sp[i].sp_call != NULL; i++) { |
126 | if (sy[sp[i].sp_code].sy_call != sys_nomodule) { |
127 | #ifdef DIAGNOSTIC |
128 | printf("syscall %d is busy\n" , sp[i].sp_code); |
129 | #endif |
130 | return EBUSY; |
131 | } |
132 | } |
133 | /* Everything looks good, patch them in. */ |
134 | for (i = 0; sp[i].sp_call != NULL; i++) { |
135 | sy[sp[i].sp_code].sy_call = sp[i].sp_call; |
136 | } |
137 | |
138 | return 0; |
139 | } |
140 | |
141 | int |
142 | syscall_disestablish(const struct emul *em, const struct syscall_package *sp) |
143 | { |
144 | struct sysent *sy; |
145 | uint64_t where; |
146 | lwp_t *l; |
147 | int i; |
148 | |
149 | KASSERT(kernconfig_is_held()); |
150 | |
151 | if (em == NULL) { |
152 | em = &emul_netbsd; |
153 | } |
154 | sy = em->e_sysent; |
155 | |
156 | /* |
157 | * First, patch the system calls to sys_nomodule to gate further |
158 | * activity. |
159 | */ |
160 | for (i = 0; sp[i].sp_call != NULL; i++) { |
161 | KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call); |
162 | sy[sp[i].sp_code].sy_call = sys_nomodule; |
163 | } |
164 | |
165 | /* |
166 | * Run a cross call to cycle through all CPUs. This does two |
167 | * things: lock activity provides a barrier and makes our update |
168 | * of sy_call visible to all CPUs, and upon return we can be sure |
169 | * that we see pertinent values of l_sysent posted by remote CPUs. |
170 | */ |
171 | where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); |
172 | xc_wait(where); |
173 | |
174 | /* |
175 | * Now it's safe to check l_sysent. Run through all LWPs and see |
176 | * if anyone is still using the system call. |
177 | */ |
178 | for (i = 0; sp[i].sp_call != NULL; i++) { |
179 | mutex_enter(proc_lock); |
180 | LIST_FOREACH(l, &alllwp, l_list) { |
181 | if (l->l_sysent == &sy[sp[i].sp_code]) { |
182 | break; |
183 | } |
184 | } |
185 | mutex_exit(proc_lock); |
186 | if (l == NULL) { |
187 | continue; |
188 | } |
189 | /* |
190 | * We lose: one or more calls are still in use. Put back |
191 | * the old entrypoints and act like nothing happened. |
192 | * When we drop kernconfig_lock, any system calls held in |
193 | * sys_nomodule() will be restarted. |
194 | */ |
195 | for (i = 0; sp[i].sp_call != NULL; i++) { |
196 | sy[sp[i].sp_code].sy_call = sp[i].sp_call; |
197 | } |
198 | return EBUSY; |
199 | } |
200 | |
201 | return 0; |
202 | } |
203 | |
/*
 * Report whether system call tracing is enabled for process 'p'.
 *
 * The answer is true when the kernel is built with SYSCALL_DEBUG, when
 * (with KTRACE) either KTRFAC_SYSCALL or KTRFAC_SYSRET is set in
 * p_traceflag, or when (with PTRACE) PSL_SYSCALL is set in p_slflag.
 * Otherwise it is false.
 */
bool
trace_is_enabled(struct proc *p)
{
#ifdef SYSCALL_DEBUG
	return true;
#endif

#ifdef KTRACE
	if ((p->p_traceflag & (KTRFAC_SYSCALL | KTRFAC_SYSRET)) != 0) {
		return true;
	}
#endif

#ifdef PTRACE
	if ((p->p_slflag & PSL_SYSCALL) != 0) {
		return true;
	}
#endif

	return false;
}
224 | |
225 | /* |
226 | * Start trace of particular system call. If process is being traced, |
227 | * this routine is called by MD syscall dispatch code just before |
228 | * a system call is actually executed. |
229 | */ |
230 | int |
231 | trace_enter(register_t code, const struct sysent *sy, const void *args) |
232 | { |
233 | int error = 0; |
234 | |
235 | #ifdef KDTRACE_HOOKS |
236 | if (sy->sy_entry) { |
237 | struct emul *e = curlwp->l_proc->p_emul; |
238 | (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0); |
239 | } |
240 | #endif |
241 | |
242 | #ifdef SYSCALL_DEBUG |
243 | scdebug_call(code, args); |
244 | #endif /* SYSCALL_DEBUG */ |
245 | |
246 | ktrsyscall(code, args, sy->sy_narg); |
247 | |
248 | #ifdef PTRACE |
249 | if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == |
250 | (PSL_SYSCALL|PSL_TRACED)) { |
251 | process_stoptrace(); |
252 | if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) { |
253 | /* tracer will emulate syscall for us */ |
254 | error = EJUSTRETURN; |
255 | } |
256 | } |
257 | #endif |
258 | return error; |
259 | } |
260 | |
261 | /* |
262 | * End trace of particular system call. If process is being traced, |
263 | * this routine is called by MD syscall dispatch code just after |
264 | * a system call finishes. |
265 | * MD caller guarantees the passed 'code' is within the supported |
266 | * system call number range for emulation the process runs under. |
267 | */ |
268 | void |
269 | trace_exit(register_t code, const struct sysent *sy, const void *args, |
270 | register_t rval[], int error) |
271 | { |
272 | #if defined(PTRACE) || defined(KDTRACE_HOOKS) |
273 | struct proc *p = curlwp->l_proc; |
274 | #endif |
275 | |
276 | #ifdef KDTRACE_HOOKS |
277 | if (sy->sy_return) { |
278 | (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args, |
279 | rval, error); |
280 | } |
281 | #endif |
282 | |
283 | #ifdef SYSCALL_DEBUG |
284 | scdebug_ret(code, error, rval); |
285 | #endif /* SYSCALL_DEBUG */ |
286 | |
287 | ktrsysret(code, error, rval); |
288 | |
289 | #ifdef PTRACE |
290 | if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) == |
291 | (PSL_SYSCALL|PSL_TRACED)) |
292 | process_stoptrace(); |
293 | CLR(p->p_slflag, PSL_SYSCALLEMU); |
294 | #endif |
295 | } |
296 | |