/* $NetBSD: uvm_emap.c,v 1.11 2014/11/27 14:25:01 uebayasi Exp $ */

/*-
 * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Mindaugas Rasiukevicius and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * UVM ephemeral mapping interface.
 */

/*
 * Overview:
 *
 * On multiprocessor systems, frequent use of pmap_kenter_pa/pmap_kremove
 * for ephemeral mappings is undesirable because it is likely to trigger
 * TLB flush IPIs, since pmap_kernel() is shared among all LWPs.  This
 * interface can be used instead, to reduce the number of IPIs.
 *
 * For a single-page mapping, the direct map (PMAP_DIRECT_MAP, available
 * when __HAVE_DIRECT_MAP is defined) is likely a better choice.
 */
47
48/*
49 * How to use:
50 *
51 * Map pages at the address:
52 *
53 * uvm_emap_enter(va, pgs, npages);
54 * gen = uvm_emap_produce();
55 *
56 * Read pages via the mapping:
57 *
58 * uvm_emap_consume(gen);
59 * some_access(va);
60 *
61 * After finishing using the mapping:
62 *
63 * uvm_emap_remove(va, len);
64 */
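
/*
 * As a slightly fuller sketch (illustration only: pgs, npages, buf and
 * len are assumed to come from the caller, and the VA window is assumed
 * to have been obtained from uvm_emap_alloc() where that pool is enabled),
 * a read-only consumer of a set of pages would follow roughly this shape:
 *
 *	vaddr_t va = uvm_emap_alloc(ptoa(npages), true);
 *	u_int gen;
 *
 *	uvm_emap_enter(va, pgs, npages);
 *	gen = uvm_emap_produce();
 *	...
 *	uvm_emap_consume(gen);
 *	memcpy(buf, (void *)va, len);		(any read-only access)
 *	...
 *	uvm_emap_remove(va, ptoa(npages));
 *	uvm_emap_free(va, ptoa(npages));
 */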

/*
 * Notes for pmap developers:
 *
 * Generic (more expensive) stubs are implemented for architectures which
 * do not support emap (__HAVE_PMAP_EMAP).
 *
 * Note that uvm_emap_update() is called from the lower pmap(9) layer,
 * while the other functions call into pmap(9).  A typical update pattern
 * in a pmap is:
 *
 *	u_int gen = uvm_emap_gen_return();
 *	tlbflush();
 *	uvm_emap_update(gen);
 *
 * This pattern also runs from IPI context, therefore these functions
 * must be IPI-safe.
 */
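
/*
 * As an illustration of that pattern (a sketch only: the handler name
 * pmap_tlb_shootdown_ipi() and the flush primitive tlbflushg() are
 * placeholders for whatever the MD pmap actually uses), a pmap that
 * already broadcasts TLB shootdown IPIs could fold the emap sync into
 * its IPI handler:
 *
 *	void
 *	pmap_tlb_shootdown_ipi(void)
 *	{
 *		u_int gen;
 *
 *		gen = uvm_emap_gen_return();	(sample before the flush)
 *		tlbflushg();			(the MD TLB flush itself)
 *		uvm_emap_update(gen);		(publish for this CPU)
 *	}
 */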

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_emap.c,v 1.11 2014/11/27 14:25:01 uebayasi Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lwp.h>
#include <sys/vmem.h>
#include <sys/types.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

/* XXX: Arbitrary. */
#ifdef _LP64
#define UVM_EMAP_SIZE	(128 * 1024 * 1024)	/* 128 MB */
#else
#define UVM_EMAP_SIZE	(32 * 1024 * 1024)	/* 32 MB */
#endif

/* Global emap generation counter, kept in its own cache line. */
static u_int		_uvm_emap_gen[COHERENCY_UNIT / sizeof(u_int)]
    __aligned(COHERENCY_UNIT);

#define uvm_emap_gen	(_uvm_emap_gen[0])

u_int		uvm_emap_size = UVM_EMAP_SIZE;
static vaddr_t	uvm_emap_va;
static vmem_t *	uvm_emap_vmem;

/*
 * uvm_emap_sysinit: initialize the subsystem.
 */
void
uvm_emap_sysinit(void)
{
	struct uvm_cpu *ucpu;
	/* size_t qmax; */
	u_int i;

	uvm_emap_size = roundup(uvm_emap_size, PAGE_SIZE);
#if 0
	qmax = 16 * PAGE_SIZE;
	uvm_emap_va = uvm_km_alloc(kernel_map, uvm_emap_size, 0,
	    UVM_KMF_VAONLY | UVM_KMF_WAITVA);
	if (uvm_emap_va == 0) {
		panic("uvm_emap_init: KVA allocation failed");
	}

	uvm_emap_vmem = vmem_create("emap", uvm_emap_va, uvm_emap_size,
	    PAGE_SIZE, NULL, NULL, NULL, qmax, VM_SLEEP, IPL_NONE);
	if (uvm_emap_vmem == NULL) {
		panic("uvm_emap_init: vmem creation failed");
	}
#else
	uvm_emap_va = 0;
	uvm_emap_vmem = NULL;
#endif
	/* Initial generation value is 1. */
	uvm_emap_gen = 1;
	for (i = 0; i < maxcpus; i++) {
		ucpu = uvm.cpus[i];
		if (ucpu != NULL) {
			ucpu->emap_gen = 1;
		}
	}
}

/*
 * uvm_emap_alloc: allocate a window.
 */
vaddr_t
uvm_emap_alloc(vsize_t size, bool waitok)
{
	vmem_addr_t addr;

	KASSERT(size > 0);
	KASSERT(round_page(size) == size);

	if (vmem_alloc(uvm_emap_vmem, size,
	    VM_INSTANTFIT | (waitok ? VM_SLEEP : VM_NOSLEEP), &addr) == 0)
		return (vaddr_t)addr;

	return (vaddr_t)0;
}

/*
 * uvm_emap_free: free a window.
 */
void
uvm_emap_free(vaddr_t va, size_t size)
{

	KASSERT(va >= uvm_emap_va);
	KASSERT(size <= uvm_emap_size);
	KASSERT(va + size <= uvm_emap_va + uvm_emap_size);

	vmem_free(uvm_emap_vmem, va, size);
}

#ifdef __HAVE_PMAP_EMAP

/*
 * uvm_emap_enter: enter a new mapping, without TLB flush.
 */
void
uvm_emap_enter(vaddr_t va, struct vm_page **pgs, u_int npages)
{
	paddr_t pa;
	u_int n;

	for (n = 0; n < npages; n++, va += PAGE_SIZE) {
		pa = VM_PAGE_TO_PHYS(pgs[n]);
		pmap_emap_enter(va, pa, VM_PROT_READ);
	}
}

/*
 * uvm_emap_remove: remove a mapping.
 */
void
uvm_emap_remove(vaddr_t sva, vsize_t len)
{

	pmap_emap_remove(sva, len);
}

/*
 * uvm_emap_gen_return: get the global generation number.
 *
 * => can be called from IPI handler, therefore function must be safe.
 */
u_int
uvm_emap_gen_return(void)
{
	u_int gen;

	gen = uvm_emap_gen;
	if (__predict_false(gen == UVM_EMAP_INACTIVE)) {
		/*
		 * Instead of looping, just increment it on our side.
		 * Another thread could race and increment it again,
		 * but that has no negative effect.
		 */
		gen = atomic_inc_uint_nv(&uvm_emap_gen);
	}
	KASSERT(gen != UVM_EMAP_INACTIVE);
	return gen;
}

/*
 * uvm_emap_switch: if the CPU is 'behind' the LWP in emap visibility,
 * perform a TLB flush and thus update the local view.  Its main purpose
 * is to handle kernel preemption while an emap is in use.
 *
 * => called from mi_switch(), when an LWP returns after a block or preempt.
 */
void
uvm_emap_switch(lwp_t *l)
{
	struct uvm_cpu *ucpu;
	u_int curgen, gen;

	KASSERT(kpreempt_disabled());

	/* If LWP did not use emap, then nothing to do. */
	if (__predict_true(l->l_emap_gen == UVM_EMAP_INACTIVE)) {
		return;
	}

	/*
	 * No need to synchronise if the generation number of the current
	 * CPU is newer than that of this LWP.
	 *
	 * This test assumes two's complement arithmetic and allows
	 * ~2B missed updates before it will produce bad results.
	 */
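	/*
	 * A worked example of the wrap-around safe comparison below
	 * (the numbers are made up, purely for illustration): with
	 * curgen = 0x00000002 and gen = 0xfffffffe, the unsigned
	 * difference is 4, which is >= 0 when cast to signed int, so a
	 * CPU whose counter has already wrapped is still seen as newer.
	 */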
	ucpu = curcpu()->ci_data.cpu_uvm;
	curgen = ucpu->emap_gen;
	gen = l->l_emap_gen;
	if (__predict_true((signed int)(curgen - gen) >= 0)) {
		return;
	}

	/*
	 * See comments in uvm_emap_consume() about memory
	 * barriers and race conditions.
	 */
	curgen = uvm_emap_gen_return();
	pmap_emap_sync(false);
	ucpu->emap_gen = curgen;
}

/*
 * uvm_emap_consume: update the current CPU and LWP to the given generation
 * of the emap.  If the LWP migrates to a different CPU after a block or
 * preempt, uvm_emap_switch() will synchronise it there.
 *
 * => may be called from both interrupt and thread context.
 */
void
uvm_emap_consume(u_int gen)
{
	struct cpu_info *ci;
	struct uvm_cpu *ucpu;
	lwp_t *l = curlwp;
	u_int curgen;

	if (gen == UVM_EMAP_INACTIVE) {
		return;
	}

	/*
	 * No need to synchronise if the generation number of the current
	 * CPU is newer than that of this LWP.
	 *
	 * This test assumes two's complement arithmetic and allows
	 * ~2B missed updates before it will produce bad results.
	 */
	kpreempt_disable();
	ci = l->l_cpu;
	ucpu = ci->ci_data.cpu_uvm;
	if (__predict_true((signed int)(ucpu->emap_gen - gen) >= 0)) {
		l->l_emap_gen = ucpu->emap_gen;
		kpreempt_enable();
		return;
	}

	/*
	 * Record the current generation _before_ issuing the TLB flush.
	 * No need for a memory barrier before, as reading a stale value
	 * for uvm_emap_gen is not a problem.
	 *
	 * pmap_emap_sync() must implicitly perform a full memory barrier,
	 * which prevents us from fetching a value from after the TLB flush
	 * has occurred (which would be bad).
	 *
	 * We can race with an interrupt on the current CPU updating the
	 * counter to a newer value.  This could cause us to set a stale
	 * value into ucpu->emap_gen, overwriting a newer update from the
	 * interrupt.  However, it does not matter, since:
	 * (1) interrupts always run to completion or block;
	 * (2) interrupts will only ever install a newer value; and
	 * (3) we will roll the value forward later.
	 */
	curgen = uvm_emap_gen_return();
	pmap_emap_sync(true);
	ucpu->emap_gen = curgen;
	l->l_emap_gen = curgen;
	KASSERT((signed int)(curgen - gen) >= 0);
	kpreempt_enable();
}

/*
 * uvm_emap_produce: increment emap generation counter.
 *
 * => pmap updates must be globally visible.
 * => caller must have already entered mappings.
 * => may be called from both interrupt and thread context.
 */
u_int
uvm_emap_produce(void)
{
	u_int gen;
again:
	gen = atomic_inc_uint_nv(&uvm_emap_gen);
	if (__predict_false(gen == UVM_EMAP_INACTIVE)) {
		goto again;
	}
	return gen;
}

/*
 * uvm_emap_update: update the emap generation number for the current CPU.
 *
 * The function is called by MD code (e.g. the pmap) to take advantage of
 * TLB flushes initiated for other reasons, which sync the emap as a side
 * effect.  Note that the generation number must be sampled before the
 * actual TLB flush, to avoid racing with a newly produced number.
 *
 * => can be called from IPI handler, therefore function must be safe.
 * => should be called _after_ TLB flush.
 * => emap generation number should be taken _before_ TLB flush.
 * => must be called with preemption disabled.
 */
void
uvm_emap_update(u_int gen)
{
	struct uvm_cpu *ucpu;

	/*
	 * See comments in uvm_emap_consume() about memory barriers and
	 * race conditions.  The store is atomic as long as emap_gen is
	 * word-sized.
	 */
	CTASSERT(sizeof(ucpu->emap_gen) == sizeof(int));
	/* XXX: KASSERT(kpreempt_disabled()); */

	ucpu = curcpu()->ci_data.cpu_uvm;
	ucpu->emap_gen = gen;
}

#else

/*
 * Stubs for architectures which do not support emap.
 */

void
uvm_emap_enter(vaddr_t va, struct vm_page **pgs, u_int npages)
{
	paddr_t pa;
	u_int n;

	for (n = 0; n < npages; n++, va += PAGE_SIZE) {
		pa = VM_PAGE_TO_PHYS(pgs[n]);
		pmap_kenter_pa(va, pa, VM_PROT_READ, 0);
	}
	pmap_update(pmap_kernel());
}

void
uvm_emap_remove(vaddr_t sva, vsize_t len)
{

	pmap_kremove(sva, len);
	pmap_update(pmap_kernel());
}

#endif