1/* $NetBSD: md.c,v 1.78 2016/07/27 05:14:40 pgoyette Exp $ */
2
3/*
4 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * This implements a general-purpose memory-disk.
30 * See md.h for notes on the config types.
31 *
32 * Note that this driver provides the same functionality
33 * as the MFS filesystem hack, but this is better because
34 * you can use this for any filesystem type you'd like!
35 *
36 * Credit for most of the kmem ramdisk code goes to:
37 * Leo Weppelman (atari) and Phil Nelson (pc532)
38 * Credit for the ideas behind the "user space memory" code goes
39 * to the authors of the MFS implementation.
40 */
41
42#include <sys/cdefs.h>
43__KERNEL_RCSID(0, "$NetBSD: md.c,v 1.78 2016/07/27 05:14:40 pgoyette Exp $");
44
45#ifdef _KERNEL_OPT
46#include "opt_md.h"
47#else
48#define MEMORY_DISK_SERVER 1
49#endif
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/malloc.h>
54#include <sys/systm.h>
55#include <sys/buf.h>
56#include <sys/bufq.h>
57#include <sys/device.h>
58#include <sys/disk.h>
59#include <sys/stat.h>
60#include <sys/proc.h>
61#include <sys/conf.h>
62#include <sys/disklabel.h>
63
64#include <uvm/uvm_extern.h>
65
66#include <dev/md.h>
67
68#include "ioconf.h"
69/*
70 * The user-space functionality is included by default.
71 * Use `options MEMORY_DISK_SERVER=0' to turn it off.
72 */
73#ifndef MEMORY_DISK_SERVER
74#error MEMORY_DISK_SERVER should be defined by opt_md.h
75#endif /* MEMORY_DISK_SERVER */
76
77/*
78 * We should use the raw partition for ioctl.
79 */
80#define MD_UNIT(unit) DISKUNIT(unit)
81
82/* autoconfig stuff... */
83
/*
 * Per-unit software state for one memory disk.
 */
struct md_softc {
	device_t sc_dev;		/* Self. */
	struct disk sc_dkdev;		/* hook for generic disk handling */
	struct md_conf sc_md;		/* configuration: address, size, type */
	kmutex_t sc_lock;		/* Protect self. */
	kcondvar_t sc_cv;		/* Wait here for work. */
	struct bufq_state *sc_buflist;	/* bufs queued by mdstrategy() for the
					 * user-space server loop */
};
/* shorthand for fields in sc_md: */
#define sc_addr	sc_md.md_addr
#define sc_size	sc_md.md_size
#define sc_type	sc_md.md_type
96
97static void md_attach(device_t, device_t, void *);
98static int md_detach(device_t, int);
99
100static dev_type_open(mdopen);
101static dev_type_close(mdclose);
102static dev_type_read(mdread);
103static dev_type_write(mdwrite);
104static dev_type_ioctl(mdioctl);
105static dev_type_strategy(mdstrategy);
106static dev_type_size(mdsize);
107
/*
 * Block-device switch for md.  Open, close, strategy, ioctl and size are
 * routed to the md* handlers in this file; the dump and discard slots are
 * filled with the generic nodump/nodiscard entries.
 */
const struct bdevsw md_bdevsw = {
	.d_open = mdopen,
	.d_close = mdclose,
	.d_strategy = mdstrategy,
	.d_ioctl = mdioctl,
	.d_dump = nodump,
	.d_psize = mdsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK | D_MPSAFE
};
118
/*
 * Character (raw) device switch for md.  Read and write go through
 * mdread()/mdwrite(); open, close and ioctl share the handlers used by
 * the block device.  The remaining slots use the generic no* entries.
 *
 * NOTE(review): unlike md_bdevsw, D_MPSAFE is not set here -- confirm
 * whether that difference is intentional.
 */
const struct cdevsw md_cdevsw = {
	.d_open = mdopen,
	.d_close = mdclose,
	.d_read = mdread,
	.d_write = mdwrite,
	.d_ioctl = mdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
133
/*
 * Driver hooks handed to disk_init() in md_attach(); only the strategy
 * routine is provided.
 */
static struct dkdriver mddkdriver = {
	.d_strategy = mdstrategy
};
137
extern struct cfdriver md_cd;
/*
 * Autoconfiguration attachment for md: the softc is a struct md_softc,
 * with md_attach()/md_detach() as the attach and detach entry points and
 * the DVF_DETACH_SHUTDOWN flag set.  The other hooks are left NULL/0.
 */
CFATTACH_DECL3_NEW(md, sizeof(struct md_softc),
	0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);
141
142static kmutex_t md_device_lock; /* Protect unit creation / deletion. */
143extern size_t md_root_size;
144
145static void md_set_disklabel(struct md_softc *);
146
147/*
148 * This is called if we are configured as a pseudo-device
149 */
150void
151mdattach(int n)
152{
153
154 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE);
155 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) {
156 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name);
157 return;
158 }
159}
160
161static void
162md_attach(device_t parent, device_t self, void *aux)
163{
164 struct md_softc *sc = device_private(self);
165
166 sc->sc_dev = self;
167 sc->sc_type = MD_UNCONFIGURED;
168 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
169 cv_init(&sc->sc_cv, "mdidle");
170 bufq_alloc(&sc->sc_buflist, "fcfs", 0);
171
172 /* XXX - Could accept aux info here to set the config. */
173#ifdef MEMORY_DISK_HOOKS
174 /*
175 * This external function might setup a pre-loaded disk.
176 * All it would need to do is setup the md_conf struct.
177 * See sys/dev/md_root.c for an example.
178 */
179 md_attach_hook(device_unit(self), &sc->sc_md);
180#endif
181
182 /*
183 * Initialize and attach the disk structure.
184 */
185 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver);
186 disk_attach(&sc->sc_dkdev);
187
188 if (sc->sc_type != MD_UNCONFIGURED)
189 md_set_disklabel(sc);
190
191 if (!pmf_device_register(self, NULL, NULL))
192 aprint_error_dev(self, "couldn't establish power handler\n");
193}
194
195static int
196md_detach(device_t self, int flags)
197{
198 struct md_softc *sc = device_private(self);
199 int rc;
200
201 rc = 0;
202 mutex_enter(&sc->sc_dkdev.dk_openlock);
203 if (sc->sc_dkdev.dk_openmask == 0 && sc->sc_type == MD_UNCONFIGURED)
204 ; /* nothing to do */
205 else if ((flags & DETACH_FORCE) == 0)
206 rc = EBUSY;
207 mutex_exit(&sc->sc_dkdev.dk_openlock);
208
209 if (rc != 0)
210 return rc;
211
212 pmf_device_deregister(self);
213 disk_detach(&sc->sc_dkdev);
214 disk_destroy(&sc->sc_dkdev);
215 bufq_free(sc->sc_buflist);
216 mutex_destroy(&sc->sc_lock);
217 cv_destroy(&sc->sc_cv);
218 return 0;
219}
220
/*
 * operational routines:
 * open, close, read, write, strategy,
 * ioctl, size
 * (there is no dump routine; md_bdevsw uses nodump)
 */
226
227#if MEMORY_DISK_SERVER
228static int md_server_loop(struct md_softc *sc);
229static int md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
230 struct lwp *l);
231#endif /* MEMORY_DISK_SERVER */
232static int md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
233 struct lwp *l);
234
235static int
236mdsize(dev_t dev)
237{
238 struct md_softc *sc;
239 int res;
240
241 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
242 if (sc == NULL)
243 return 0;
244
245 mutex_enter(&sc->sc_lock);
246 if (sc->sc_type == MD_UNCONFIGURED)
247 res = 0;
248 else
249 res = sc->sc_size >> DEV_BSHIFT;
250 mutex_exit(&sc->sc_lock);
251
252 return res;
253}
254
255static int
256mdopen(dev_t dev, int flag, int fmt, struct lwp *l)
257{
258 int unit;
259 int part = DISKPART(dev);
260 int pmask = 1 << part;
261 cfdata_t cf;
262 struct md_softc *sc;
263 struct disk *dk;
264#ifdef MEMORY_DISK_HOOKS
265 bool configured;
266#endif
267
268 mutex_enter(&md_device_lock);
269 unit = MD_UNIT(dev);
270 sc = device_lookup_private(&md_cd, unit);
271 if (sc == NULL) {
272 if (part != RAW_PART) {
273 mutex_exit(&md_device_lock);
274 return ENXIO;
275 }
276 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
277 cf->cf_name = md_cd.cd_name;
278 cf->cf_atname = md_cd.cd_name;
279 cf->cf_unit = unit;
280 cf->cf_fstate = FSTATE_STAR;
281 sc = device_private(config_attach_pseudo(cf));
282 if (sc == NULL) {
283 mutex_exit(&md_device_lock);
284 return ENOMEM;
285 }
286 }
287
288 dk = &sc->sc_dkdev;
289
290 /*
291 * The raw partition is used for ioctl to configure.
292 */
293 if (part == RAW_PART)
294 goto ok;
295
296#ifdef MEMORY_DISK_HOOKS
297 /* Call the open hook to allow loading the device. */
298 configured = (sc->sc_type != MD_UNCONFIGURED);
299 md_open_hook(unit, &sc->sc_md);
300 /* initialize disklabel if the device is configured in open hook */
301 if (!configured && sc->sc_type != MD_UNCONFIGURED)
302 md_set_disklabel(sc);
303#endif
304
305 /*
306 * This is a normal, "slave" device, so
307 * enforce initialized.
308 */
309 if (sc->sc_type == MD_UNCONFIGURED) {
310 mutex_exit(&md_device_lock);
311 return ENXIO;
312 }
313
314ok:
315 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */
316 mutex_enter(&dk->dk_openlock);
317 /* Mark our unit as open. */
318 switch (fmt) {
319 case S_IFCHR:
320 dk->dk_copenmask |= pmask;
321 break;
322 case S_IFBLK:
323 dk->dk_bopenmask |= pmask;
324 break;
325 }
326
327 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
328
329 mutex_exit(&dk->dk_openlock);
330 mutex_exit(&md_device_lock);
331 return 0;
332}
333
334static int
335mdclose(dev_t dev, int flag, int fmt, struct lwp *l)
336{
337 int part = DISKPART(dev);
338 int pmask = 1 << part;
339 int error;
340 cfdata_t cf;
341 struct md_softc *sc;
342 struct disk *dk;
343
344 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
345 if (sc == NULL)
346 return ENXIO;
347
348 dk = &sc->sc_dkdev;
349
350 mutex_enter(&dk->dk_openlock);
351
352 switch (fmt) {
353 case S_IFCHR:
354 dk->dk_copenmask &= ~pmask;
355 break;
356 case S_IFBLK:
357 dk->dk_bopenmask &= ~pmask;
358 break;
359 }
360 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
361 if (dk->dk_openmask != 0) {
362 mutex_exit(&dk->dk_openlock);
363 return 0;
364 }
365
366 mutex_exit(&dk->dk_openlock);
367
368 mutex_enter(&md_device_lock);
369 cf = device_cfdata(sc->sc_dev);
370 error = config_detach(sc->sc_dev, DETACH_QUIET);
371 if (! error)
372 free(cf, M_DEVBUF);
373 mutex_exit(&md_device_lock);
374 return error;
375}
376
377static int
378mdread(dev_t dev, struct uio *uio, int flags)
379{
380 struct md_softc *sc;
381
382 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
383
384 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
385 return ENXIO;
386
387 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
388}
389
390static int
391mdwrite(dev_t dev, struct uio *uio, int flags)
392{
393 struct md_softc *sc;
394
395 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
396
397 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
398 return ENXIO;
399
400 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
401}
402
403/*
404 * Handle I/O requests, either directly, or
405 * by passing them to the server process.
406 */
407static void
408mdstrategy(struct buf *bp)
409{
410 struct md_softc *sc;
411 void * addr;
412 size_t off, xfer;
413 bool is_read;
414
415 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev));
416
417 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) {
418 bp->b_error = ENXIO;
419 goto done;
420 }
421
422 mutex_enter(&sc->sc_lock);
423
424 switch (sc->sc_type) {
425#if MEMORY_DISK_SERVER
426 case MD_UMEM_SERVER:
427 /* Just add this job to the server's queue. */
428 bufq_put(sc->sc_buflist, bp);
429 cv_signal(&sc->sc_cv);
430 mutex_exit(&sc->sc_lock);
431 /* see md_server_loop() */
432 /* no biodone in this case */
433 return;
434#endif /* MEMORY_DISK_SERVER */
435
436 case MD_KMEM_FIXED:
437 case MD_KMEM_ALLOCATED:
438 /* These are in kernel space. Access directly. */
439 is_read = ((bp->b_flags & B_READ) == B_READ);
440 bp->b_resid = bp->b_bcount;
441 off = (bp->b_blkno << DEV_BSHIFT);
442 if (off >= sc->sc_size) {
443 if (is_read)
444 break; /* EOF */
445 goto set_eio;
446 }
447 xfer = bp->b_resid;
448 if (xfer > (sc->sc_size - off))
449 xfer = (sc->sc_size - off);
450 addr = (char *)sc->sc_addr + off;
451 disk_busy(&sc->sc_dkdev);
452 if (is_read)
453 memcpy(bp->b_data, addr, xfer);
454 else
455 memcpy(addr, bp->b_data, xfer);
456 disk_unbusy(&sc->sc_dkdev, xfer, is_read);
457 bp->b_resid -= xfer;
458 break;
459
460 default:
461 bp->b_resid = bp->b_bcount;
462 set_eio:
463 bp->b_error = EIO;
464 break;
465 }
466 mutex_exit(&sc->sc_lock);
467
468 done:
469
470 biodone(bp);
471}
472
473static int
474mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
475{
476 struct md_softc *sc;
477 struct md_conf *umd;
478 int error;
479
480 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL)
481 return ENXIO;
482
483 mutex_enter(&sc->sc_lock);
484 if (sc->sc_type != MD_UNCONFIGURED) {
485 error = disk_ioctl(&sc->sc_dkdev, dev, cmd, data, flag, l);
486 if (error != EPASSTHROUGH) {
487 mutex_exit(&sc->sc_lock);
488 return 0;
489 }
490 }
491
492 /* If this is not the raw partition, punt! */
493 if (DISKPART(dev) != RAW_PART) {
494 mutex_exit(&sc->sc_lock);
495 return ENOTTY;
496 }
497
498 umd = (struct md_conf *)data;
499 error = EINVAL;
500 switch (cmd) {
501 case MD_GETCONF:
502 *umd = sc->sc_md;
503 error = 0;
504 break;
505
506 case MD_SETCONF:
507 /* Can only set it once. */
508 if (sc->sc_type != MD_UNCONFIGURED)
509 break;
510 switch (umd->md_type) {
511 case MD_KMEM_ALLOCATED:
512 error = md_ioctl_kalloc(sc, umd, l);
513 break;
514#if MEMORY_DISK_SERVER
515 case MD_UMEM_SERVER:
516 error = md_ioctl_server(sc, umd, l);
517 break;
518#endif /* MEMORY_DISK_SERVER */
519 default:
520 break;
521 }
522 break;
523 }
524 mutex_exit(&sc->sc_lock);
525 return error;
526}
527
528static void
529md_set_disklabel(struct md_softc *sc)
530{
531 struct disk_geom *dg = &sc->sc_dkdev.dk_geom;
532 struct disklabel *lp = sc->sc_dkdev.dk_label;
533 struct partition *pp;
534
535 memset(lp, 0, sizeof(*lp));
536
537 lp->d_secsize = DEV_BSIZE;
538 lp->d_secperunit = sc->sc_size / DEV_BSIZE;
539 if (lp->d_secperunit >= (32*64)) {
540 lp->d_nsectors = 32;
541 lp->d_ntracks = 64;
542 lp->d_ncylinders = lp->d_secperunit / (32*64);
543 } else {
544 lp->d_nsectors = 1;
545 lp->d_ntracks = 1;
546 lp->d_ncylinders = lp->d_secperunit;
547 }
548 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors;
549
550 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename));
551 lp->d_type = DKTYPE_MD;
552 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
553 lp->d_rpm = 3600;
554 lp->d_interleave = 1;
555 lp->d_flags = 0;
556
557 pp = &lp->d_partitions[0];
558 pp->p_offset = 0;
559 pp->p_size = lp->d_secperunit;
560 pp->p_fstype = FS_BSDFFS;
561
562 pp = &lp->d_partitions[RAW_PART];
563 pp->p_offset = 0;
564 pp->p_size = lp->d_secperunit;
565 pp->p_fstype = FS_UNUSED;
566
567 lp->d_npartitions = RAW_PART+1;
568 lp->d_magic = DISKMAGIC;
569 lp->d_magic2 = DISKMAGIC;
570 lp->d_checksum = dkcksum(lp);
571
572 memset(dg, 0, sizeof(*dg));
573
574 dg->dg_secsize = lp->d_secsize;
575 dg->dg_secperunit = lp->d_secperunit;
576 dg->dg_nsectors = lp->d_nsectors;
577 dg->dg_ntracks = lp->d_ntracks = 64;;
578 dg->dg_ncylinders = lp->d_ncylinders;
579
580 disk_set_info(sc->sc_dev, &sc->sc_dkdev, NULL);
581}
582
583/*
584 * Handle ioctl MD_SETCONF for (sc_type == MD_KMEM_ALLOCATED)
585 * Just allocate some kernel memory and return.
586 */
587static int
588md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
589 struct lwp *l)
590{
591 vaddr_t addr;
592 vsize_t size;
593
594 mutex_exit(&sc->sc_lock);
595
596 /* Sanity check the size. */
597 size = umd->md_size;
598 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
599
600 mutex_enter(&sc->sc_lock);
601
602 if (!addr)
603 return ENOMEM;
604
605 /* If another thread beat us to configure this unit: fail. */
606 if (sc->sc_type != MD_UNCONFIGURED) {
607 uvm_km_free(kernel_map, addr, size, UVM_KMF_WIRED);
608 return EINVAL;
609 }
610
611 /* This unit is now configured. */
612 sc->sc_addr = (void *)addr; /* kernel space */
613 sc->sc_size = (size_t)size;
614 sc->sc_type = MD_KMEM_ALLOCATED;
615 md_set_disklabel(sc);
616 return 0;
617}
618
619#if MEMORY_DISK_SERVER
620
621/*
622 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER)
623 * Set config, then become the I/O server for this unit.
624 */
625static int
626md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
627 struct lwp *l)
628{
629 vaddr_t end;
630 int error;
631
632 KASSERT(mutex_owned(&sc->sc_lock));
633
634 /* Sanity check addr, size. */
635 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size);
636
637 if ((end >= VM_MAXUSER_ADDRESS) ||
638 (end < ((vaddr_t) umd->md_addr)) )
639 return EINVAL;
640
641 /* This unit is now configured. */
642 sc->sc_addr = umd->md_addr; /* user space */
643 sc->sc_size = umd->md_size;
644 sc->sc_type = MD_UMEM_SERVER;
645 md_set_disklabel(sc);
646
647 /* Become the server daemon */
648 error = md_server_loop(sc);
649
650 /* This server is now going away! */
651 sc->sc_type = MD_UNCONFIGURED;
652 sc->sc_addr = 0;
653 sc->sc_size = 0;
654
655 return (error);
656}
657
658static int
659md_server_loop(struct md_softc *sc)
660{
661 struct buf *bp;
662 void *addr; /* user space address */
663 size_t off; /* offset into "device" */
664 size_t xfer; /* amount to transfer */
665 int error;
666 bool is_read;
667
668 KASSERT(mutex_owned(&sc->sc_lock));
669
670 for (;;) {
671 /* Wait for some work to arrive. */
672 while ((bp = bufq_get(sc->sc_buflist)) == NULL) {
673 error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
674 if (error)
675 return error;
676 }
677
678 /* Do the transfer to/from user space. */
679 mutex_exit(&sc->sc_lock);
680 error = 0;
681 is_read = ((bp->b_flags & B_READ) == B_READ);
682 bp->b_resid = bp->b_bcount;
683 off = (bp->b_blkno << DEV_BSHIFT);
684 if (off >= sc->sc_size) {
685 if (is_read)
686 goto done; /* EOF (not an error) */
687 error = EIO;
688 goto done;
689 }
690 xfer = bp->b_resid;
691 if (xfer > (sc->sc_size - off))
692 xfer = (sc->sc_size - off);
693 addr = (char *)sc->sc_addr + off;
694 disk_busy(&sc->sc_dkdev);
695 if (is_read)
696 error = copyin(addr, bp->b_data, xfer);
697 else
698 error = copyout(bp->b_data, addr, xfer);
699 disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read);
700 if (!error)
701 bp->b_resid -= xfer;
702
703 done:
704 if (error) {
705 bp->b_error = error;
706 }
707 biodone(bp);
708 mutex_enter(&sc->sc_lock);
709 }
710}
711#endif /* MEMORY_DISK_SERVER */
712