1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 /*
  29  * lofi (loopback file) driver - allows you to attach a file to a device,
  30  * which can then be accessed through that device. The simple model is that
  31  * you tell lofi to open a file, and then use the block device you get as
  32  * you would any block device. lofi translates access to the block device
  33  * into I/O on the underlying file. This is mostly useful for
  34  * mounting images of filesystems.
  35  *
  36  * lofi is controlled through /dev/lofictl - this is the only device exported
  37  * during attach, and is minor number 0. lofiadm communicates with lofi through
  38  * ioctls on this device. When a file is attached to lofi, block and character
  39  * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices
  40  * are identified by their minor number, and the minor number is also used
  41  * as the name in /dev/lofi. If we ever decide to support virtual disks,
  42  * we'll have to divide the minor number space to identify fdisk partitions
  43  * and slices, and the name will then be the minor number shifted down a
  44  * few bits. Minor devices are tracked with state structures handled with
  45  * ddi_soft_state(9F) for simplicity.
  46  *
  47  * A file attached to lofi is opened when attached and not closed until
  48  * explicitly detached from lofi. This seems more sensible than deferring
  49  * the open until the /dev/lofi device is opened, for a number of reasons.
  50  * One is that any failure is likely to be noticed by the person (or script)
  51  * running lofiadm. Another is that it would be a security problem if the
  52  * file was replaced by another one after being added but before being opened.
  53  *
  54  * The only hard part about lofi is the ioctls. In order to support things
  55  * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
  56  * So it has to fake disk geometry and partition information. More may need
  57  * to be faked if your favorite utility doesn't work and you think it should
  58  * (fdformat doesn't work because it really wants to know the type of floppy
  59  * controller to talk to, and that didn't seem easy to fake. Or possibly even
  60  * necessary, since we have mkfs_pcfs now).
  61  *
  62  * Normally, a lofi device cannot be detached if it is open (i.e. busy).  To
  63  * support simulation of hotplug events, an optional force flag is provided.
  64  * If a lofi device is open when a force detach is requested, then the
  65  * underlying file is closed and any subsequent operations return EIO.  When the
  66  * device is closed for the last time, it will be cleaned up at that time.  In
  67  * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
  68  * detached but not removed.
  69  *
  70  * Known problems:
  71  *
  72  *      UFS logging. Mounting a UFS filesystem image "logging"
  73  *      works for basic copy testing but wedges during a build of ON through
  74  *      that image. Some deadlock in lufs holding the log mutex and then
  75  *      getting stuck on a buf. So for now, don't do that.
  76  *
  77  *      Direct I/O. Since the filesystem data is being cached in the buffer
  78  *      cache, _and_ again in the underlying filesystem, it's tempting to
  79  *      enable direct I/O on the underlying file. Don't, because that deadlocks.
  80  *      I think to fix the cache-twice problem we might need filesystem support.
  81  *
  82  *      lofi on itself. The simple lock strategy (lofi_lock) precludes this
  83  *      because you'll be in lofi_ioctl, holding the lock when you open the
  84  *      file, which, if it's lofi, will grab lofi_lock. We prevent this for
  85  *      now, though not using ddi_soft_state(9F) would make it possible to
  86  *      do. Though it would still be silly.
  87  *
  88  * Interesting things to do:
  89  *
  90  *      Allow multiple files for each device. A poor-man's metadisk, basically.
  91  *
  92  *      Pass-through ioctls on block devices. You can (though it's not
  93  *      documented), give lofi a block device as a file name. Then we shouldn't
  94  *      need to fake a geometry. But this is also silly unless you're replacing
  95  *      metadisk.
  96  *
  97  *      Encryption. tpm would like this. Apparently Windows 2000 has it, and
  98  *      so does Linux.
  99  */
 100 
 101 #include <sys/types.h>
 102 #include <sys/sysmacros.h>
 103 #include <sys/cmn_err.h>
 104 #include <sys/uio.h>
 105 #include <sys/kmem.h>
 106 #include <sys/cred.h>
 107 #include <sys/mman.h>
 108 #include <sys/errno.h>
 109 #include <sys/aio_req.h>
 110 #include <sys/stat.h>
 111 #include <sys/file.h>
 112 #include <sys/modctl.h>
 113 #include <sys/conf.h>
 114 #include <sys/debug.h>
 115 #include <sys/vnode.h>
 116 #include <sys/lofi.h>
 117 #include <sys/fcntl.h>
 118 #include <sys/pathname.h>
 119 #include <sys/filio.h>
 120 #include <sys/fdio.h>
 121 #include <sys/open.h>
 122 #include <sys/disp.h>
 123 #include <vm/seg_map.h>
 124 #include <sys/ddi.h>
 125 #include <sys/sunddi.h>
 126 
/* seems safer than having to get the string right many times */
#define NBLOCKS_PROP_NAME       "Nblocks"
#define SIZE_PROP_NAME  "Size"

/* devinfo node of the driver: set by lofi_attach(), cleared by lofi_detach() */
static dev_info_t *lofi_dip;
/*
 * ddi_soft_state(9F) handle, indexed by minor number.  Slot 0 belongs to
 * the control device; slots 1..lofi_max_files describe mapped files.
 */
static void     *lofi_statep;
static kmutex_t lofi_lock;              /* state lock */

/*
 * Because lofi_taskq_nthreads limits the actual swamping of the device, the
 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
 * high.  If we want to be assured that the underlying device is always busy,
 * we must be sure that the number of bytes enqueued when the number of
 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
 * the duration of the sleep time in taskq_ent_alloc().  That is, lofi should
 * set maxalloc to be the maximum throughput (in bytes per second) of the
 * underlying device divided by the minimum I/O size.  We assume a realistic
 * maximum throughput of one hundred megabytes per second; we set maxalloc on
 * the lofi task queue to be 104857600 divided by DEV_BSIZE.
 */
static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
static int lofi_taskq_nthreads = 4;     /* # of taskq threads per device */

/*
 * Largest minor number a file may be mapped to; the minor scans in this
 * file run from 1 to this value inclusive.
 */
uint32_t lofi_max_files = LOFI_MAX_FILES;
 151 
 152 static int
 153 lofi_busy(void)
 154 {
 155         minor_t minor;
 156 
 157         /*
 158          * We need to make sure no mappings exist - mod_remove won't
 159          * help because the device isn't open.
 160          */
 161         mutex_enter(&lofi_lock);
 162         for (minor = 1; minor <= lofi_max_files; minor++) {
 163                 if (ddi_get_soft_state(lofi_statep, minor) != NULL) {
 164                         mutex_exit(&lofi_lock);
 165                         return (EBUSY);
 166                 }
 167         }
 168         mutex_exit(&lofi_lock);
 169         return (0);
 170 }
 171 
 172 static int
 173 is_opened(struct lofi_state *lsp)
 174 {
 175         ASSERT(mutex_owned(&lofi_lock));
 176         return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count);
 177 }
 178 
 179 static int
 180 mark_opened(struct lofi_state *lsp, int otyp)
 181 {
 182         ASSERT(mutex_owned(&lofi_lock));
 183         switch (otyp) {
 184         case OTYP_CHR:
 185                 lsp->ls_chr_open = 1;
 186                 break;
 187         case OTYP_BLK:
 188                 lsp->ls_blk_open = 1;
 189                 break;
 190         case OTYP_LYR:
 191                 lsp->ls_lyr_open_count++;
 192                 break;
 193         default:
 194                 return (-1);
 195         }
 196         return (0);
 197 }
 198 
 199 static void
 200 mark_closed(struct lofi_state *lsp, int otyp)
 201 {
 202         ASSERT(mutex_owned(&lofi_lock));
 203         switch (otyp) {
 204         case OTYP_CHR:
 205                 lsp->ls_chr_open = 0;
 206                 break;
 207         case OTYP_BLK:
 208                 lsp->ls_blk_open = 0;
 209                 break;
 210         case OTYP_LYR:
 211                 lsp->ls_lyr_open_count--;
 212                 break;
 213         default:
 214                 break;
 215         }
 216 }
 217 
 218 static void
 219 lofi_free_handle(dev_t dev, minor_t minor, struct lofi_state *lsp,
 220     cred_t *credp)
 221 {
 222         dev_t   newdev;
 223         char    namebuf[50];
 224 
 225         if (lsp->ls_vp) {
 226                 (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
 227                     1, 0, credp, NULL);
 228                 VN_RELE(lsp->ls_vp);
 229                 lsp->ls_vp = NULL;
 230         }
 231 
 232         newdev = makedevice(getmajor(dev), minor);
 233         (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
 234         (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);
 235 
 236         (void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
 237         ddi_remove_minor_node(lofi_dip, namebuf);
 238         (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
 239         ddi_remove_minor_node(lofi_dip, namebuf);
 240 
 241         kmem_free(lsp->ls_filename, lsp->ls_filename_sz);
 242         taskq_destroy(lsp->ls_taskq);
 243         if (lsp->ls_kstat) {
 244                 kstat_delete(lsp->ls_kstat);
 245                 mutex_destroy(&lsp->ls_kstat_lock);
 246         }
 247         ddi_soft_state_free(lofi_statep, minor);
 248 }
 249 
 250 /*ARGSUSED*/
 251 static int
 252 lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
 253 {
 254         minor_t minor;
 255         struct lofi_state *lsp;
 256 
 257         mutex_enter(&lofi_lock);
 258         minor = getminor(*devp);
 259         if (minor == 0) {
 260                 /* master control device */
 261                 /* must be opened exclusively */
 262                 if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) {
 263                         mutex_exit(&lofi_lock);
 264                         return (EINVAL);
 265                 }
 266                 lsp = ddi_get_soft_state(lofi_statep, 0);
 267                 if (lsp == NULL) {
 268                         mutex_exit(&lofi_lock);
 269                         return (ENXIO);
 270                 }
 271                 if (is_opened(lsp)) {
 272                         mutex_exit(&lofi_lock);
 273                         return (EBUSY);
 274                 }
 275                 (void) mark_opened(lsp, OTYP_CHR);
 276                 mutex_exit(&lofi_lock);
 277                 return (0);
 278         }
 279 
 280         /* otherwise, the mapping should already exist */
 281         lsp = ddi_get_soft_state(lofi_statep, minor);
 282         if (lsp == NULL) {
 283                 mutex_exit(&lofi_lock);
 284                 return (EINVAL);
 285         }
 286 
 287         if (lsp->ls_vp == NULL) {
 288                 mutex_exit(&lofi_lock);
 289                 return (ENXIO);
 290         }
 291 
 292         if (mark_opened(lsp, otyp) == -1) {
 293                 mutex_exit(&lofi_lock);
 294                 return (EINVAL);
 295         }
 296 
 297         mutex_exit(&lofi_lock);
 298         return (0);
 299 }
 300 
 301 /*ARGSUSED*/
 302 static int
 303 lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
 304 {
 305         minor_t minor;
 306         struct lofi_state *lsp;
 307 
 308         mutex_enter(&lofi_lock);
 309         minor = getminor(dev);
 310         lsp = ddi_get_soft_state(lofi_statep, minor);
 311         if (lsp == NULL) {
 312                 mutex_exit(&lofi_lock);
 313                 return (EINVAL);
 314         }
 315         mark_closed(lsp, otyp);
 316 
 317         /*
 318          * If we have forcibly closed the underlying device, and this is the
 319          * last close, then tear down the rest of the device.
 320          */
 321         if (minor != 0 && lsp->ls_vp == NULL && !is_opened(lsp))
 322                 lofi_free_handle(dev, minor, lsp, credp);
 323         mutex_exit(&lofi_lock);
 324         return (0);
 325 }
 326 
 327 /*
 328  * This is basically what strategy used to be before we found we
 329  * needed task queues.
 330  */
 331 static void
 332 lofi_strategy_task(void *arg)
 333 {
 334         struct buf *bp = (struct buf *)arg;
 335         int error;
 336         struct lofi_state *lsp;
 337         offset_t        offset, alignedoffset;
 338         offset_t        mapoffset;
 339         caddr_t bufaddr;
 340         caddr_t mapaddr;
 341         size_t  xfersize;
 342         size_t  len;
 343         int     isread;
 344         int     smflags;
 345         enum seg_rw srw;
 346 
 347         lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
 348         if (lsp->ls_kstat) {
 349                 mutex_enter(lsp->ls_kstat->ks_lock);
 350                 kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
 351                 mutex_exit(lsp->ls_kstat->ks_lock);
 352         }
 353         bp_mapin(bp);
 354         bufaddr = bp->b_un.b_addr;
 355         offset = bp->b_lblkno * DEV_BSIZE;   /* offset within file */
 356 
 357         /*
 358          * We used to always use vn_rdwr here, but we cannot do that because
 359          * we might decide to read or write from the the underlying
 360          * file during this call, which would be a deadlock because
 361          * we have the rw_lock. So instead we page, unless it's not
 362          * mapable or it's a character device.
 363          */
 364         if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
 365                 error = EIO;
 366         } else if (((lsp->ls_vp->v_flag & VNOMAP) == 0) &&
 367             (lsp->ls_vp->v_type != VCHR)) {
 368                 /*
 369                  * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
 370                  * an 8K boundary, but the buf transfer address may not be
 371                  * aligned on more than a 512-byte boundary (we don't
 372                  * enforce that, though we could). This matters since the
 373                  * initial part of the transfer may not start at offset 0
 374                  * within the segmap'd chunk. So we have to compensate for
 375                  * that with 'mapoffset'. Subsequent chunks always start
 376                  * off at the beginning, and the last is capped by b_resid.
 377                  */
 378                 mapoffset = offset & MAXBOFFSET;
 379                 alignedoffset = offset - mapoffset;     /* now map-aligned */
 380                 bp->b_resid = bp->b_bcount;
 381                 isread = bp->b_flags & B_READ;
 382                 srw = isread ? S_READ : S_WRITE;
 383                 do {
 384                         xfersize = MIN(lsp->ls_vp_size - offset,
 385                             MIN(MAXBSIZE - mapoffset, bp->b_resid));
 386                         len = roundup(mapoffset + xfersize, PAGESIZE);
 387                         mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
 388                             alignedoffset, MAXBSIZE, 1, srw);
 389                         /*
 390                          * Now fault in the pages. This lets us check
 391                          * for errors before we reference mapaddr and
 392                          * try to resolve the fault in bcopy (which would
 393                          * panic instead). And this can easily happen,
 394                          * particularly if you've lofi'd a file over NFS
 395                          * and someone deletes the file on the server.
 396                          */
 397                         error = segmap_fault(kas.a_hat, segkmap, mapaddr,
 398                             len, F_SOFTLOCK, srw);
 399                         if (error) {
 400                                 (void) segmap_release(segkmap, mapaddr, 0);
 401                                 if (FC_CODE(error) == FC_OBJERR)
 402                                         error = FC_ERRNO(error);
 403                                 else
 404                                         error = EIO;
 405                                 break;
 406                         }
 407                         smflags = 0;
 408                         if (isread) {
 409                                 bcopy(mapaddr + mapoffset, bufaddr, xfersize);
 410                         } else {
 411                                 smflags |= SM_WRITE;
 412                                 bcopy(bufaddr, mapaddr + mapoffset, xfersize);
 413                         }
 414                         bp->b_resid -= xfersize;
 415                         bufaddr += xfersize;
 416                         offset += xfersize;
 417                         (void) segmap_fault(kas.a_hat, segkmap, mapaddr,
 418                             len, F_SOFTUNLOCK, srw);
 419                         error = segmap_release(segkmap, mapaddr, smflags);
 420                         /* only the first map may start partial */
 421                         mapoffset = 0;
 422                         alignedoffset += MAXBSIZE;
 423                 } while ((error == 0) && (bp->b_resid > 0) &&
 424                     (offset < lsp->ls_vp_size));
 425         } else {
 426                 ssize_t resid;
 427                 enum uio_rw rw;
 428 
 429                 if (bp->b_flags & B_READ)
 430                         rw = UIO_READ;
 431                 else
 432                         rw = UIO_WRITE;
 433                 error = vn_rdwr(rw, lsp->ls_vp, bufaddr, bp->b_bcount,
 434                     offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
 435                 bp->b_resid = resid;
 436         }
 437 
 438         if (lsp->ls_kstat) {
 439                 size_t n_done = bp->b_bcount - bp->b_resid;
 440                 kstat_io_t *kioptr;
 441 
 442                 mutex_enter(lsp->ls_kstat->ks_lock);
 443                 kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
 444                 if (bp->b_flags & B_READ) {
 445                         kioptr->nread += n_done;
 446                         kioptr->reads++;
 447                 } else {
 448                         kioptr->nwritten += n_done;
 449                         kioptr->writes++;
 450                 }
 451                 kstat_runq_exit(kioptr);
 452                 mutex_exit(lsp->ls_kstat->ks_lock);
 453         }
 454 
 455         mutex_enter(&lsp->ls_vp_lock);
 456         if (--lsp->ls_vp_iocount == 0)
 457                 cv_broadcast(&lsp->ls_vp_cv);
 458         mutex_exit(&lsp->ls_vp_lock);
 459 
 460         bioerror(bp, error);
 461         biodone(bp);
 462 }
 463 
 464 static int
 465 lofi_strategy(struct buf *bp)
 466 {
 467         struct lofi_state *lsp;
 468         offset_t        offset;
 469 
 470         /*
 471          * We cannot just do I/O here, because the current thread
 472          * _might_ end up back in here because the underlying filesystem
 473          * wants a buffer, which eventually gets into bio_recycle and
 474          * might call into lofi to write out a delayed-write buffer.
 475          * This is bad if the filesystem above lofi is the same as below.
 476          *
 477          * We could come up with a complex strategy using threads to
 478          * do the I/O asynchronously, or we could use task queues. task
 479          * queues were incredibly easy so they win.
 480          */
 481         lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
 482         mutex_enter(&lsp->ls_vp_lock);
 483         if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
 484                 bioerror(bp, EIO);
 485                 biodone(bp);
 486                 mutex_exit(&lsp->ls_vp_lock);
 487                 return (0);
 488         }
 489 
 490         offset = bp->b_lblkno * DEV_BSIZE;   /* offset within file */
 491         if (offset == lsp->ls_vp_size) {
 492                 /* EOF */
 493                 if ((bp->b_flags & B_READ) != 0) {
 494                         bp->b_resid = bp->b_bcount;
 495                         bioerror(bp, 0);
 496                 } else {
 497                         /* writes should fail */
 498                         bioerror(bp, ENXIO);
 499                 }
 500                 biodone(bp);
 501                 mutex_exit(&lsp->ls_vp_lock);
 502                 return (0);
 503         }
 504         if (offset > lsp->ls_vp_size) {
 505                 bioerror(bp, ENXIO);
 506                 biodone(bp);
 507                 mutex_exit(&lsp->ls_vp_lock);
 508                 return (0);
 509         }
 510         lsp->ls_vp_iocount++;
 511         mutex_exit(&lsp->ls_vp_lock);
 512 
 513         if (lsp->ls_kstat) {
 514                 mutex_enter(lsp->ls_kstat->ks_lock);
 515                 kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
 516                 mutex_exit(lsp->ls_kstat->ks_lock);
 517         }
 518         (void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
 519         return (0);
 520 }
 521 
 522 /*ARGSUSED2*/
 523 static int
 524 lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
 525 {
 526         if (getminor(dev) == 0)
 527                 return (EINVAL);
 528         return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
 529 }
 530 
 531 /*ARGSUSED2*/
 532 static int
 533 lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
 534 {
 535         if (getminor(dev) == 0)
 536                 return (EINVAL);
 537         return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
 538 }
 539 
 540 /*ARGSUSED2*/
 541 static int
 542 lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
 543 {
 544         if (getminor(dev) == 0)
 545                 return (EINVAL);
 546         return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
 547 }
 548 
 549 /*ARGSUSED2*/
 550 static int
 551 lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
 552 {
 553         if (getminor(dev) == 0)
 554                 return (EINVAL);
 555         return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
 556 }
 557 
 558 /*ARGSUSED*/
 559 static int
 560 lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 561 {
 562         switch (infocmd) {
 563         case DDI_INFO_DEVT2DEVINFO:
 564                 *result = lofi_dip;
 565                 return (DDI_SUCCESS);
 566         case DDI_INFO_DEVT2INSTANCE:
 567                 *result = 0;
 568                 return (DDI_SUCCESS);
 569         }
 570         return (DDI_FAILURE);
 571 }
 572 
 573 static int
 574 lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 575 {
 576         int     error;
 577 
 578         if (cmd != DDI_ATTACH)
 579                 return (DDI_FAILURE);
 580         error = ddi_soft_state_zalloc(lofi_statep, 0);
 581         if (error == DDI_FAILURE) {
 582                 return (DDI_FAILURE);
 583         }
 584         error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
 585             DDI_PSEUDO, NULL);
 586         if (error == DDI_FAILURE) {
 587                 ddi_soft_state_free(lofi_statep, 0);
 588                 return (DDI_FAILURE);
 589         }
 590         /* driver handles kernel-issued IOCTLs */
 591         if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
 592             DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
 593                 ddi_remove_minor_node(dip, NULL);
 594                 ddi_soft_state_free(lofi_statep, 0);
 595                 return (DDI_FAILURE);
 596         }
 597         lofi_dip = dip;
 598         ddi_report_dev(dip);
 599         return (DDI_SUCCESS);
 600 }
 601 
 602 static int
 603 lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 604 {
 605         if (cmd != DDI_DETACH)
 606                 return (DDI_FAILURE);
 607         if (lofi_busy())
 608                 return (DDI_FAILURE);
 609         lofi_dip = NULL;
 610         ddi_remove_minor_node(dip, NULL);
 611         ddi_prop_remove_all(dip);
 612         ddi_soft_state_free(lofi_statep, 0);
 613         return (DDI_SUCCESS);
 614 }
 615 
 616 /*
 617  * These two just simplify the rest of the ioctls that need to copyin/out
 618  * the lofi_ioctl structure.
 619  */
 620 struct lofi_ioctl *
 621 copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag)
 622 {
 623         struct lofi_ioctl *klip;
 624         int     error;
 625 
 626         klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
 627         error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
 628         if (error) {
 629                 kmem_free(klip, sizeof (struct lofi_ioctl));
 630                 return (NULL);
 631         }
 632 
 633         /* make sure filename is always null-terminated */
 634         klip->li_filename[MAXPATHLEN] = '\0';
 635 
 636         /* validate minor number */
 637         if (klip->li_minor > lofi_max_files) {
 638                 kmem_free(klip, sizeof (struct lofi_ioctl));
 639                 return (NULL);
 640         }
 641         return (klip);
 642 }
 643 
 644 int
 645 copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip,
 646         int flag)
 647 {
 648         int     error;
 649 
 650         error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag);
 651         if (error)
 652                 return (EFAULT);
 653         return (0);
 654 }
 655 
 656 void
 657 free_lofi_ioctl(struct lofi_ioctl *klip)
 658 {
 659         kmem_free(klip, sizeof (struct lofi_ioctl));
 660 }
 661 
 662 /*
 663  * Return the minor number 'filename' is mapped to, if it is.
 664  */
 665 static int
 666 file_to_minor(char *filename)
 667 {
 668         minor_t minor;
 669         struct lofi_state *lsp;
 670 
 671         ASSERT(mutex_owned(&lofi_lock));
 672         for (minor = 1; minor <= lofi_max_files; minor++) {
 673                 lsp = ddi_get_soft_state(lofi_statep, minor);
 674                 if (lsp == NULL)
 675                         continue;
 676                 if (strcmp(lsp->ls_filename, filename) == 0)
 677                         return (minor);
 678         }
 679         return (0);
 680 }
 681 
 682 /*
 683  * lofiadm does some validation, but since Joe Random (or crashme) could
 684  * do our ioctls, we need to do some validation too.
 685  */
 686 static int
 687 valid_filename(const char *filename)
 688 {
 689         static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/";
 690         static char *charprefix = "/dev/" LOFI_CHAR_NAME "/";
 691 
 692         /* must be absolute path */
 693         if (filename[0] != '/')
 694                 return (0);
 695         /* must not be lofi */
 696         if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0)
 697                 return (0);
 698         if (strncmp(filename, charprefix, strlen(charprefix)) == 0)
 699                 return (0);
 700         return (1);
 701 }
 702 
 703 /*
 704  * Fakes up a disk geometry, and one big partition, based on the size
 705  * of the file. This is needed because we allow newfs'ing the device,
 706  * and newfs will do several disk ioctls to figure out the geometry and
 707  * partition information. It uses that information to determine the parameters
 708  * to pass to mkfs. Geometry is pretty much irrelevant these days, but we
 709  * have to support it.
 710  */
 711 static void
 712 fake_disk_geometry(struct lofi_state *lsp)
 713 {
 714         /* dk_geom - see dkio(7I) */
 715         /*
 716          * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
 717          * of sectors), but that breaks programs like fdisk which want to
 718          * partition a disk by cylinder. With one cylinder, you can't create
 719          * an fdisk partition and put pcfs on it for testing (hard to pick
 720          * a number between one and one).
 721          *
 722          * The cheezy floppy test is an attempt to not have too few cylinders
 723          * for a small file, or so many on a big file that you waste space
 724          * for backup superblocks or cylinder group structures.
 725          */
 726         if (lsp->ls_vp_size < (2 * 1024 * 1024)) /* floppy? */
 727                 lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (100 * 1024);
 728         else
 729                 lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (300 * 1024);
 730         /* in case file file is < 100k */
 731         if (lsp->ls_dkg.dkg_ncyl == 0)
 732                 lsp->ls_dkg.dkg_ncyl = 1;
 733         lsp->ls_dkg.dkg_acyl = 0;
 734         lsp->ls_dkg.dkg_bcyl = 0;
 735         lsp->ls_dkg.dkg_nhead = 1;
 736         lsp->ls_dkg.dkg_obs1 = 0;
 737         lsp->ls_dkg.dkg_intrlv = 0;
 738         lsp->ls_dkg.dkg_obs2 = 0;
 739         lsp->ls_dkg.dkg_obs3 = 0;
 740         lsp->ls_dkg.dkg_apc = 0;
 741         lsp->ls_dkg.dkg_rpm = 7200;
 742         lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl;
 743         lsp->ls_dkg.dkg_nsect = lsp->ls_vp_size /
 744             (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl);
 745         lsp->ls_dkg.dkg_write_reinstruct = 0;
 746         lsp->ls_dkg.dkg_read_reinstruct = 0;
 747 
 748         /* vtoc - see dkio(7I) */
 749         bzero(&lsp->ls_vtoc, sizeof (struct vtoc));
 750         lsp->ls_vtoc.v_sanity = VTOC_SANE;
 751         lsp->ls_vtoc.v_version = V_VERSION;
 752         bcopy(LOFI_DRIVER_NAME, lsp->ls_vtoc.v_volume, 7);
 753         lsp->ls_vtoc.v_sectorsz = DEV_BSIZE;
 754         lsp->ls_vtoc.v_nparts = 1;
 755         lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED;
 756         lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
 757         lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0;
 758         /*
 759          * The partition size cannot just be the number of sectors, because
 760          * that might not end on a cylinder boundary. And if that's the case,
 761          * newfs/mkfs will print a scary warning. So just figure the size
 762          * based on the number of cylinders and sectors/cylinder.
 763          */
 764         lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
 765             lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;
 766 
 767         /* dk_cinfo - see dkio(7I) */
 768         bzero(&lsp->ls_ci, sizeof (struct dk_cinfo));
 769         (void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME);
 770         lsp->ls_ci.dki_ctype = DKC_MD;
 771         lsp->ls_ci.dki_flags = 0;
 772         lsp->ls_ci.dki_cnum = 0;
 773         lsp->ls_ci.dki_addr = 0;
 774         lsp->ls_ci.dki_space = 0;
 775         lsp->ls_ci.dki_prio = 0;
 776         lsp->ls_ci.dki_vec = 0;
 777         (void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME);
 778         lsp->ls_ci.dki_unit = 0;
 779         lsp->ls_ci.dki_slave = 0;
 780         lsp->ls_ci.dki_partition = 0;
 781         /*
 782          * newfs uses this to set maxcontig. Must not be < 16, or it
 783          * will be 0 when newfs multiplies it by DEV_BSIZE and divides
 784          * it by the block size. Then tunefs doesn't work because
 785          * maxcontig is 0.
 786          */
 787         lsp->ls_ci.dki_maxtransfer = 16;
 788 }
 789 
 790 /*
 791  * map a file to a minor number. Return the minor number.
 792  */
 793 static int
 794 lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
 795     int *rvalp, struct cred *credp, int ioctl_flag)
 796 {
 797         minor_t newminor;
 798         struct lofi_state *lsp;
 799         struct lofi_ioctl *klip;
 800         int     error;
 801         struct vnode *vp;
 802         int64_t Nblocks_prop_val;
 803         int64_t Size_prop_val;
 804         vattr_t vattr;
 805         int     flag;
 806         enum vtype v_type;
 807         int zalloced = 0;
 808         dev_t   newdev;
 809         char    namebuf[50];
 810 
 811         klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
 812         if (klip == NULL)
 813                 return (EFAULT);
 814 
 815         mutex_enter(&lofi_lock);
 816 
 817         if (!valid_filename(klip->li_filename)) {
 818                 error = EINVAL;
 819                 goto out;
 820         }
 821 
 822         if (file_to_minor(klip->li_filename) != 0) {
 823                 error = EBUSY;
 824                 goto out;
 825         }
 826 
 827         if (pickminor) {
 828                 /* Find a free one */
 829                 for (newminor = 1; newminor <= lofi_max_files; newminor++)
 830                         if (ddi_get_soft_state(lofi_statep, newminor) == NULL)
 831                                 break;
 832                 if (newminor >= lofi_max_files) {
 833                         error = EAGAIN;
 834                         goto out;
 835                 }
 836         } else {
 837                 newminor = klip->li_minor;
 838                 if (ddi_get_soft_state(lofi_statep, newminor) != NULL) {
 839                         error = EEXIST;
 840                         goto out;
 841                 }
 842         }
 843 
 844         /* make sure it's valid */
 845         error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW,
 846             NULLVPP, &vp);
 847         if (error) {
 848                 goto out;
 849         }
 850         v_type = vp->v_type;
 851         VN_RELE(vp);
 852         if (!V_ISLOFIABLE(v_type)) {
 853                 error = EINVAL;
 854                 goto out;
 855         }
 856         flag = FREAD | FWRITE | FOFFMAX | FEXCL;
 857         error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
 858         if (error) {
 859                 /* try read-only */
 860                 flag &= ~FWRITE;
 861                 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
 862                     &vp, 0, 0);
 863                 if (error) {
 864                         goto out;
 865                 }
 866         }
 867         vattr.va_mask = AT_SIZE;
 868         error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
 869         if (error) {
 870                 goto closeout;
 871         }
 872         /* the file needs to be a multiple of the block size */
 873         if ((vattr.va_size % DEV_BSIZE) != 0) {
 874                 error = EINVAL;
 875                 goto closeout;
 876         }
 877         newdev = makedevice(getmajor(dev), newminor);
 878         Size_prop_val = vattr.va_size;
 879         if ((ddi_prop_update_int64(newdev, lofi_dip,
 880             SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) {
 881                 error = EINVAL;
 882                 goto closeout;
 883         }
 884         Nblocks_prop_val = vattr.va_size / DEV_BSIZE;
 885         if ((ddi_prop_update_int64(newdev, lofi_dip,
 886             NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
 887                 error = EINVAL;
 888                 goto propout;
 889         }
 890         error = ddi_soft_state_zalloc(lofi_statep, newminor);
 891         if (error == DDI_FAILURE) {
 892                 error = ENOMEM;
 893                 goto propout;
 894         }
 895         zalloced = 1;
 896         (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor);
 897         (void) ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor,
 898             DDI_PSEUDO, NULL);
 899         if (error != DDI_SUCCESS) {
 900                 error = ENXIO;
 901                 goto propout;
 902         }
 903         (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor);
 904         error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor,
 905             DDI_PSEUDO, NULL);
 906         if (error != DDI_SUCCESS) {
 907                 /* remove block node */
 908                 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor);
 909                 ddi_remove_minor_node(lofi_dip, namebuf);
 910                 error = ENXIO;
 911                 goto propout;
 912         }
 913         lsp = ddi_get_soft_state(lofi_statep, newminor);
 914         lsp->ls_filename_sz = strlen(klip->li_filename) + 1;
 915         lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP);
 916         (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
 917             LOFI_DRIVER_NAME, newminor);
 918         lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads,
 919             minclsyspri, 1, lofi_taskq_maxalloc, 0);
 920         lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor,
 921             NULL, "disk", KSTAT_TYPE_IO, 1, 0);
 922         if (lsp->ls_kstat) {
 923                 mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
 924                 lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
 925                 kstat_install(lsp->ls_kstat);
 926         }
 927         cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
 928         mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);
 929 
 930         /*
 931          * save open mode so file can be closed properly and vnode counts
 932          * updated correctly.
 933          */
 934         lsp->ls_openflag = flag;
 935 
 936         /*
 937          * Try to handle stacked lofs vnodes.
 938          */
 939         if (vp->v_type == VREG) {
 940                 if (VOP_REALVP(vp, &lsp->ls_vp, NULL) != 0) {
 941                         lsp->ls_vp = vp;
 942                 } else {
 943                         /*
 944                          * Even though vp was obtained via vn_open(), we
 945                          * can't call vn_close() on it, since lofs will
 946                          * pass the VOP_CLOSE() on down to the realvp
 947                          * (which we are about to use). Hence we merely
 948                          * drop the reference to the lofs vnode and hold
 949                          * the realvp so things behave as if we've
 950                          * opened the realvp without any interaction
 951                          * with lofs.
 952                          */
 953                         VN_HOLD(lsp->ls_vp);
 954                         VN_RELE(vp);
 955                 }
 956         } else {
 957                 lsp->ls_vp = vp;
 958         }
 959         lsp->ls_vp_size = vattr.va_size;
 960         (void) strcpy(lsp->ls_filename, klip->li_filename);
 961         if (rvalp)
 962                 *rvalp = (int)newminor;
 963         klip->li_minor = newminor;
 964 
 965         fake_disk_geometry(lsp);
 966         mutex_exit(&lofi_lock);
 967         (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
 968         free_lofi_ioctl(klip);
 969         return (0);
 970 
 971 propout:
 972         (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
 973         (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);
 974 closeout:
 975         (void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
 976         VN_RELE(vp);
 977 out:
 978         if (zalloced)
 979                 ddi_soft_state_free(lofi_statep, newminor);
 980         mutex_exit(&lofi_lock);
 981         free_lofi_ioctl(klip);
 982         return (error);
 983 }
 984 
 985 /*
 986  * unmap a file.
 987  */
 988 static int
 989 lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename,
 990     struct cred *credp, int ioctl_flag)
 991 {
 992         struct lofi_state *lsp;
 993         struct lofi_ioctl *klip;
 994         minor_t minor;
 995 
 996         klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
 997         if (klip == NULL)
 998                 return (EFAULT);
 999 
1000         mutex_enter(&lofi_lock);
1001         if (byfilename) {
1002                 minor = file_to_minor(klip->li_filename);
1003         } else {
1004                 minor = klip->li_minor;
1005         }
1006         if (minor == 0) {
1007                 mutex_exit(&lofi_lock);
1008                 free_lofi_ioctl(klip);
1009                 return (ENXIO);
1010         }
1011         lsp = ddi_get_soft_state(lofi_statep, minor);
1012         if (lsp == NULL || lsp->ls_vp == NULL) {
1013                 mutex_exit(&lofi_lock);
1014                 free_lofi_ioctl(klip);
1015                 return (ENXIO);
1016         }
1017 
1018         if (is_opened(lsp)) {
1019                 /*
1020                  * If the 'force' flag is set, then we forcibly close the
1021                  * underlying file.  Subsequent operations will fail, and the
1022                  * DKIOCSTATE ioctl will return DKIO_DEV_GONE.  When the device
1023                  * is last closed, the device will be cleaned up appropriately.
1024                  *
1025                  * This is complicated by the fact that we may have outstanding
1026                  * dispatched I/Os.  Rather than having a single mutex to
1027                  * serialize all I/O, we keep a count of the number of
1028                  * outstanding I/O requests, as well as a flag to indicate that
1029                  * no new I/Os should be dispatched.  We set the flag, wait for
1030                  * the number of outstanding I/Os to reach 0, and then close the
1031                  * underlying vnode.
1032                  */
1033                 if (klip->li_force) {
1034                         mutex_enter(&lsp->ls_vp_lock);
1035                         lsp->ls_vp_closereq = B_TRUE;
1036                         while (lsp->ls_vp_iocount > 0)
1037                                 cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
1038                         (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0,
1039                             credp, NULL);
1040                         VN_RELE(lsp->ls_vp);
1041                         lsp->ls_vp = NULL;
1042                         cv_broadcast(&lsp->ls_vp_cv);
1043                         mutex_exit(&lsp->ls_vp_lock);
1044                         mutex_exit(&lofi_lock);
1045                         klip->li_minor = minor;
1046                         (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1047                         free_lofi_ioctl(klip);
1048                         return (0);
1049                 }
1050                 mutex_exit(&lofi_lock);
1051                 free_lofi_ioctl(klip);
1052                 return (EBUSY);
1053         }
1054 
1055         lofi_free_handle(dev, minor, lsp, credp);
1056 
1057         klip->li_minor = minor;
1058         mutex_exit(&lofi_lock);
1059         (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1060         free_lofi_ioctl(klip);
1061         return (0);
1062 }
1063 
1064 /*
1065  * get the filename given the minor number, or the minor number given
1066  * the name.
1067  */
1068 /*ARGSUSED*/
1069 static int
1070 lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which,
1071     struct cred *credp, int ioctl_flag)
1072 {
1073         struct lofi_state *lsp;
1074         struct lofi_ioctl *klip;
1075         int     error;
1076         minor_t minor;
1077 
1078         klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
1079         if (klip == NULL)
1080                 return (EFAULT);
1081 
1082         switch (which) {
1083         case LOFI_GET_FILENAME:
1084                 minor = klip->li_minor;
1085                 if (minor == 0) {
1086                         free_lofi_ioctl(klip);
1087                         return (EINVAL);
1088                 }
1089 
1090                 mutex_enter(&lofi_lock);
1091                 lsp = ddi_get_soft_state(lofi_statep, minor);
1092                 if (lsp == NULL) {
1093                         mutex_exit(&lofi_lock);
1094                         free_lofi_ioctl(klip);
1095                         return (ENXIO);
1096                 }
1097                 (void) strcpy(klip->li_filename, lsp->ls_filename);
1098                 mutex_exit(&lofi_lock);
1099                 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1100                 free_lofi_ioctl(klip);
1101                 return (error);
1102         case LOFI_GET_MINOR:
1103                 mutex_enter(&lofi_lock);
1104                 klip->li_minor = file_to_minor(klip->li_filename);
1105                 mutex_exit(&lofi_lock);
1106                 if (klip->li_minor == 0) {
1107                         free_lofi_ioctl(klip);
1108                         return (ENOENT);
1109                 }
1110                 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1111                 free_lofi_ioctl(klip);
1112                 return (error);
1113         default:
1114                 free_lofi_ioctl(klip);
1115                 return (EINVAL);
1116         }
1117 
1118 }
1119 
1120 static int
1121 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
1122     int *rvalp)
1123 {
1124         int     error;
1125         enum dkio_state dkstate;
1126         struct lofi_state *lsp;
1127         minor_t minor;
1128 
1129 #ifdef lint
1130         credp = credp;
1131 #endif
1132 
1133         minor = getminor(dev);
1134         /* lofi ioctls only apply to the master device */
1135         if (minor == 0) {
1136                 struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;
1137 
1138                 /*
1139                  * the query command only need read-access - i.e., normal
1140                  * users are allowed to do those on the ctl device as
1141                  * long as they can open it read-only.
1142                  */
1143                 switch (cmd) {
1144                 case LOFI_MAP_FILE:
1145                         if ((flag & FWRITE) == 0)
1146                                 return (EPERM);
1147                         return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
1148                 case LOFI_MAP_FILE_MINOR:
1149                         if ((flag & FWRITE) == 0)
1150                                 return (EPERM);
1151                         return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
1152                 case LOFI_UNMAP_FILE:
1153                         if ((flag & FWRITE) == 0)
1154                                 return (EPERM);
1155                         return (lofi_unmap_file(dev, lip, 1, credp, flag));
1156                 case LOFI_UNMAP_FILE_MINOR:
1157                         if ((flag & FWRITE) == 0)
1158                                 return (EPERM);
1159                         return (lofi_unmap_file(dev, lip, 0, credp, flag));
1160                 case LOFI_GET_FILENAME:
1161                         return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
1162                             credp, flag));
1163                 case LOFI_GET_MINOR:
1164                         return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
1165                             credp, flag));
1166                 case LOFI_GET_MAXMINOR:
1167                         error = ddi_copyout(&lofi_max_files, &lip->li_minor,
1168                             sizeof (lofi_max_files), flag);
1169                         if (error)
1170                                 return (EFAULT);
1171                         return (0);
1172                 default:
1173                         break;
1174                 }
1175         }
1176 
1177         lsp = ddi_get_soft_state(lofi_statep, minor);
1178         if (lsp == NULL)
1179                 return (ENXIO);
1180 
1181         /*
1182          * We explicitly allow DKIOCSTATE, but all other ioctls should fail with
1183          * EIO as if the device was no longer present.
1184          */
1185         if (lsp->ls_vp == NULL && cmd != DKIOCSTATE)
1186                 return (EIO);
1187 
1188         /* these are for faking out utilities like newfs */
1189         switch (cmd) {
1190         case DKIOCGVTOC:
1191                 switch (ddi_model_convert_from(flag & FMODELS)) {
1192                 case DDI_MODEL_ILP32: {
1193                         struct vtoc32 vtoc32;
1194 
1195                         vtoctovtoc32(lsp->ls_vtoc, vtoc32);
1196                         if (ddi_copyout(&vtoc32, (void *)arg,
1197                             sizeof (struct vtoc32), flag))
1198                                 return (EFAULT);
1199                                 break;
1200                         }
1201 
1202                 case DDI_MODEL_NONE:
1203                         if (ddi_copyout(&lsp->ls_vtoc, (void *)arg,
1204                             sizeof (struct vtoc), flag))
1205                                 return (EFAULT);
1206                         break;
1207                 }
1208                 return (0);
1209         case DKIOCINFO:
1210                 error = ddi_copyout(&lsp->ls_ci, (void *)arg,
1211                     sizeof (struct dk_cinfo), flag);
1212                 if (error)
1213                         return (EFAULT);
1214                 return (0);
1215         case DKIOCG_VIRTGEOM:
1216         case DKIOCG_PHYGEOM:
1217         case DKIOCGGEOM:
1218                 error = ddi_copyout(&lsp->ls_dkg, (void *)arg,
1219                     sizeof (struct dk_geom), flag);
1220                 if (error)
1221                         return (EFAULT);
1222                 return (0);
1223         case DKIOCSTATE:
1224                 /*
1225                  * Normally, lofi devices are always in the INSERTED state.  If
1226                  * a device is forcefully unmapped, then the device transitions
1227                  * to the DKIO_DEV_GONE state.
1228                  */
1229                 if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate),
1230                     flag) != 0)
1231                         return (EFAULT);
1232 
1233                 mutex_enter(&lsp->ls_vp_lock);
1234                 while ((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) ||
1235                     (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) {
1236                         /*
1237                          * By virtue of having the device open, we know that
1238                          * 'lsp' will remain valid when we return.
1239                          */
1240                         if (!cv_wait_sig(&lsp->ls_vp_cv,
1241                             &lsp->ls_vp_lock)) {
1242                                 mutex_exit(&lsp->ls_vp_lock);
1243                                 return (EINTR);
1244                         }
1245                 }
1246 
1247                 dkstate = (lsp->ls_vp != NULL ? DKIO_INSERTED : DKIO_DEV_GONE);
1248                 mutex_exit(&lsp->ls_vp_lock);
1249 
1250                 if (ddi_copyout(&dkstate, (void *)arg,
1251                     sizeof (dkstate), flag) != 0)
1252                         return (EFAULT);
1253                 return (0);
1254         default:
1255                 return (ENOTTY);
1256         }
1257 }
1258 
1259 static struct cb_ops lofi_cb_ops = {
1260         lofi_open,              /* open */
1261         lofi_close,             /* close */
1262         lofi_strategy,          /* strategy */
1263         nodev,                  /* print */
1264         nodev,                  /* dump */
1265         lofi_read,              /* read */
1266         lofi_write,             /* write */
1267         lofi_ioctl,             /* ioctl */
1268         nodev,                  /* devmap */
1269         nodev,                  /* mmap */
1270         nodev,                  /* segmap */
1271         nochpoll,               /* poll */
1272         ddi_prop_op,            /* prop_op */
1273         0,                      /* streamtab  */
1274         D_64BIT | D_NEW | D_MP, /* Driver compatibility flag */
1275         CB_REV,
1276         lofi_aread,
1277         lofi_awrite
1278 };
1279 
1280 static struct dev_ops lofi_ops = {
1281         DEVO_REV,               /* devo_rev, */
1282         0,                      /* refcnt  */
1283         lofi_info,              /* info */
1284         nulldev,                /* identify */
1285         nulldev,                /* probe */
1286         lofi_attach,            /* attach */
1287         lofi_detach,            /* detach */
1288         nodev,                  /* reset */
1289         &lofi_cb_ops,               /* driver operations */
1290         NULL                    /* no bus operations */
1291 };
1292 
1293 static struct modldrv modldrv = {
1294         &mod_driverops,
1295         "loopback file driver (%I%)",
1296         &lofi_ops,
1297 };
1298 
1299 static struct modlinkage modlinkage = {
1300         MODREV_1,
1301         &modldrv,
1302         NULL
1303 };
1304 
1305 int
1306 _init(void)
1307 {
1308         int error;
1309 
1310         error = ddi_soft_state_init(&lofi_statep,
1311             sizeof (struct lofi_state), 0);
1312         if (error)
1313                 return (error);
1314 
1315         mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL);
1316         error = mod_install(&modlinkage);
1317         if (error) {
1318                 mutex_destroy(&lofi_lock);
1319                 ddi_soft_state_fini(&lofi_statep);
1320         }
1321 
1322         return (error);
1323 }
1324 
1325 int
1326 _fini(void)
1327 {
1328         int     error;
1329 
1330         if (lofi_busy())
1331                 return (EBUSY);
1332 
1333         error = mod_remove(&modlinkage);
1334         if (error)
1335                 return (error);
1336 
1337         mutex_destroy(&lofi_lock);
1338         ddi_soft_state_fini(&lofi_statep);
1339 
1340         return (error);
1341 }
1342 
1343 int
1344 _info(struct modinfo *modinfop)
1345 {
1346         return (mod_info(&modlinkage, modinfop));
1347 }