Print this page
PSARC 2007/569 lofi(7D) compression support
6618343 lofi compression support
6603856 Lofi(7D) can thrash the page cache

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/lofi.c
          +++ new/usr/src/uts/common/io/lofi.c
↓ open down ↓ 91 lines elided ↑ open up ↑
  92   92   *      Pass-through ioctls on block devices. You can (though it's not
  93   93   *      documented), give lofi a block device as a file name. Then we shouldn't
  94   94   *      need to fake a geometry. But this is also silly unless you're replacing
  95   95   *      metadisk.
  96   96   *
  97   97   *      Encryption. tpm would like this. Apparently Windows 2000 has it, and
  98   98   *      so does Linux.
  99   99   */
 100  100  
 101  101  #include <sys/types.h>
      102 +#include <netinet/in.h>
 102  103  #include <sys/sysmacros.h>
 103  104  #include <sys/cmn_err.h>
 104  105  #include <sys/uio.h>
 105  106  #include <sys/kmem.h>
 106  107  #include <sys/cred.h>
 107  108  #include <sys/mman.h>
 108  109  #include <sys/errno.h>
 109  110  #include <sys/aio_req.h>
 110  111  #include <sys/stat.h>
 111  112  #include <sys/file.h>
↓ open down ↓ 4 lines elided ↑ open up ↑
 116  117  #include <sys/lofi.h>
 117  118  #include <sys/fcntl.h>
 118  119  #include <sys/pathname.h>
 119  120  #include <sys/filio.h>
 120  121  #include <sys/fdio.h>
 121  122  #include <sys/open.h>
 122  123  #include <sys/disp.h>
 123  124  #include <vm/seg_map.h>
 124  125  #include <sys/ddi.h>
 125  126  #include <sys/sunddi.h>
      127 +#include <sys/zmod.h>
 126  128  
 127      -/* seems safer than having to get the string right many times */
 128  129  #define NBLOCKS_PROP_NAME       "Nblocks"
 129      -#define SIZE_PROP_NAME  "Size"
      130 +#define SIZE_PROP_NAME          "Size"
 130  131  
 131  132  static dev_info_t *lofi_dip;
 132  133  static void     *lofi_statep;
 133  134  static kmutex_t lofi_lock;              /* state lock */
 134  135  
 135  136  /*
 136  137   * Because lofi_taskq_nthreads limits the actual swamping of the device, the
 137  138   * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
 138  139   * high.  If we want to be assured that the underlying device is always busy,
 139  140   * we must be sure that the number of bytes enqueued when the number of
↓ open down ↓ 2 lines elided ↑ open up ↑
 142  143   * set maxalloc to be the maximum throughput (in bytes per second) of the
 143  144   * underlying device divided by the minimum I/O size.  We assume a realistic
 144  145   * maximum throughput of one hundred megabytes per second; we set maxalloc on
 145  146   * the lofi task queue to be 104857600 divided by DEV_BSIZE.
 146  147   */
 147  148  static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
 148  149  static int lofi_taskq_nthreads = 4;     /* # of taskq threads per device */
 149  150  
 150  151  uint32_t lofi_max_files = LOFI_MAX_FILES;
 151  152  
      153 +static int gzip_decompress(void *src, size_t srclen, void *dst,
      154 +        size_t *destlen, int level);
      155 +
      156 +lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
      157 +        {gzip_decompress,       NULL,   6,      "gzip"}, /* default */
      158 +        {gzip_decompress,       NULL,   6,      "gzip-6"},
      159 +        {gzip_decompress,       NULL,   9,      "gzip-9"}
      160 +};
      161 +
 152  162  static int
 153  163  lofi_busy(void)
 154  164  {
 155  165          minor_t minor;
 156  166  
 157  167          /*
 158  168           * We need to make sure no mappings exist - mod_remove won't
 159  169           * help because the device isn't open.
 160  170           */
 161  171          mutex_enter(&lofi_lock);
↓ open down ↓ 54 lines elided ↑ open up ↑
 216  226  }
 217  227  
 218  228  static void
 219  229  lofi_free_handle(dev_t dev, minor_t minor, struct lofi_state *lsp,
 220  230      cred_t *credp)
 221  231  {
 222  232          dev_t   newdev;
 223  233          char    namebuf[50];
 224  234  
 225  235          if (lsp->ls_vp) {
 226      -                (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
 227      -                    1, 0, credp, NULL);
      236 +                (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0, credp);
 228  237                  VN_RELE(lsp->ls_vp);
 229  238                  lsp->ls_vp = NULL;
 230  239          }
 231  240  
 232  241          newdev = makedevice(getmajor(dev), minor);
 233  242          (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
 234  243          (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);
 235  244  
 236  245          (void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
 237  246          ddi_remove_minor_node(lofi_dip, namebuf);
↓ open down ↓ 79 lines elided ↑ open up ↑
 317  326          /*
 318  327           * If we have forcibly closed the underlying device, and this is the
 319  328           * last close, then tear down the rest of the device.
 320  329           */
 321  330          if (minor != 0 && lsp->ls_vp == NULL && !is_opened(lsp))
 322  331                  lofi_free_handle(dev, minor, lsp, credp);
 323  332          mutex_exit(&lofi_lock);
 324  333          return (0);
 325  334  }
 326  335  
      336 +static int
      337 +lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
      338 +        struct lofi_state *lsp)
      339 +{
      340 +        int error;
      341 +        offset_t alignedoffset, mapoffset;
      342 +        size_t  xfersize;
      343 +        int     isread;
      344 +        int     smflags;
      345 +        caddr_t mapaddr;
      346 +        size_t  len;
      347 +        enum seg_rw srw;
      348 +
      349 +        /*
      350 +         * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
      351 +         * an 8K boundary, but the buf transfer address may not be
      352 +         * aligned on more than a 512-byte boundary (we don't enforce
      353 +         * that even though we could). This matters since the initial
      354 +         * part of the transfer may not start at offset 0 within the
      355 +         * segmap'd chunk. So we have to compensate for that with
      356 +         * 'mapoffset'. Subsequent chunks always start off at the
      357 +         * beginning, and the last is capped by b_resid.
      358 +         */
      359 +        mapoffset = offset & MAXBOFFSET;
      360 +        alignedoffset = offset - mapoffset;
      361 +        bp->b_resid = bp->b_bcount;
      362 +        isread = bp->b_flags & B_READ;
      363 +        srw = isread ? S_READ : S_WRITE;
      364 +        do {
      365 +                xfersize = MIN(lsp->ls_vp_comp_size - offset,
      366 +                    MIN(MAXBSIZE - mapoffset, bp->b_resid));
      367 +                len = roundup(mapoffset + xfersize, PAGESIZE);
      368 +                mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
      369 +                    alignedoffset, MAXBSIZE, 1, srw);
      370 +                /*
      371 +                 * Now fault in the pages. This lets us check
      372 +                 * for errors before we reference mapaddr and
      373 +                 * try to resolve the fault in bcopy (which would
      374 +                 * panic instead). And this can easily happen,
      375 +                 * particularly if you've lofi'd a file over NFS
      376 +                 * and someone deletes the file on the server.
      377 +                 */
      378 +                error = segmap_fault(kas.a_hat, segkmap, mapaddr,
      379 +                    len, F_SOFTLOCK, srw);
      380 +                if (error) {
      381 +                        (void) segmap_release(segkmap, mapaddr, 0);
      382 +                        if (FC_CODE(error) == FC_OBJERR)
      383 +                                error = FC_ERRNO(error);
      384 +                        else
      385 +                                error = EIO;
      386 +                        break;
      387 +                }
      388 +                smflags = 0;
      389 +                if (isread) {
      390 +                        smflags |= SM_FREE;
      391 +                        /*
      392 +                         * If we're reading an entire page starting
      393 +                         * at a page boundary, there's a good chance
      394 +                         * we won't need it again. Put it on the
      395 +                         * head of the freelist.
      396 +                         */
      397 +                        if (mapoffset == 0 && xfersize == PAGESIZE)
      398 +                                smflags |= SM_DONTNEED;
      399 +                        bcopy(mapaddr + mapoffset, bufaddr, xfersize);
      400 +                } else {
      401 +                        smflags |= SM_WRITE;
      402 +                        bcopy(bufaddr, mapaddr + mapoffset, xfersize);
      403 +                }
      404 +                bp->b_resid -= xfersize;
      405 +                bufaddr += xfersize;
      406 +                offset += xfersize;
      407 +                (void) segmap_fault(kas.a_hat, segkmap, mapaddr,
      408 +                    len, F_SOFTUNLOCK, srw);
      409 +                error = segmap_release(segkmap, mapaddr, smflags);
      410 +                /* only the first map may start partial */
      411 +                mapoffset = 0;
      412 +                alignedoffset += MAXBSIZE;
      413 +        } while ((error == 0) && (bp->b_resid > 0) &&
      414 +            (offset < lsp->ls_vp_comp_size));
      415 +
      416 +        return (error);
      417 +}
      418 +
      419 +/*ARGSUSED*/
      420 +static int gzip_decompress(void *src, size_t srclen, void *dst,
      421 +    size_t *dstlen, int level)
      422 +{
      423 +        ASSERT(*dstlen >= srclen);
      424 +
      425 +        if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
      426 +                return (-1);
      427 +        return (0);
      428 +}
      429 +
 327  430  /*
 328  431   * This is basically what strategy used to be before we found we
 329  432   * needed task queues.
 330  433   */
 331  434  static void
 332  435  lofi_strategy_task(void *arg)
 333  436  {
 334  437          struct buf *bp = (struct buf *)arg;
 335  438          int error;
 336  439          struct lofi_state *lsp;
 337      -        offset_t        offset, alignedoffset;
 338      -        offset_t        mapoffset;
 339      -        caddr_t bufaddr;
 340      -        caddr_t mapaddr;
 341      -        size_t  xfersize;
 342      -        size_t  len;
 343      -        int     isread;
 344      -        int     smflags;
 345      -        enum seg_rw srw;
      440 +        uint64_t sblkno, eblkno, cmpbytes;
      441 +        offset_t offset, sblkoff, eblkoff;
      442 +        offset_t salign, ealign;
      443 +        offset_t sdiff;
      444 +        uint32_t comp_data_sz;
      445 +        caddr_t bufaddr;
      446 +        unsigned char *compressed_seg = NULL, *cmpbuf;
      447 +        unsigned char *uncompressed_seg = NULL;
      448 +        lofi_compress_info_t *li;
      449 +        size_t oblkcount, xfersize;
      450 +        unsigned long seglen;
 346  451  
 347  452          lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
 348  453          if (lsp->ls_kstat) {
 349  454                  mutex_enter(lsp->ls_kstat->ks_lock);
 350  455                  kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
 351  456                  mutex_exit(lsp->ls_kstat->ks_lock);
 352  457          }
 353  458          bp_mapin(bp);
 354  459          bufaddr = bp->b_un.b_addr;
 355  460          offset = bp->b_lblkno * DEV_BSIZE;      /* offset within file */
↓ open down ↓ 2 lines elided ↑ open up ↑
 358  463           * We used to always use vn_rdwr here, but we cannot do that because
 359  464           * we might decide to read or write from the the underlying
 360  465           * file during this call, which would be a deadlock because
 361  466           * we have the rw_lock. So instead we page, unless it's not
 362  467           * mapable or it's a character device.
 363  468           */
 364  469          if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
 365  470                  error = EIO;
 366  471          } else if (((lsp->ls_vp->v_flag & VNOMAP) == 0) &&
 367  472              (lsp->ls_vp->v_type != VCHR)) {
      473 +                uint64_t i;
      474 +
 368  475                  /*
 369      -                 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
 370      -                 * an 8K boundary, but the buf transfer address may not be
 371      -                 * aligned on more than a 512-byte boundary (we don't
 372      -                 * enforce that, though we could). This matters since the
 373      -                 * initial part of the transfer may not start at offset 0
 374      -                 * within the segmap'd chunk. So we have to compensate for
 375      -                 * that with 'mapoffset'. Subsequent chunks always start
 376      -                 * off at the beginning, and the last is capped by b_resid.
      476 +                 * Handle uncompressed files with a regular read
 377  477                   */
 378      -                mapoffset = offset & MAXBOFFSET;
 379      -                alignedoffset = offset - mapoffset;     /* now map-aligned */
 380      -                bp->b_resid = bp->b_bcount;
 381      -                isread = bp->b_flags & B_READ;
 382      -                srw = isread ? S_READ : S_WRITE;
 383      -                do {
 384      -                        xfersize = MIN(lsp->ls_vp_size - offset,
 385      -                            MIN(MAXBSIZE - mapoffset, bp->b_resid));
 386      -                        len = roundup(mapoffset + xfersize, PAGESIZE);
 387      -                        mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
 388      -                            alignedoffset, MAXBSIZE, 1, srw);
      478 +                if (lsp->ls_uncomp_seg_sz == 0) {
      479 +                        error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
      480 +                        goto done;
      481 +                }
      482 +
      483 +                /*
      484 +                 * From here on we're dealing primarily with compressed files
      485 +                 */
      486 +
      487 +                /*
      488 +                 * Compressed files can only be read from and
      489 +                 * not written to
      490 +                 */
      491 +                if (!(bp->b_flags & B_READ)) {
      492 +                        bp->b_resid = bp->b_bcount;
      493 +                        error = EROFS;
      494 +                        goto done;
      495 +                }
      496 +
      497 +                ASSERT(lsp->ls_comp_algorithm_index >= 0);
      498 +                li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
      499 +                /*
      500 +                 * Compute starting and ending compressed segment numbers
      501 +                 * We use only bitwise operations avoiding division and
      502 +                 * modulus because we enforce the compression segment size
      503 +                 * to a power of 2
      504 +                 */
      505 +                sblkno = offset >> lsp->ls_comp_seg_shift;
      506 +                sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
      507 +                eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
      508 +                eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1);
      509 +
      510 +                /*
      511 +                 * Align start offset to block boundary for segmap
      512 +                 */
      513 +                salign = lsp->ls_comp_seg_index[sblkno];
      514 +                sdiff = salign & (DEV_BSIZE - 1);
      515 +                salign -= sdiff;
      516 +                if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
 389  517                          /*
 390      -                         * Now fault in the pages. This lets us check
 391      -                         * for errors before we reference mapaddr and
 392      -                         * try to resolve the fault in bcopy (which would
 393      -                         * panic instead). And this can easily happen,
 394      -                         * particularly if you've lofi'd a file over NFS
 395      -                         * and someone deletes the file on the server.
      518 +                         * We're dealing with the last segment of
      519 +                         * the compressed file -- the size of this
      520 +                         * segment *may not* be the same as the
      521 +                         * segment size for the file
 396  522                           */
 397      -                        error = segmap_fault(kas.a_hat, segkmap, mapaddr,
 398      -                            len, F_SOFTLOCK, srw);
 399      -                        if (error) {
 400      -                                (void) segmap_release(segkmap, mapaddr, 0);
 401      -                                if (FC_CODE(error) == FC_OBJERR)
 402      -                                        error = FC_ERRNO(error);
 403      -                                else
 404      -                                        error = EIO;
 405      -                                break;
      523 +                        eblkoff = (offset + bp->b_bcount) &
      524 +                            (lsp->ls_uncomp_last_seg_sz - 1);
      525 +                        ealign = lsp->ls_vp_comp_size;
      526 +                } else {
      527 +                        ealign = lsp->ls_comp_seg_index[eblkno + 1];
      528 +                }
      529 +
      530 +                /*
      531 +                 * Preserve original request parameters
      532 +                 */
      533 +                oblkcount = bp->b_bcount;
      534 +
      535 +                /*
      536 +                 * Assign the calculated parameters
      537 +                 */
      538 +                comp_data_sz = ealign - salign;
      539 +                bp->b_bcount = comp_data_sz;
      540 +
      541 +                /*
      542 +                 * Allocate fixed size memory blocks to hold one
      543 +                 * compressed and uncompressed segment since we
      544 +                 * uncompress segments one at a time
      545 +                 */
      546 +                compressed_seg = kmem_alloc(bp->b_bcount, KM_SLEEP);
      547 +                uncompressed_seg = kmem_alloc(lsp->ls_uncomp_seg_sz, KM_SLEEP);
      548 +                /*
      549 +                 * Map in the calculated number of blocks
      550 +                 */
      551 +                error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
      552 +                    bp, lsp);
      553 +
      554 +                bp->b_bcount = oblkcount;
      555 +                bp->b_resid = oblkcount;
      556 +                if (error != 0)
      557 +                        goto done;
      558 +
      559 +                /*
      560 +                 * We have the compressed blocks, now uncompress them
      561 +                 */
      562 +                cmpbuf = compressed_seg + sdiff;
      563 +                for (i = sblkno; i < (eblkno + 1) && i < lsp->ls_comp_index_sz;
      564 +                    i++) {
      565 +                        /*
      566 +                         * Each of the segment index entries contains
      567 +                         * the starting byte offset of that segment.
      568 +                         * The number of compressed bytes in a segment
      569 +                         * is thus the difference between the starting
      570 +                         * byte offset of this segment and the starting
      571 +                         * byte offset of the next segment.
      572 +                         */
      573 +                        if ((i == eblkno) &&
      574 +                            (i == lsp->ls_comp_index_sz - 1)) {
      575 +                                cmpbytes = lsp->ls_vp_comp_size -
      576 +                                    lsp->ls_comp_seg_index[i];
      577 +                        } else {
      578 +                                cmpbytes = lsp->ls_comp_seg_index[i + 1] -
      579 +                                    lsp->ls_comp_seg_index[i];
 406  580                          }
 407      -                        smflags = 0;
 408      -                        if (isread) {
 409      -                                bcopy(mapaddr + mapoffset, bufaddr, xfersize);
      581 +
      582 +                        /*
      583 +                         * The first byte in a compressed segment is a flag
      584 +                         * that indicates whether this segment is
      585 +                         * compressed at all
      586 +                         */
      587 +                        if (*cmpbuf == UNCOMPRESSED) {
      588 +                                bcopy((cmpbuf + SEGHDR), uncompressed_seg,
      589 +                                    (cmpbytes - SEGHDR));
 410  590                          } else {
 411      -                                smflags |= SM_WRITE;
 412      -                                bcopy(bufaddr, mapaddr + mapoffset, xfersize);
      591 +                                seglen = lsp->ls_uncomp_seg_sz;
      592 +
      593 +                                if (li->l_decompress((cmpbuf + SEGHDR),
      594 +                                    (cmpbytes - SEGHDR), uncompressed_seg,
      595 +                                    &seglen, li->l_level) != 0) {
      596 +                                        error = EIO;
      597 +                                        goto done;
      598 +                                }
 413  599                          }
 414      -                        bp->b_resid -= xfersize;
      600 +
      601 +                        /*
      602 +                         * Determine how much uncompressed data we
      603 +                         * have to copy and copy it
      604 +                         */
      605 +                        xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
      606 +                        if (i == eblkno) {
      607 +                                if (i == (lsp->ls_comp_index_sz - 1))
      608 +                                        xfersize -= (lsp->ls_uncomp_last_seg_sz
      609 +                                            - eblkoff);
      610 +                                else
      611 +                                        xfersize -=
      612 +                                            (lsp->ls_uncomp_seg_sz - eblkoff);
      613 +                        }
      614 +
      615 +                        bcopy((uncompressed_seg + sblkoff), bufaddr, xfersize);
      616 +
      617 +                        cmpbuf += cmpbytes;
 415  618                          bufaddr += xfersize;
 416      -                        offset += xfersize;
 417      -                        (void) segmap_fault(kas.a_hat, segkmap, mapaddr,
 418      -                            len, F_SOFTUNLOCK, srw);
 419      -                        error = segmap_release(segkmap, mapaddr, smflags);
 420      -                        /* only the first map may start partial */
 421      -                        mapoffset = 0;
 422      -                        alignedoffset += MAXBSIZE;
 423      -                } while ((error == 0) && (bp->b_resid > 0) &&
 424      -                    (offset < lsp->ls_vp_size));
      619 +                        bp->b_resid -= xfersize;
      620 +                        sblkoff = 0;
      621 +
      622 +                        if (bp->b_resid == 0)
      623 +                                break;
      624 +                }
 425  625          } else {
 426  626                  ssize_t resid;
 427  627                  enum uio_rw rw;
 428  628  
 429  629                  if (bp->b_flags & B_READ)
 430  630                          rw = UIO_READ;
 431  631                  else
 432  632                          rw = UIO_WRITE;
 433  633                  error = vn_rdwr(rw, lsp->ls_vp, bufaddr, bp->b_bcount,
 434  634                      offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
 435  635                  bp->b_resid = resid;
 436  636          }
 437  637  
      638 +done:
      639 +        if (compressed_seg != NULL)
      640 +                kmem_free(compressed_seg, comp_data_sz);
      641 +        if (uncompressed_seg != NULL)
      642 +                kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
      643 +
 438  644          if (lsp->ls_kstat) {
 439  645                  size_t n_done = bp->b_bcount - bp->b_resid;
 440  646                  kstat_io_t *kioptr;
 441  647  
 442  648                  mutex_enter(lsp->ls_kstat->ks_lock);
 443  649                  kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
 444  650                  if (bp->b_flags & B_READ) {
 445  651                          kioptr->nread += n_done;
 446  652                          kioptr->reads++;
 447  653                  } else {
↓ open down ↓ 176 lines elided ↑ open up ↑
 624  830          int     error;
 625  831  
 626  832          klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
 627  833          error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
 628  834          if (error) {
 629  835                  kmem_free(klip, sizeof (struct lofi_ioctl));
 630  836                  return (NULL);
 631  837          }
 632  838  
 633  839          /* make sure filename is always null-terminated */
 634      -        klip->li_filename[MAXPATHLEN] = '\0';
      840 +        klip->li_filename[MAXPATHLEN - 1] = '\0';
 635  841  
 636  842          /* validate minor number */
 637  843          if (klip->li_minor > lofi_max_files) {
 638  844                  kmem_free(klip, sizeof (struct lofi_ioctl));
 639  845                  return (NULL);
 640  846          }
 641  847          return (klip);
 642  848  }
 643  849  
 644  850  int
↓ open down ↓ 101 lines elided ↑ open up ↑
 746  952          lsp->ls_dkg.dkg_read_reinstruct = 0;
 747  953  
 748  954          /* vtoc - see dkio(7I) */
 749  955          bzero(&lsp->ls_vtoc, sizeof (struct vtoc));
 750  956          lsp->ls_vtoc.v_sanity = VTOC_SANE;
 751  957          lsp->ls_vtoc.v_version = V_VERSION;
 752  958          bcopy(LOFI_DRIVER_NAME, lsp->ls_vtoc.v_volume, 7);
 753  959          lsp->ls_vtoc.v_sectorsz = DEV_BSIZE;
 754  960          lsp->ls_vtoc.v_nparts = 1;
 755  961          lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED;
 756      -        lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
      962 +
      963 +        /*
      964 +         * A compressed file is read-only, other files can
      965 +         * be read-write
      966 +         */
      967 +        if (lsp->ls_uncomp_seg_sz > 0) {
      968 +                lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT | V_RONLY;
      969 +        } else {
      970 +                lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
      971 +        }
 757  972          lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0;
 758  973          /*
 759  974           * The partition size cannot just be the number of sectors, because
 760  975           * that might not end on a cylinder boundary. And if that's the case,
 761  976           * newfs/mkfs will print a scary warning. So just figure the size
 762  977           * based on the number of cylinders and sectors/cylinder.
 763  978           */
 764  979          lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
 765  980              lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;
 766  981  
↓ open down ↓ 14 lines elided ↑ open up ↑
 781  996          /*
 782  997           * newfs uses this to set maxcontig. Must not be < 16, or it
 783  998           * will be 0 when newfs multiplies it by DEV_BSIZE and divides
 784  999           * it by the block size. Then tunefs doesn't work because
 785 1000           * maxcontig is 0.
 786 1001           */
 787 1002          lsp->ls_ci.dki_maxtransfer = 16;
 788 1003  }
 789 1004  
 790 1005  /*
     1006 + * map in a compressed file
     1007 + *
     1008 + * Read in the header and the index that follows.
     1009 + *
     1010 + * The header is as follows -
     1011 + *
     1012 + * Signature (name of the compression algorithm)
     1013 + * Compression segment size (a multiple of 512)
     1014 + * Number of index entries
     1015 + * Size of the last block
     1016 + * The array containing the index entries
     1017 + *
     1018 + * The header information is always stored in
     1019 + * network byte order on disk.
     1020 + */
     1021 +static int
     1022 +lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
     1023 +{
     1024 +        uint32_t index_sz, header_len, i;
     1025 +        ssize_t resid;
     1026 +        enum uio_rw rw;
     1027 +        char *tbuf = buf;
     1028 +        int error;
     1029 +
     1030 +        /* The signature has already been read */
     1031 +        tbuf += lsp->ls_comp_algorithm_len;
     1032 +        bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
     1033 +        lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);
     1034 +
     1035 +        /*
     1036 +         * The compressed segment size must be a power of 2
     1037 +         */
     1038 +        if (lsp->ls_uncomp_seg_sz % 2)
     1039 +                return (EINVAL);
     1040 +
     1041 +        for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
     1042 +                ;
     1043 +
     1044 +        lsp->ls_comp_seg_shift = i;
     1045 +
     1046 +        tbuf += sizeof (lsp->ls_uncomp_seg_sz);
     1047 +        bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
     1048 +        lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);
     1049 +
     1050 +        tbuf += sizeof (lsp->ls_comp_index_sz);
     1051 +        bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
     1052 +            sizeof (lsp->ls_uncomp_last_seg_sz));
     1053 +        lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);
     1054 +
     1055 +        /*
     1056 +         * Compute the total size of the uncompressed data
     1057 +         * for use in fake_disk_geometry and other calculations.
     1058 +         * Disk geometry has to be faked with respect to the
     1059 +         * actual uncompressed data size rather than the
     1060 +         * compressed file size.
     1061 +         */
     1062 +        /* XXX '2' shouldn't be subtracted here - should be '1' */
     1063 +        lsp->ls_vp_size = (lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
     1064 +            + lsp->ls_uncomp_last_seg_sz;
     1065 +
     1066 +        /*
     1067 +         * Index size is rounded up to a 512 byte boundary for ease
     1068 +         * of segmapping
     1069 +         */
     1070 +        index_sz = sizeof (lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
     1071 +        header_len = lsp->ls_comp_algorithm_len +
     1072 +            sizeof (lsp->ls_uncomp_seg_sz) +
     1073 +            sizeof (lsp->ls_comp_index_sz) +
     1074 +            sizeof (lsp->ls_uncomp_last_seg_sz);
     1075 +        lsp->ls_comp_offbase = header_len + index_sz;
     1076 +
     1077 +        index_sz += header_len;
     1078 +        index_sz = roundup(index_sz, DEV_BSIZE);
     1079 +
     1080 +        lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
     1081 +        lsp->ls_comp_index_data_sz = index_sz;
     1082 +
     1083 +        /*
     1084 +         * Read in the index -- this has a side-effect
     1085 +         * of reading in the header as well
     1086 +         */
     1087 +        rw = UIO_READ;
     1088 +        error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
     1089 +            0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
     1090 +
     1091 +        if (error != 0)
     1092 +                return (error);
     1093 +
     1094 +        /* Skip the header, this is where the index really begins */
     1095 +        lsp->ls_comp_seg_index =
     1096 +            /*LINTED*/
     1097 +            (uint64_t *)(lsp->ls_comp_index_data + header_len);
     1098 +
     1099 +        /* Rebase each index entry by ls_comp_offbase (header + index) */
     1100 +        for (i = 0; i < lsp->ls_comp_index_sz; i++)
     1101 +                lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
     1102 +                    lsp->ls_comp_seg_index[i];
     1103 +
     1104 +        return (error);
     1105 +}
     1106 +
     1107 +/*
     1108 + * Look up a compression algorithm by its signature string.
     1109 + * The signature is compared with strcmp() against each l_name
     1110 + * in lofi_compress_table, so it must be NUL-terminated.
     1111 + *
     1112 + * Return the index of the first match, or -1 if none matches.
     1113 + */
     1114 +static int lofi_compress_select(char *signature)
     1115 +{
     1116 +        int i;
     1117 +
     1118 +        for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) {
     1119 +                if (strcmp(lofi_compress_table[i].l_name, signature) == 0)
     1120 +                        return (i);
     1121 +        }
     1122 +
     1123 +        return (-1);
     1124 +}
     1125 +
     1126 +/*
 791 1127   * map a file to a minor number. Return the minor number.
 792 1128   */
 793 1129  static int
 794 1130  lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
 795 1131      int *rvalp, struct cred *credp, int ioctl_flag)
 796 1132  {
 797 1133          minor_t newminor;
 798 1134          struct lofi_state *lsp;
 799 1135          struct lofi_ioctl *klip;
 800 1136          int     error;
 801 1137          struct vnode *vp;
 802 1138          int64_t Nblocks_prop_val;
 803 1139          int64_t Size_prop_val;
     1140 +        int     compress_index;
 804 1141          vattr_t vattr;
 805 1142          int     flag;
 806 1143          enum vtype v_type;
 807 1144          int zalloced = 0;
 808 1145          dev_t   newdev;
 809 1146          char    namebuf[50];
     1147 +        char    buf[DEV_BSIZE];
     1148 +        char    *tbuf;
     1149 +        ssize_t resid;
     1150 +        enum uio_rw rw;
 810 1151  
 811 1152          klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
 812 1153          if (klip == NULL)
 813 1154                  return (EFAULT);
 814 1155  
 815 1156          mutex_enter(&lofi_lock);
 816 1157  
 817 1158          if (!valid_filename(klip->li_filename)) {
 818 1159                  error = EINVAL;
 819 1160                  goto out;
↓ open down ↓ 38 lines elided ↑ open up ↑
 858 1199          if (error) {
 859 1200                  /* try read-only */
 860 1201                  flag &= ~FWRITE;
 861 1202                  error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
 862 1203                      &vp, 0, 0);
 863 1204                  if (error) {
 864 1205                          goto out;
 865 1206                  }
 866 1207          }
 867 1208          vattr.va_mask = AT_SIZE;
 868      -        error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
     1209 +        error = VOP_GETATTR(vp, &vattr, 0, credp);
 869 1210          if (error) {
 870 1211                  goto closeout;
 871 1212          }
 872 1213          /* the file needs to be a multiple of the block size */
 873 1214          if ((vattr.va_size % DEV_BSIZE) != 0) {
 874 1215                  error = EINVAL;
 875 1216                  goto closeout;
 876 1217          }
 877 1218          newdev = makedevice(getmajor(dev), newminor);
 878 1219          Size_prop_val = vattr.va_size;
↓ open down ↓ 51 lines elided ↑ open up ↑
 930 1271          /*
 931 1272           * save open mode so file can be closed properly and vnode counts
 932 1273           * updated correctly.
 933 1274           */
 934 1275          lsp->ls_openflag = flag;
 935 1276  
 936 1277          /*
 937 1278           * Try to handle stacked lofs vnodes.
 938 1279           */
 939 1280          if (vp->v_type == VREG) {
 940      -                if (VOP_REALVP(vp, &lsp->ls_vp, NULL) != 0) {
     1281 +                if (VOP_REALVP(vp, &lsp->ls_vp) != 0) {
 941 1282                          lsp->ls_vp = vp;
 942 1283                  } else {
 943 1284                          /*
 944 1285                           * Even though vp was obtained via vn_open(), we
 945 1286                           * can't call vn_close() on it, since lofs will
 946 1287                           * pass the VOP_CLOSE() on down to the realvp
 947 1288                           * (which we are about to use). Hence we merely
 948 1289                           * drop the reference to the lofs vnode and hold
 949 1290                           * the realvp so things behave as if we've
 950 1291                           * opened the realvp without any interaction
↓ open down ↓ 4 lines elided ↑ open up ↑
 955 1296                  }
 956 1297          } else {
 957 1298                  lsp->ls_vp = vp;
 958 1299          }
 959 1300          lsp->ls_vp_size = vattr.va_size;
 960 1301          (void) strcpy(lsp->ls_filename, klip->li_filename);
 961 1302          if (rvalp)
 962 1303                  *rvalp = (int)newminor;
 963 1304          klip->li_minor = newminor;
 964 1305  
     1306 +        /*
     1307 +         * Read the file signature to check if it is compressed.
     1308 +         * 'rw' is set to read since only reads are allowed to
     1309 +         * a compressed file.
     1310 +         */
     1311 +        rw = UIO_READ;
     1312 +        error = vn_rdwr(rw, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE,
     1313 +            0, RLIM64_INFINITY, kcred, &resid);
     1314 +
     1315 +        if (error != 0)
     1316 +                goto propout;
     1317 +
     1318 +        tbuf = buf;
     1319 +        lsp->ls_uncomp_seg_sz = 0;
     1320 +        lsp->ls_vp_comp_size = lsp->ls_vp_size;
     1321 +        lsp->ls_comp_algorithm_len = 0;
     1322 +
     1323 +        compress_index = lofi_compress_select(tbuf);
     1324 +        if (compress_index != -1) {
     1325 +                lsp->ls_comp_algorithm_index = compress_index;
     1326 +                lsp->ls_comp_algorithm_len =
     1327 +                    strlen(lofi_compress_table[compress_index].l_name);
     1328 +                error = lofi_map_compressed_file(lsp, buf);
     1329 +                if (error != 0)
     1330 +                        goto propout;
     1331 +
     1332 +                /* update DDI properties */
     1333 +                Size_prop_val = lsp->ls_vp_size;
     1334 +                if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME,
     1335 +                    Size_prop_val)) != DDI_PROP_SUCCESS) {
     1336 +                        error = EINVAL;
     1337 +                        goto propout;
     1338 +                }
     1339 +
     1340 +                Nblocks_prop_val = lsp->ls_vp_size / DEV_BSIZE;
     1341 +                if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME,
     1342 +                    Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
     1343 +                        error = EINVAL;
     1344 +                        goto propout;
     1345 +                }
     1346 +        }
     1347 +
 965 1348          fake_disk_geometry(lsp);
 966 1349          mutex_exit(&lofi_lock);
 967 1350          (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
 968 1351          free_lofi_ioctl(klip);
 969 1352          return (0);
 970 1353  
 971 1354  propout:
 972 1355          (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
 973 1356          (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);
 974 1357  closeout:
 975      -        (void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
     1358 +        (void) VOP_CLOSE(vp, flag, 1, 0, credp);
 976 1359          VN_RELE(vp);
 977 1360  out:
 978 1361          if (zalloced)
 979 1362                  ddi_soft_state_free(lofi_statep, newminor);
 980 1363          mutex_exit(&lofi_lock);
 981 1364          free_lofi_ioctl(klip);
 982 1365          return (error);
 983 1366  }
 984 1367  
 985 1368  /*
↓ open down ↓ 43 lines elided ↑ open up ↑
1029 1412                   * no new I/Os should be dispatched.  We set the flag, wait for
1030 1413                   * the number of outstanding I/Os to reach 0, and then close the
1031 1414                   * underlying vnode.
1032 1415                   */
1033 1416                  if (klip->li_force) {
1034 1417                          mutex_enter(&lsp->ls_vp_lock);
1035 1418                          lsp->ls_vp_closereq = B_TRUE;
1036 1419                          while (lsp->ls_vp_iocount > 0)
1037 1420                                  cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
1038 1421                          (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0,
1039      -                            credp, NULL);
     1422 +                            credp);
1040 1423                          VN_RELE(lsp->ls_vp);
1041 1424                          lsp->ls_vp = NULL;
1042 1425                          cv_broadcast(&lsp->ls_vp_cv);
1043 1426                          mutex_exit(&lsp->ls_vp_lock);
1044 1427                          mutex_exit(&lofi_lock);
1045 1428                          klip->li_minor = minor;
1046 1429                          (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1047 1430                          free_lofi_ioctl(klip);
1048 1431                          return (0);
1049 1432                  }
1050 1433                  mutex_exit(&lofi_lock);
1051 1434                  free_lofi_ioctl(klip);
1052 1435                  return (EBUSY);
1053 1436          }
1054 1437  
     1438 +        if (lsp->ls_uncomp_seg_sz > 0) {
     1439 +                kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
     1440 +                lsp->ls_uncomp_seg_sz = 0;
     1441 +        }
     1442 +
1055 1443          lofi_free_handle(dev, minor, lsp, credp);
1056 1444  
1057 1445          klip->li_minor = minor;
1058 1446          mutex_exit(&lofi_lock);
1059 1447          (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1060 1448          free_lofi_ioctl(klip);
1061 1449          return (0);
1062 1450  }
1063 1451  
1064 1452  /*
↓ open down ↓ 23 lines elided ↑ open up ↑
1088 1476                  }
1089 1477  
1090 1478                  mutex_enter(&lofi_lock);
1091 1479                  lsp = ddi_get_soft_state(lofi_statep, minor);
1092 1480                  if (lsp == NULL) {
1093 1481                          mutex_exit(&lofi_lock);
1094 1482                          free_lofi_ioctl(klip);
1095 1483                          return (ENXIO);
1096 1484                  }
1097 1485                  (void) strcpy(klip->li_filename, lsp->ls_filename);
     1486 +                if (lsp->ls_comp_algorithm_len == 0)
     1487 +                        klip->li_algorithm[0] = '\0';
     1488 +                else
     1489 +                        (void) strlcpy(klip->li_algorithm, lofi_compress_table[
     1490 +                            lsp->ls_comp_algorithm_index].l_name,
     1491 +                            lsp->ls_comp_algorithm_len + 1);
1098 1492                  mutex_exit(&lofi_lock);
1099 1493                  error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1100 1494                  free_lofi_ioctl(klip);
1101 1495                  return (error);
1102 1496          case LOFI_GET_MINOR:
1103 1497                  mutex_enter(&lofi_lock);
1104 1498                  klip->li_minor = file_to_minor(klip->li_filename);
1105 1499                  mutex_exit(&lofi_lock);
1106 1500                  if (klip->li_minor == 0) {
1107 1501                          free_lofi_ioctl(klip);
1108 1502                          return (ENOENT);
1109 1503                  }
1110 1504                  error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1111 1505                  free_lofi_ioctl(klip);
1112 1506                  return (error);
     1507 +        case LOFI_CHECK_COMPRESSED:
     1508 +                mutex_enter(&lofi_lock);
     1509 +                klip->li_minor = file_to_minor(klip->li_filename);
     1510 +                mutex_exit(&lofi_lock);
     1511 +                if (klip->li_minor == 0) {
     1512 +                        free_lofi_ioctl(klip);
     1513 +                        return (ENOENT);
     1514 +                }
     1515 +                mutex_enter(&lofi_lock);
     1516 +                lsp = ddi_get_soft_state(lofi_statep, klip->li_minor);
     1517 +                if (lsp == NULL) {
     1518 +                        mutex_exit(&lofi_lock);
     1519 +                        free_lofi_ioctl(klip);
     1520 +                        return (ENXIO);
     1521 +                }
     1522 +                ASSERT(strcmp(klip->li_filename, lsp->ls_filename) == 0);
     1523 +
     1524 +                if (lsp->ls_comp_algorithm_len == 0)
     1525 +                        klip->li_algorithm[0] = '\0';
     1526 +                else
     1527 +                        (void) strlcpy(klip->li_algorithm, lofi_compress_table[
     1528 +                            lsp->ls_comp_algorithm_index].l_name,
     1529 +                            lsp->ls_comp_algorithm_len + 1);
     1530 +
     1531 +                mutex_exit(&lofi_lock);
     1532 +                error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
     1533 +                free_lofi_ioctl(klip);
     1534 +                return (error);
1113 1535          default:
1114 1536                  free_lofi_ioctl(klip);
1115 1537                  return (EINVAL);
1116 1538          }
1117 1539  
1118 1540  }
1119 1541  
1120 1542  static int
1121 1543  lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
1122 1544      int *rvalp)
↓ open down ↓ 39 lines elided ↑ open up ↑
1162 1584                              credp, flag));
1163 1585                  case LOFI_GET_MINOR:
1164 1586                          return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
1165 1587                              credp, flag));
1166 1588                  case LOFI_GET_MAXMINOR:
1167 1589                          error = ddi_copyout(&lofi_max_files, &lip->li_minor,
1168 1590                              sizeof (lofi_max_files), flag);
1169 1591                          if (error)
1170 1592                                  return (EFAULT);
1171 1593                          return (0);
     1594 +                case LOFI_CHECK_COMPRESSED:
     1595 +                        return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
     1596 +                            credp, flag));
1172 1597                  default:
1173 1598                          break;
1174 1599                  }
1175 1600          }
1176 1601  
1177 1602          lsp = ddi_get_soft_state(lofi_statep, minor);
1178 1603          if (lsp == NULL)
1179 1604                  return (ENXIO);
1180 1605  
1181 1606          /*
↓ open down ↓ 166 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX