1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "@(#)pc_vfsops.c        1.105   08/05/07 SMI"
  27 
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/kmem.h>
  31 #include <sys/user.h>
  32 #include <sys/proc.h>
  33 #include <sys/cred.h>
  34 #include <sys/disp.h>
  35 #include <sys/buf.h>
  36 #include <sys/vfs.h>
  37 #include <sys/vfs_opreg.h>
  38 #include <sys/vnode.h>
  39 #include <sys/fdio.h>
  40 #include <sys/file.h>
  41 #include <sys/uio.h>
  42 #include <sys/conf.h>
  43 #include <sys/statvfs.h>
  44 #include <sys/mount.h>
  45 #include <sys/pathname.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/debug.h>
  48 #include <sys/sysmacros.h>
  49 #include <sys/conf.h>
  50 #include <sys/mkdev.h>
  51 #include <sys/swap.h>
  52 #include <sys/sunddi.h>
  53 #include <sys/sunldi.h>
  54 #include <sys/dktp/fdisk.h>
  55 #include <sys/fs/pc_label.h>
  56 #include <sys/fs/pc_fs.h>
  57 #include <sys/fs/pc_dir.h>
  58 #include <sys/fs/pc_node.h>
  59 #include <fs/fs_subr.h>
  60 #include <sys/modctl.h>
  61 #include <sys/dkio.h>
  62 #include <sys/open.h>
  63 #include <sys/mntent.h>
  64 #include <sys/policy.h>
  65 #include <sys/atomic.h>
  66 #include <sys/sdt.h>
  67 
  68 /*
  69  * The majority of PC media use a 512 sector size, but
  70  * occasionally you will run across a 1k sector size.
  71  * For media with a 1k sector size, fd_strategy() requires
  72  * the I/O size to be a 1k multiple; so when the sector size
  73  * is not yet known, always read 1k.
  74  */
  75 #define PC_SAFESECSIZE  (PC_SECSIZE * 2)
  76 
  77 static int pcfs_pseudo_floppy(dev_t);
  78 
  79 static int pcfsinit(int, char *);
  80 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
  81         struct cred *);
  82 static int pcfs_unmount(struct vfs *, int, struct cred *);
  83 static int pcfs_root(struct vfs *, struct vnode **);
  84 static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
  85 static int pc_syncfsnodes(struct pcfs *);
  86 static int pcfs_sync(struct vfs *, short, struct cred *);
  87 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
  88 static void pcfs_freevfs(vfs_t *vfsp);
  89 
  90 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
  91 static int pc_writefat(struct pcfs *fsp, daddr_t start);
  92 
  93 static int pc_getfattype(struct pcfs *fsp);
  94 static void pcfs_parse_mntopts(struct pcfs *fsp, struct mounta *uap);
  95 
  96 
  97 /*
  98  * pcfs mount options table
  99  */
 100 
 101 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
 102 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
 103 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
 104 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
 105 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
 106 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
 107 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
 108 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
 109 
 110 static mntopt_t mntopts[] = {
 111 /*
 112  *      option name     cancel option   default arg     flags   opt data
 113  */
 114         { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
 115         { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
 116         { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
 117         { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
 118         { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
 119         { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
 120         { MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
 121         { MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
 122         { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
 123         { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
 124 };
 125 
 126 static mntopts_t pcfs_mntopts = {
 127         sizeof (mntopts) / sizeof (mntopt_t),
 128         mntopts
 129 };
 130 
 131 int pcfsdebuglevel = 0;
 132 
 133 /*
 134  * pcfslock:    protects the list of mounted pc filesystems "pc_mounttab".
 135  * pcfs_lock:   (inside per filesystem structure "pcfs")
 136  *              per filesystem lock. Most of the vfsops and vnodeops are
 137  *              protected by this lock.
 138  * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 139  *
 140  * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 141  *
 142  * pcfs_mountcount:     used to prevent module unloads while there is still
 143  *                      pcfs state from a former mount hanging around. With
 144  *                      forced umount support, the filesystem module must not
 145  *                      be allowed to go away before the last VFS_FREEVFS()
 146  *                      call has been made.
 147  *                      Since this is just an atomic counter, there's no need
 148  *                      for locking.
 149  */
 150 kmutex_t        pcfslock;
 151 krwlock_t       pcnodes_lock;
 152 uint32_t        pcfs_mountcount;
 153 
 154 static int pcfstype;
 155 
 156 static vfsdef_t vfw = {
 157         VFSDEF_VERSION,
 158         "pcfs",
 159         pcfsinit,
 160         VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS,
 161         &pcfs_mntopts
 162 };
 163 
 164 extern struct mod_ops mod_fsops;
 165 
 166 static struct modlfs modlfs = {
 167         &mod_fsops,
 168         "PC filesystem v1.2",
 169         &vfw
 170 };
 171 
 172 static struct modlinkage modlinkage = {
 173         MODREV_1,
 174         &modlfs,
 175         NULL
 176 };
 177 
 178 int
 179 _init(void)
 180 {
 181         int     error;
 182 
 183 #if !defined(lint)
 184         /* make sure the on-disk structures are sane */
 185         ASSERT(sizeof (struct pcdir) == 32);
 186         ASSERT(sizeof (struct pcdir_lfn) == 32);
 187 #endif
 188         mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
 189         rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
 190         error = mod_install(&modlinkage);
 191         if (error) {
 192                 mutex_destroy(&pcfslock);
 193                 rw_destroy(&pcnodes_lock);
 194         }
 195         return (error);
 196 }
 197 
 198 int
 199 _fini(void)
 200 {
 201         int     error;
 202 
 203         /*
 204          * If a forcedly unmounted instance is still hanging around,
 205          * we cannot allow the module to be unloaded because that would
 206          * cause panics once the VFS framework decides it's time to call
 207          * into VFS_FREEVFS().
 208          */
 209         if (pcfs_mountcount)
 210                 return (EBUSY);
 211 
 212         error = mod_remove(&modlinkage);
 213         if (error)
 214                 return (error);
 215         mutex_destroy(&pcfslock);
 216         rw_destroy(&pcnodes_lock);
 217         /*
 218          * Tear down the operations vectors
 219          */
 220         (void) vfs_freevfsops_by_type(pcfstype);
 221         vn_freevnodeops(pcfs_fvnodeops);
 222         vn_freevnodeops(pcfs_dvnodeops);
 223         return (0);
 224 }
 225 
 226 int
 227 _info(struct modinfo *modinfop)
 228 {
 229         return (mod_info(&modlinkage, modinfop));
 230 }
 231 
 232 /* ARGSUSED1 */
 233 static int
 234 pcfsinit(int fstype, char *name)
 235 {
 236         static const fs_operation_def_t pcfs_vfsops_template[] = {
 237                 VFSNAME_MOUNT,          { .vfs_mount = pcfs_mount },
 238                 VFSNAME_UNMOUNT,        { .vfs_unmount = pcfs_unmount },
 239                 VFSNAME_ROOT,           { .vfs_root = pcfs_root },
 240                 VFSNAME_STATVFS,        { .vfs_statvfs = pcfs_statvfs },
 241                 VFSNAME_SYNC,           { .vfs_sync = pcfs_sync },
 242                 VFSNAME_VGET,           { .vfs_vget = pcfs_vget },
 243                 VFSNAME_FREEVFS,        { .vfs_freevfs = pcfs_freevfs },
 244                 NULL,                   NULL
 245         };
 246         int error;
 247 
 248         error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
 249         if (error != 0) {
 250                 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
 251                 return (error);
 252         }
 253 
 254         error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
 255         if (error != 0) {
 256                 (void) vfs_freevfsops_by_type(fstype);
 257                 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
 258                 return (error);
 259         }
 260 
 261         error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
 262         if (error != 0) {
 263                 (void) vfs_freevfsops_by_type(fstype);
 264                 vn_freevnodeops(pcfs_fvnodeops);
 265                 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
 266                 return (error);
 267         }
 268 
 269         pcfstype = fstype;
 270         (void) pc_init();
 271         pcfs_mountcount = 0;
 272         return (0);
 273 }
 274 
 275 static struct pcfs *pc_mounttab = NULL;
 276 
 277 extern struct pcfs_args pc_tz;
 278 
 279 /*
 280  *  Define some special logical drives we use internal to this file.
 281  */
 282 #define BOOT_PARTITION_DRIVE    99
 283 #define PRIMARY_DOS_DRIVE       1
 284 #define UNPARTITIONED_DRIVE     0
 285 
 286 static int
 287 pcfs_device_identify(
 288         struct vfs *vfsp,
 289         struct mounta *uap,
 290         struct cred *cr,
 291         int *dos_ldrive,
 292         dev_t *xdev)
 293 {
 294         struct pathname special;
 295         char *c;
 296         struct vnode *svp = NULL;
 297         struct vnode *lvp = NULL;
 298         int oflag, aflag;
 299         int error;
 300 
 301         /*
 302          * Resolve path name of special file being mounted.
 303          */
 304         if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
 305                 return (error);
 306         }
 307 
 308         *dos_ldrive = -1;
 309 
 310         if (error =
 311             lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
 312                 /*
 313                  * If there's no device node, the name specified most likely
 314                  * maps to a PCFS-style "partition specifier" to select a
 315                  * harddisk primary/logical partition. Disable floppy-specific
 316                  * checks in such cases unless an explicit :A or :B is
 317                  * requested.
 318                  */
 319 
 320                 /*
 321                  * Split the pathname string at the last ':' separator.
 322                  * If there's no ':' in the device name, or the ':' is the
 323                  * last character in the string, the name is invalid and
 324                  * the error from the previous lookup will be returned.
 325                  */
 326                 c = strrchr(special.pn_path, ':');
 327                 if (c == NULL || strlen(c) == 0)
 328                         goto devlookup_done;
 329 
 330                 *c++ = '\0';
 331 
 332                 /*
 333                  * PCFS partition name suffixes can be:
 334                  *      - "boot" to indicate the X86BOOT partition
 335                  *      - a drive letter [c-z] for the "DOS logical drive"
 336                  *      - a drive number 1..24 for the "DOS logical drive"
 337                  *      - a "floppy name letter", 'a' or 'b' (just strip this)
 338                  */
 339                 if (strcasecmp(c, "boot") == 0) {
 340                         /*
 341                          * The Solaris boot partition is requested.
 342                          */
 343                         *dos_ldrive = BOOT_PARTITION_DRIVE;
 344                 } else if (strspn(c, "0123456789") == strlen(c)) {
 345                         /*
 346                          * All digits - parse the partition number.
 347                          */
 348                         long drvnum = 0;
 349 
 350                         if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
 351                                 /*
 352                                  * A number alright - in the allowed range ?
 353                                  */
 354                                 if (drvnum > 24 || drvnum == 0)
 355                                         error = ENXIO;
 356                         }
 357                         if (error)
 358                                 goto devlookup_done;
 359                         *dos_ldrive = (int)drvnum;
 360                 } else if (strlen(c) == 1) {
 361                         /*
 362                          * A single trailing character was specified.
 363                          *      - [c-zC-Z] means a harddisk partition, and
 364                          *        we retrieve the partition number.
 365                          *      - [abAB] means a floppy drive, so we swallow
 366                          *        the "drive specifier" and test later
 367                          *        whether the physical device is a floppy or
 368                          *        PCMCIA pseudofloppy (sram card).
 369                          */
 370                         *c = tolower(*c);
 371                         if (*c == 'a' || *c == 'b') {
 372                                 *dos_ldrive = UNPARTITIONED_DRIVE;
 373                         } else if (*c < 'c' || *c > 'z') {
 374                                 error = ENXIO;
 375                                 goto devlookup_done;
 376                         } else {
 377                                 *dos_ldrive = 1 + *c - 'c';
 378                         }
 379                 } else {
 380                         /*
 381                          * Can't parse this - pass through previous error.
 382                          */
 383                         goto devlookup_done;
 384                 }
 385 
 386 
 387                 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
 388                     NULLVPP, &svp);
 389         } else {
 390                 *dos_ldrive = UNPARTITIONED_DRIVE;
 391         }
 392 devlookup_done:
 393         pn_free(&special);
 394         if (error)
 395                 return (error);
 396 
 397         ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
 398 
 399         /*
 400          * Verify caller's permission to open the device special file.
 401          */
 402         if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
 403             ((uap->flags & MS_RDONLY) != 0)) {
 404                 oflag = FREAD;
 405                 aflag = VREAD;
 406         } else {
 407                 oflag = FREAD | FWRITE;
 408                 aflag = VREAD | VWRITE;
 409         }
 410 
 411         error = vfs_get_lofi(vfsp, &lvp);
 412 
 413         if (error > 0) {
 414                 if (error == ENOENT)
 415                         error = ENODEV;
 416                 goto out;
 417         } else if (error == 0) {
 418                 *xdev = lvp->v_rdev;
 419         } else {
 420                 *xdev = svp->v_rdev;
 421 
 422                 if (svp->v_type != VBLK)
 423                         error = ENOTBLK;
 424 
 425                 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
 426                         goto out;
 427         }
 428 
 429         if (getmajor(*xdev) >= devcnt) {
 430                 error = ENXIO;
 431                 goto out;
 432         }
 433 
 434         if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
 435                 goto out;
 436 
 437 out:
 438         if (svp != NULL)
 439                 VN_RELE(svp);
 440         if (lvp != NULL)
 441                 VN_RELE(lvp);
 442         return (error);
 443 }
 444 
 445 static int
 446 pcfs_device_ismounted(
 447         struct vfs *vfsp,
 448         int dos_ldrive,
 449         dev_t xdev,
 450         int *remounting,
 451         dev_t *pseudodev)
 452 {
 453         struct pcfs *fsp;
 454         int remount = *remounting;
 455 
 456         /*
 457          * Ensure that this logical drive isn't already mounted, unless
 458          * this is a REMOUNT request.
 459          * Note: The framework will perform this check if the "...:c"
 460          * PCFS-style "logical drive" syntax has not been used and an
 461          * actually existing physical device is backing this filesystem.
 462          * Once all block device drivers support PC-style partitioning,
 463          * this codeblock can be dropped.
 464          */
 465         *pseudodev = xdev;
 466 
 467         if (dos_ldrive) {
 468                 mutex_enter(&pcfslock);
 469                 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
 470                         if (fsp->pcfs_xdev == xdev &&
 471                             fsp->pcfs_ldrive == dos_ldrive) {
 472                                 mutex_exit(&pcfslock);
 473                                 if (remount) {
 474                                         return (0);
 475                                 } else {
 476                                         return (EBUSY);
 477                                 }
 478                         }
 479                 /*
 480                  * Assign a unique device number for the vfs
 481                  * The old way (getudev() + a constantly incrementing
 482                  * major number) was wrong because it changes vfs_dev
 483                  * across mounts and reboots, which breaks nfs file handles.
 484                  * UFS just uses the real dev_t. We can't do that because
 485                  * of the way pcfs opens fdisk partitons (the :c and :d
 486                  * partitions are on the same dev_t). Though that _might_
 487                  * actually be ok, since the file handle contains an
 488                  * absolute block number, it's probably better to make them
 489                  * different. So I think we should retain the original
 490                  * dev_t, but come up with a different minor number based
 491                  * on the logical drive that will _always_ come up the same.
 492                  * For now, we steal the upper 6 bits.
 493                  */
 494 #ifdef notdef
 495                 /* what should we do here? */
 496                 if (((getminor(xdev) >> 12) & 0x3F) != 0)
 497                         printf("whoops - upper bits used!\n");
 498 #endif
 499                 *pseudodev = makedevice(getmajor(xdev),
 500                     ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
 501                 if (vfs_devmounting(*pseudodev, vfsp)) {
 502                         mutex_exit(&pcfslock);
 503                         return (EBUSY);
 504                 }
 505                 if (vfs_devismounted(*pseudodev)) {
 506                         mutex_exit(&pcfslock);
 507                         if (remount) {
 508                                 return (0);
 509                         } else {
 510                                 return (EBUSY);
 511                         }
 512                 }
 513                 mutex_exit(&pcfslock);
 514         } else {
 515                 *pseudodev = xdev;
 516                 if (vfs_devmounting(*pseudodev, vfsp)) {
 517                         return (EBUSY);
 518                 }
 519                 if (vfs_devismounted(*pseudodev))
 520                         if (remount) {
 521                                 return (0);
 522                         } else {
 523                                 return (EBUSY);
 524                         }
 525         }
 526 
 527         /*
 528          * This is not a remount. Even if MS_REMOUNT was requested,
 529          * the caller needs to proceed as it would on an ordinary
 530          * mount.
 531          */
 532         *remounting = 0;
 533 
 534         ASSERT(*pseudodev);
 535         return (0);
 536 }
 537 
 538 /*
 539  * Get the PCFS-specific mount options from the VFS framework.
 540  * For "timezone" and "secsize", we need to parse the number
 541  * ourselves and ensure its validity.
 542  * Note: "secsize" is deliberately undocumented at this time,
 543  * it's a workaround for devices (particularly: lofi image files)
 544  * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
 545  */
 546 static void
 547 pcfs_parse_mntopts(struct pcfs *fsp, struct mounta *uap)
 548 {
 549         char *c;
 550         char *endptr;
 551         long l;
 552         struct vfs *vfsp = fsp->pcfs_vfs;
 553 
 554         ASSERT(fsp->pcfs_secondswest == 0);
 555         ASSERT(fsp->pcfs_secsize == 0);
 556 
 557         if (uap->flags & MS_RDONLY) {
 558                 vfsp->vfs_flag |= VFS_RDONLY;
 559                 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 560         }
 561 
 562         if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
 563                 fsp->pcfs_flags |= PCFS_HIDDEN;
 564         if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
 565                 fsp->pcfs_flags |= PCFS_FOLDCASE;
 566         if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
 567                 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
 568         if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
 569                 fsp->pcfs_flags |= PCFS_NOATIME;
 570 
 571         if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
 572                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 573                     endptr == c + strlen(c)) {
 574                         /*
 575                          * A number alright - in the allowed range ?
 576                          */
 577                         if (l <= -12*3600 || l >= 12*3600) {
 578                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 579                                     "'timezone' mount option - %ld "
 580                                     "is out of range. Assuming 0.", l);
 581                                 l = 0;
 582                         }
 583                 } else {
 584                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 585                             "'timezone' mount option - argument %s "
 586                             "is not a valid number. Assuming 0.", c);
 587                         l = 0;
 588                 }
 589                 fsp->pcfs_secondswest = l;
 590         }
 591 
 592         /*
 593          * The "secsize=..." mount option is a workaround for the lack of
 594          * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
 595          * partition table of a disk image and it has been partitioned with
 596          * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
 597          * images.
 598          * That should really be fixed in lofi ... this is a workaround.
 599          */
 600         if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
 601                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 602                     endptr == c + strlen(c)) {
 603                         /*
 604                          * A number alright - a valid sector size as well ?
 605                          */
 606                         if (!VALID_SECSIZE(l)) {
 607                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 608                                     "'secsize' mount option - %ld is "
 609                                     "unsupported. Autodetecting.", l);
 610                                 l = 0;
 611                         }
 612                 } else {
 613                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 614                             "'secsize' mount option - argument %s "
 615                             "is not a valid number. Autodetecting.", c);
 616                         l = 0;
 617                 }
 618                 fsp->pcfs_secsize = l;
 619                 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
 620         }
 621 }
 622 
 623 /*
 624  * vfs operations
 625  */
 626 
 627 /*
 628  * pcfs_mount - backend for VFS_MOUNT() on PCFS.
 629  */
 630 static int
 631 pcfs_mount(
 632         struct vfs *vfsp,
 633         struct vnode *mvp,
 634         struct mounta *uap,
 635         struct cred *cr)
 636 {
 637         struct pcfs *fsp;
 638         struct vnode *devvp;
 639         dev_t pseudodev;
 640         dev_t xdev;
 641         int dos_ldrive = 0;
 642         int error;
 643         int remounting;
 644 
 645         if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 646                 return (error);
 647 
 648         if (mvp->v_type != VDIR)
 649                 return (ENOTDIR);
 650 
 651         mutex_enter(&mvp->v_lock);
 652         if ((uap->flags & MS_REMOUNT) == 0 &&
 653             (uap->flags & MS_OVERLAY) == 0 &&
 654             (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 655                 mutex_exit(&mvp->v_lock);
 656                 return (EBUSY);
 657         }
 658         mutex_exit(&mvp->v_lock);
 659 
 660         /*
 661          * PCFS doesn't do mount arguments anymore - everything's a mount
 662          * option these days. In order not to break existing callers, we
 663          * don't reject it yet, just warn that the data (if any) is ignored.
 664          */
 665         if (uap->datalen != 0)
 666                 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
 667                     "mount argument structures instead of mount options. "
 668                     "Ignoring mount(2) 'dataptr' argument.");
 669 
 670         /*
 671          * For most filesystems, this is just a lookupname() on the
 672          * mount pathname string. PCFS historically has to do its own
 673          * partition table parsing because not all Solaris architectures
 674          * support all styles of partitioning that PC media can have, and
 675          * hence PCFS understands "device names" that don't map to actual
 676          * physical device nodes. Parsing the "PCFS syntax" for device
 677          * names is done in pcfs_device_identify() - see there.
 678          *
 679          * Once all block device drivers that can host FAT filesystems have
 680          * been enhanced to create device nodes for all PC-style partitions,
 681          * this code can go away.
 682          */
 683         if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
 684                 return (error);
 685 
 686         /*
 687          * As with looking up the actual device to mount, PCFS cannot rely
 688          * on just the checks done by vfs_ismounted() whether a given device
 689          * is mounted already. The additional check against the "PCFS syntax"
 690          * is done in  pcfs_device_ismounted().
 691          */
 692         remounting = (uap->flags & MS_REMOUNT);
 693 
 694         if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
 695             &pseudodev))
 696                 return (error);
 697 
 698         if (remounting)
 699                 return (0);
 700 
 701         /*
 702          * Mount the filesystem.
 703          * An instance structure is required before the attempt to locate
 704          * and parse the FAT BPB. This is because mount options may change
 705          * the behaviour of the filesystem type matching code. Precreate
 706          * it and fill it in to a degree that allows parsing the mount
 707          * options.
 708          */
 709         devvp = makespecvp(xdev, VBLK);
 710         if (IS_SWAPVP(devvp)) {
 711                 VN_RELE(devvp);
 712                 return (EBUSY);
 713         }
 714         error = VOP_OPEN(&devvp,
 715             (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
 716         if (error) {
 717                 VN_RELE(devvp);
 718                 return (error);
 719         }
 720 
 721         fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
 722         fsp->pcfs_vfs = vfsp;
 723         fsp->pcfs_xdev = xdev;
 724         fsp->pcfs_devvp = devvp;
 725         fsp->pcfs_ldrive = dos_ldrive;
 726         mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
 727         vfsp->vfs_data = fsp;
 728         vfsp->vfs_dev = pseudodev;
 729         vfsp->vfs_fstype = pcfstype;
 730         vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
 731         vfsp->vfs_bcount = 0;
 732         vfsp->vfs_bsize = fsp->pcfs_clsize;
 733 
 734         pcfs_parse_mntopts(fsp, uap);
 735 
 736         /*
 737          * This is the actual "mount" - the PCFS superblock check.
 738          *
 739          * Find the requested logical drive and the FAT BPB therein.
 740          * Check device type and flag the instance if media is removeable.
 741          *
 742          * Initializes most members of the filesystem instance structure.
 743          * Returns EINVAL if no valid BPB can be found. Other errors may
 744          * occur after I/O failures, or when invalid / unparseable partition
 745          * tables are encountered.
 746          */
 747         if (error = pc_getfattype(fsp))
 748                 goto errout;
 749 
 750         /*
 751          * Validate that we can access the FAT and that it is, to the
 752          * degree we can verify here, self-consistent.
 753          */
 754         if (error = pc_verify(fsp))
 755                 goto errout;
 756 
 757         /*
 758          * Record the time of the mount, to return as an "approximate"
 759          * timestamp for the FAT root directory. Since FAT roots don't
 760          * have timestamps, this is less confusing to the user than
 761          * claiming "zero" / Jan/01/1970.
 762          */
 763         gethrestime(&fsp->pcfs_mounttime);
 764 
 765         /*
 766          * Fix up the mount options. Because "noatime" is made default on
 767          * removeable media only, a fixed disk will have neither "atime"
 768          * nor "noatime" set. We set the options explicitly depending on
 769          * the PCFS_NOATIME flag, to inform the user of what applies.
 770          * Mount option cancellation will take care that the mutually
 771          * exclusive 'other' is cleared.
 772          */
 773         vfs_setmntopt(vfsp,
 774             fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
 775             NULL, 0);
 776 
 777         /*
 778          * All clear - insert the FS instance into PCFS' list.
 779          */
 780         mutex_enter(&pcfslock);
 781         fsp->pcfs_nxt = pc_mounttab;
 782         pc_mounttab = fsp;
 783         mutex_exit(&pcfslock);
 784         atomic_inc_32(&pcfs_mountcount);
 785         return (0);
 786 
 787 errout:
 788         (void) VOP_CLOSE(devvp,
 789             vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
 790             1, (offset_t)0, cr, NULL);
 791         VN_RELE(devvp);
 792         mutex_destroy(&fsp->pcfs_lock);
 793         kmem_free(fsp, sizeof (*fsp));
 794         return (error);
 795 
 796 }
 797 
 798 static int
 799 pcfs_unmount(
 800         struct vfs *vfsp,
 801         int flag,
 802         struct cred *cr)
 803 {
 804         struct pcfs *fsp, *fsp1;
 805 
 806         if (secpolicy_fs_unmount(cr, vfsp) != 0)
 807                 return (EPERM);
 808 
 809         fsp = VFSTOPCFS(vfsp);
 810 
 811         /*
 812          * We don't have to lock fsp because the VVFSLOCK in vfs layer will
 813          * prevent lookuppn from crossing the mount point.
 814          * If this is not a forced umount request and there's ongoing I/O,
 815          * don't allow the mount to proceed.
 816          */
 817         if (flag & MS_FORCE)
 818                 vfsp->vfs_flag |= VFS_UNMOUNTED;
 819         else if (fsp->pcfs_nrefs)
 820                 return (EBUSY);
 821 
 822         mutex_enter(&pcfslock);
 823 
 824         /*
 825          * If this is a forced umount request or if the fs instance has
 826          * been marked as beyond recovery, allow the umount to proceed
 827          * regardless of state. pc_diskchanged() forcibly releases all
 828          * inactive vnodes/pcnodes.
 829          */
 830         if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
 831                 rw_enter(&pcnodes_lock, RW_WRITER);
 832                 pc_diskchanged(fsp);
 833                 rw_exit(&pcnodes_lock);
 834         }
 835 
 836         /* now there should be no pcp node on pcfhead or pcdhead. */
 837 
 838         if (fsp == pc_mounttab) {
 839                 pc_mounttab = fsp->pcfs_nxt;
 840         } else {
 841                 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
 842                         if (fsp1->pcfs_nxt == fsp)
 843                                 fsp1->pcfs_nxt = fsp->pcfs_nxt;
 844         }
 845 
 846         mutex_exit(&pcfslock);
 847 
 848         /*
 849          * Since we support VFS_FREEVFS(), there's no need to
 850          * free the fsp right now. The framework will tell us
 851          * when the right time to do so has arrived by calling
 852          * into pcfs_freevfs.
 853          */
 854         return (0);
 855 }
 856 
 857 /*
 858  * find root of pcfs
 859  */
 860 static int
 861 pcfs_root(
 862         struct vfs *vfsp,
 863         struct vnode **vpp)
 864 {
 865         struct pcfs *fsp;
 866         struct pcnode *pcp;
 867         int error;
 868 
 869         fsp = VFSTOPCFS(vfsp);
 870         if (error = pc_lockfs(fsp, 0, 0))
 871                 return (error);
 872 
 873         pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
 874         pc_unlockfs(fsp);
 875         *vpp = PCTOV(pcp);
 876         pcp->pc_flags |= PC_EXTERNAL;
 877         return (0);
 878 }
 879 
 880 /*
 881  * Get file system statistics.
 882  */
 883 static int
 884 pcfs_statvfs(
 885         struct vfs *vfsp,
 886         struct statvfs64 *sp)
 887 {
 888         struct pcfs *fsp;
 889         int error;
 890         dev32_t d32;
 891 
 892         fsp = VFSTOPCFS(vfsp);
 893         error = pc_getfat(fsp);
 894         if (error)
 895                 return (error);
 896         bzero(sp, sizeof (*sp));
 897         sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
 898         sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
 899         sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
 900         sp->f_files = (fsfilcnt64_t)-1;
 901         sp->f_ffree = (fsfilcnt64_t)-1;
 902         sp->f_favail = (fsfilcnt64_t)-1;
 903 #ifdef notdef
 904         (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
 905 #endif /* notdef */
 906         (void) cmpldev(&d32, vfsp->vfs_dev);
 907         sp->f_fsid = d32;
 908         (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
 909         sp->f_flag = vf_to_stf(vfsp->vfs_flag);
 910         sp->f_namemax = PCFNAMESIZE;
 911         return (0);
 912 }
 913 
 914 static int
 915 pc_syncfsnodes(struct pcfs *fsp)
 916 {
 917         struct pchead *hp;
 918         struct pcnode *pcp;
 919         int error;
 920 
 921         if (error = pc_lockfs(fsp, 0, 0))
 922                 return (error);
 923 
 924         if (!(error = pc_syncfat(fsp))) {
 925                 hp = pcfhead;
 926                 while (hp < & pcfhead [ NPCHASH ]) {
 927                         rw_enter(&pcnodes_lock, RW_READER);
 928                         pcp = hp->pch_forw;
 929                         while (pcp != (struct pcnode *)hp) {
 930                                 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
 931                                         if (error = pc_nodesync(pcp))
 932                                                 break;
 933                                 pcp = pcp -> pc_forw;
 934                         }
 935                         rw_exit(&pcnodes_lock);
 936                         if (error)
 937                                 break;
 938                         hp++;
 939                 }
 940         }
 941         pc_unlockfs(fsp);
 942         return (error);
 943 }
 944 
 945 /*
 946  * Flush any pending I/O.
 947  */
 948 /*ARGSUSED*/
 949 static int
 950 pcfs_sync(
 951         struct vfs *vfsp,
 952         short flag,
 953         struct cred *cr)
 954 {
 955         struct pcfs *fsp;
 956         int error = 0;
 957 
 958         /* this prevents the filesystem from being umounted. */
 959         mutex_enter(&pcfslock);
 960         if (vfsp != NULL) {
 961                 fsp = VFSTOPCFS(vfsp);
 962                 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
 963                         error = pc_syncfsnodes(fsp);
 964                 } else {
 965                         rw_enter(&pcnodes_lock, RW_WRITER);
 966                         pc_diskchanged(fsp);
 967                         rw_exit(&pcnodes_lock);
 968                         error = EIO;
 969                 }
 970         } else {
 971                 fsp = pc_mounttab;
 972                 while (fsp != NULL) {
 973                         if (fsp->pcfs_flags & PCFS_IRRECOV) {
 974                                 rw_enter(&pcnodes_lock, RW_WRITER);
 975                                 pc_diskchanged(fsp);
 976                                 rw_exit(&pcnodes_lock);
 977                                 error = EIO;
 978                                 break;
 979                         }
 980                         error = pc_syncfsnodes(fsp);
 981                         if (error) break;
 982                         fsp = fsp->pcfs_nxt;
 983                 }
 984         }
 985         mutex_exit(&pcfslock);
 986         return (error);
 987 }
 988 
 989 int
 990 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
 991 {
 992         int err;
 993 
 994         if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
 995                 return (EIO);
 996 
 997         if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
 998                 fsp->pcfs_count++;
 999         } else {
1000                 mutex_enter(&fsp->pcfs_lock);
1001                 if (fsp->pcfs_flags & PCFS_LOCKED)
1002                         panic("pc_lockfs");
1003                 /*
1004                  * We check the IRRECOV bit again just in case somebody
1005                  * snuck past the initial check but then got held up before
1006                  * they could grab the lock.  (And in the meantime someone
1007                  * had grabbed the lock and set the bit)
1008                  */
1009                 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1010                         if ((err = pc_getfat(fsp))) {
1011                                 mutex_exit(&fsp->pcfs_lock);
1012                                 return (err);
1013                         }
1014                 }
1015                 fsp->pcfs_flags |= PCFS_LOCKED;
1016                 fsp->pcfs_owner = curthread;
1017                 fsp->pcfs_count++;
1018         }
1019         return (0);
1020 }
1021 
1022 void
1023 pc_unlockfs(struct pcfs *fsp)
1024 {
1025 
1026         if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1027                 panic("pc_unlockfs");
1028         if (--fsp->pcfs_count < 0)
1029                 panic("pc_unlockfs: count");
1030         if (fsp->pcfs_count == 0) {
1031                 fsp->pcfs_flags &= ~PCFS_LOCKED;
1032                 fsp->pcfs_owner = 0;
1033                 mutex_exit(&fsp->pcfs_lock);
1034         }
1035 }
1036 
1037 int
1038 pc_syncfat(struct pcfs *fsp)
1039 {
1040         struct buf *bp;
1041         int nfat;
1042         int     error = 0;
1043         struct fat_od_fsi *fsinfo_disk;
1044 
1045         if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1046             !(fsp->pcfs_flags & PCFS_FATMOD))
1047                 return (0);
1048         /*
1049          * write out all copies of FATs
1050          */
1051         fsp->pcfs_flags &= ~PCFS_FATMOD;
1052         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1053         for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1054                 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1055                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1056                 if (error) {
1057                         pc_mark_irrecov(fsp);
1058                         return (EIO);
1059                 }
1060         }
1061         pc_clear_fatchanges(fsp);
1062 
1063         /*
1064          * Write out fsinfo sector.
1065          */
1066         if (IS_FAT32(fsp)) {
1067                 bp = bread(fsp->pcfs_xdev,
1068                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1069                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1070                         error = geterror(bp);
1071                 }
1072                 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1073                 if (!error && FSISIG_OK(fsinfo_disk)) {
1074                         fsinfo_disk->fsi_incore.fs_free_clusters =
1075                             LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1076                         fsinfo_disk->fsi_incore.fs_next_free =
1077                             LE_32(FSINFO_UNKNOWN);
1078                         bwrite2(bp);
1079                         error = geterror(bp);
1080                 }
1081                 brelse(bp);
1082                 if (error) {
1083                         pc_mark_irrecov(fsp);
1084                         return (EIO);
1085                 }
1086         }
1087         return (0);
1088 }
1089 
1090 void
1091 pc_invalfat(struct pcfs *fsp)
1092 {
1093         struct pcfs *xfsp;
1094         int mount_cnt = 0;
1095 
1096         if (fsp->pcfs_fatp == (uchar_t *)0)
1097                 panic("pc_invalfat");
1098         /*
1099          * Release FAT
1100          */
1101         kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1102         fsp->pcfs_fatp = NULL;
1103         kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1104         fsp->pcfs_fat_changemap = NULL;
1105         /*
1106          * Invalidate all the blocks associated with the device.
1107          * Not needed if stateless.
1108          */
1109         for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1110                 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1111                         mount_cnt++;
1112 
1113         if (!mount_cnt)
1114                 binval(fsp->pcfs_xdev);
1115         /*
1116          * close mounted device
1117          */
1118         (void) VOP_CLOSE(fsp->pcfs_devvp,
1119             (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1120             1, (offset_t)0, CRED(), NULL);
1121 }
1122 
1123 void
1124 pc_badfs(struct pcfs *fsp)
1125 {
1126         cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1127             getmajor(fsp->pcfs_devvp->v_rdev),
1128             getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1129 }
1130 
1131 /*
1132  * The problem with supporting NFS on the PCFS filesystem is that there
1133  * is no good place to keep the generation number. The only possible
1134  * place is inside a directory entry. There are a few words that we
1135  * don't use - they store NT & OS/2 attributes, and the creation/last access
1136  * time of the file - but it seems wrong to use them. In addition, directory
1137  * entries come and go. If a directory is removed completely, its directory
1138  * blocks are freed and the generation numbers are lost. Whereas in ufs,
1139  * inode blocks are dedicated for inodes, so the generation numbers are
1140  * permanently kept on the disk.
1141  */
1142 static int
1143 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1144 {
1145         struct pcnode *pcp;
1146         struct pc_fid *pcfid;
1147         struct pcfs *fsp;
1148         struct pcdir *ep;
1149         daddr_t eblkno;
1150         int eoffset;
1151         struct buf *bp;
1152         int error;
1153         pc_cluster32_t  cn;
1154 
1155         pcfid = (struct pc_fid *)fidp;
1156         fsp = VFSTOPCFS(vfsp);
1157 
1158         error = pc_lockfs(fsp, 0, 0);
1159         if (error) {
1160                 *vpp = NULL;
1161                 return (error);
1162         }
1163 
1164         if (pcfid->pcfid_block == 0) {
1165                 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1166                 pcp->pc_flags |= PC_EXTERNAL;
1167                 *vpp = PCTOV(pcp);
1168                 pc_unlockfs(fsp);
1169                 return (0);
1170         }
1171         eblkno = pcfid->pcfid_block;
1172         eoffset = pcfid->pcfid_offset;
1173 
1174         if ((pc_dbtocl(fsp,
1175             eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1176             (eoffset > fsp->pcfs_clsize)) {
1177                 pc_unlockfs(fsp);
1178                 *vpp = NULL;
1179                 return (EINVAL);
1180         }
1181 
1182         if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1183             < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1184                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1185                     fsp->pcfs_clsize);
1186         } else {
1187                 /*
1188                  * This is an access "backwards" into the FAT12/FAT16
1189                  * root directory. A better code structure would
1190                  * significantly improve maintainability here ...
1191                  */
1192                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1193                     (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1194         }
1195         if (bp->b_flags & (B_ERROR | B_STALE)) {
1196                 error = geterror(bp);
1197                 brelse(bp);
1198                 if (error)
1199                         pc_mark_irrecov(fsp);
1200                 *vpp = NULL;
1201                 pc_unlockfs(fsp);
1202                 return (error);
1203         }
1204         ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1205         /*
1206          * Ok, if this is a valid file handle that we gave out,
1207          * then simply ensuring that the creation time matches,
1208          * the entry has not been deleted, and it has a valid first
1209          * character should be enough.
1210          *
1211          * Unfortunately, verifying that the <blkno, offset> _still_
1212          * refers to a directory entry is not easy, since we'd have
1213          * to search _all_ directories starting from root to find it.
1214          * That's a high price to pay just in case somebody is forging
1215          * file handles. So instead we verify that as much of the
1216          * entry is valid as we can:
1217          *
1218          * 1. The starting cluster is 0 (unallocated) or valid
1219          * 2. It is not an LFN entry
1220          * 3. It is not hidden (unless mounted as such)
1221          * 4. It is not the label
1222          */
1223         cn = pc_getstartcluster(fsp, ep);
1224         /*
1225          * if the starting cluster is valid, but not valid according
1226          * to pc_validcl(), force it to be to simplify the following if.
1227          */
1228         if (cn == 0)
1229                 cn = PCF_FIRSTCLUSTER;
1230         if (IS_FAT32(fsp)) {
1231                 if (cn >= PCF_LASTCLUSTER32)
1232                         cn = PCF_FIRSTCLUSTER;
1233         } else {
1234                 if (cn >= PCF_LASTCLUSTER)
1235                         cn = PCF_FIRSTCLUSTER;
1236         }
1237         if ((!pc_validcl(fsp, cn)) ||
1238             (PCDL_IS_LFN(ep)) ||
1239             (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1240             ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1241                 bp->b_flags |= B_STALE | B_AGE;
1242                 brelse(bp);
1243                 pc_unlockfs(fsp);
1244                 return (EINVAL);
1245         }
1246         if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1247             (ep->pcd_filename[0] != PCD_ERASED) &&
1248             (pc_validchar(ep->pcd_filename[0]) ||
1249             (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1250                 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1251                 pcp->pc_flags |= PC_EXTERNAL;
1252                 *vpp = PCTOV(pcp);
1253         } else {
1254                 *vpp = NULL;
1255         }
1256         bp->b_flags |= B_STALE | B_AGE;
1257         brelse(bp);
1258         pc_unlockfs(fsp);
1259         return (0);
1260 }
1261 
1262 /*
1263  * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1264  * a meg), so we can't bread() it all in at once. This routine reads a
1265  * fat a chunk at a time.
1266  */
1267 static int
1268 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1269 {
1270         struct buf *bp;
1271         size_t off;
1272         size_t readsize;
1273         daddr_t diskblk;
1274         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1275         daddr_t start = fsp->pcfs_fatstart;
1276 
1277         readsize = fsp->pcfs_clsize;
1278         for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1279                 if (readsize > (fatsize - off))
1280                         readsize = fatsize - off;
1281                 diskblk = pc_dbdaddr(fsp, start +
1282                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1283                 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1284                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1285                         brelse(bp);
1286                         return (EIO);
1287                 }
1288                 bp->b_flags |= B_STALE | B_AGE;
1289                 bcopy(bp->b_un.b_addr, fatp, readsize);
1290                 brelse(bp);
1291         }
1292         return (0);
1293 }
1294 
1295 /*
1296  * We write the FAT out a _lot_, in order to make sure that it
1297  * is up-to-date. But on a FAT32 system (large drive, small clusters)
1298  * the FAT might be a couple of megabytes, and writing it all out just
1299  * because we created or deleted a small file is painful (especially
1300  * since we do it for each alternate FAT too). So instead, for FAT16 and
1301  * FAT32 we only write out the bit that has changed. We don't clear
1302  * the 'updated' fields here because the caller might be writing out
1303  * several FATs, so the caller must use pc_clear_fatchanges() after
1304  * all FATs have been updated.
1305  * This function doesn't take "start" from fsp->pcfs_dosstart because
1306  * callers can use it to write either the primary or any of the alternate
1307  * FAT tables.
1308  */
1309 static int
1310 pc_writefat(struct pcfs *fsp, daddr_t start)
1311 {
1312         struct buf *bp;
1313         size_t off;
1314         size_t writesize;
1315         int     error;
1316         uchar_t *fatp = fsp->pcfs_fatp;
1317         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1318 
1319         writesize = fsp->pcfs_clsize;
1320         for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1321                 if (writesize > (fatsize - off))
1322                         writesize = fatsize - off;
1323                 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1324                         continue;
1325                 }
1326                 bp = ngeteblk(writesize);
1327                 bp->b_edev = fsp->pcfs_xdev;
1328                 bp->b_dev = cmpdev(bp->b_edev);
1329                 bp->b_blkno = pc_dbdaddr(fsp, start +
1330                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1331                 bcopy(fatp, bp->b_un.b_addr, writesize);
1332                 bwrite2(bp);
1333                 error = geterror(bp);
1334                 brelse(bp);
1335                 if (error) {
1336                         return (error);
1337                 }
1338         }
1339         return (0);
1340 }
1341 
1342 /*
1343  * Mark the FAT cluster that 'cn' is stored in as modified.
1344  */
1345 void
1346 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1347 {
1348         pc_cluster32_t  bn;
1349         size_t          size;
1350 
1351         /* which fat block is the cluster number stored in? */
1352         if (IS_FAT32(fsp)) {
1353                 size = sizeof (pc_cluster32_t);
1354                 bn = pc_lblkno(fsp, cn * size);
1355                 fsp->pcfs_fat_changemap[bn] = 1;
1356         } else if (IS_FAT16(fsp)) {
1357                 size = sizeof (pc_cluster16_t);
1358                 bn = pc_lblkno(fsp, cn * size);
1359                 fsp->pcfs_fat_changemap[bn] = 1;
1360         } else {
1361                 offset_t off;
1362                 pc_cluster32_t nbn;
1363 
1364                 ASSERT(IS_FAT12(fsp));
1365                 off = cn + (cn >> 1);
1366                 bn = pc_lblkno(fsp, off);
1367                 fsp->pcfs_fat_changemap[bn] = 1;
1368                 /* does this field wrap into the next fat cluster? */
1369                 nbn = pc_lblkno(fsp, off + 1);
1370                 if (nbn != bn) {
1371                         fsp->pcfs_fat_changemap[nbn] = 1;
1372                 }
1373         }
1374 }
1375 
1376 /*
1377  * return whether the FAT cluster 'bn' is updated and needs to
1378  * be written out.
1379  */
1380 int
1381 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1382 {
1383         return (fsp->pcfs_fat_changemap[bn] == 1);
1384 }
1385 
1386 /*
1387  * Implementation of VFS_FREEVFS() to support forced umounts.
1388  * This is called by the vfs framework after umount, to trigger
1389  * the release of any resources still associated with the given
1390  * vfs_t once the need to keep them has gone away.
1391  */
1392 void
1393 pcfs_freevfs(vfs_t *vfsp)
1394 {
1395         struct pcfs *fsp = VFSTOPCFS(vfsp);
1396 
1397         mutex_enter(&pcfslock);
1398         /*
1399          * Purging the FAT closes the device - can't do any more
1400          * I/O after this.
1401          */
1402         if (fsp->pcfs_fatp != (uchar_t *)0)
1403                 pc_invalfat(fsp);
1404         mutex_exit(&pcfslock);
1405 
1406         VN_RELE(fsp->pcfs_devvp);
1407         mutex_destroy(&fsp->pcfs_lock);
1408         kmem_free(fsp, sizeof (*fsp));
1409 
1410         /*
1411          * Allow _fini() to succeed now, if so desired.
1412          */
1413         atomic_dec_32(&pcfs_mountcount);
1414 }
1415 
1416 
1417 /*
1418  * PC-style partition parsing and FAT BPB identification/validation code.
1419  * The partition parsers here assume:
1420  *      - a FAT filesystem will be in a partition that has one of a set of
1421  *        recognized partition IDs
1422  *      - the user wants the 'numbering' (C:, D:, ...) that one would get
1423  *        on MSDOS 6.x.
1424  *        That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1425  *        will not factor in the enumeration.
1426  * These days, such assumptions should be revisited. FAT is no longer the
1427  * only game in 'PC town'.
1428  */
1429 /*
1430  * isDosDrive()
1431  *      Boolean function.  Give it the systid field for an fdisk partition
1432  *      and it decides if that's a systid that describes a DOS drive.  We
1433  *      use systid values defined in sys/dktp/fdisk.h.
1434  */
1435 static int
1436 isDosDrive(uchar_t checkMe)
1437 {
1438         return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1439             (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1440             (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1441             (checkMe == DIAGPART));
1442 }
1443 
1444 
1445 /*
1446  * isDosExtended()
1447  *      Boolean function.  Give it the systid field for an fdisk partition
1448  *      and it decides if that's a systid that describes an extended DOS
1449  *      partition.
1450  */
1451 static int
1452 isDosExtended(uchar_t checkMe)
1453 {
1454         return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1455 }
1456 
1457 
1458 /*
1459  * isBootPart()
1460  *      Boolean function.  Give it the systid field for an fdisk partition
1461  *      and it decides if that's a systid that describes a Solaris boot
1462  *      partition.
1463  */
1464 static int
1465 isBootPart(uchar_t checkMe)
1466 {
1467         return (checkMe == X86BOOT);
1468 }
1469 
1470 
1471 /*
1472  * noLogicalDrive()
1473  *      Display error message about not being able to find a logical
1474  *      drive.
1475  */
1476 static void
1477 noLogicalDrive(int ldrive)
1478 {
1479         if (ldrive == BOOT_PARTITION_DRIVE) {
1480                 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1481         } else {
1482                 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1483         }
1484 }
1485 
1486 
1487 /*
1488  * findTheDrive()
1489  *      Discover offset of the requested logical drive, and return
1490  *      that offset (startSector), the systid of that drive (sysid),
1491  *      and a buffer pointer (bp), with the buffer contents being
1492  *      the first sector of the logical drive (i.e., the sector that
1493  *      contains the BPB for that drive).
1494  *
 * Note: this code is not capable of addressing >2TB disks, because it uses
 *       daddr_t rather than diskaddr_t and some of its calculations would
 *       overflow.
1497  */
/*
 * COPY_PTBL(mbr, ptblp)
 *      Treat 'mbr' as a struct mboot and bcopy() its 'parts' member,
 *      FD_NUMPART * sizeof (struct ipart) bytes in all, to 'ptblp'.
 */
#define COPY_PTBL(mbr, ptblp)                                   \
        bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),        \
            FD_NUMPART * sizeof (struct ipart))
1501 
1502 static int
1503 findTheDrive(struct pcfs *fsp, buf_t **bp)
1504 {
1505         int ldrive = fsp->pcfs_ldrive;
1506         dev_t dev = fsp->pcfs_devvp->v_rdev;
1507 
1508         struct ipart dosp[FD_NUMPART];  /* incore fdisk partition structure */
1509         daddr_t lastseek = 0;           /* Disk block we sought previously */
1510         daddr_t diskblk = 0;            /* Disk block to get */
1511         daddr_t xstartsect;             /* base of Extended DOS partition */
1512         int logicalDriveCount = 0;      /* Count of logical drives seen */
1513         int extendedPart = -1;          /* index of extended dos partition */
1514         int primaryPart = -1;           /* index of primary dos partition */
1515         int bootPart = -1;              /* index of a Solaris boot partition */
1516         int xnumsect = -1;              /* length of extended DOS partition */
1517         int driveIndex;                 /* computed FDISK table index */
1518         daddr_t startsec;
1519         len_t mediasize;
1520         int i;
1521         /*
1522          * Count of drives in the current extended partition's
1523          * FDISK table, and indexes of the drives themselves.
1524          */
1525         int extndDrives[FD_NUMPART];
1526         int numDrives = 0;
1527 
1528         /*
1529          * Count of drives (beyond primary) in master boot record's
1530          * FDISK table, and indexes of the drives themselves.
1531          */
1532         int extraDrives[FD_NUMPART];
1533         int numExtraDrives = 0;
1534 
1535         /*
1536          * "ldrive == 0" should never happen, as this is a request to
1537          * mount the physical device (and ignore partitioning). The code
1538          * in pcfs_mount() should have made sure that a logical drive number
1539          * is at least 1, meaning we're looking for drive "C:". It is not
1540          * safe (and a bug in the callers of this function) to request logical
1541          * drive number 0; we could ASSERT() but a graceful EIO is a more
1542          * polite way.
1543          */
1544         if (ldrive == 0) {
1545                 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1546                 noLogicalDrive(ldrive);
1547                 return (EIO);
1548         }
1549 
1550         /*
1551          *  Copy from disk block into memory aligned structure for fdisk usage.
1552          */
1553         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1554 
1555         /*
1556          * This check is ok because a FAT BPB and a master boot record (MBB)
1557          * have the same signature, in the same position within the block.
1558          */
1559         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1560                 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1561                     "device (%x.%x):%d\n",
1562                     getmajor(dev), getminor(dev), ldrive);
1563                 return (EINVAL);
1564         }
1565 
1566         /*
1567          * Get a summary of what is in the Master FDISK table.
1568          * Normally we expect to find one partition marked as a DOS drive.
1569          * This partition is the one Windows calls the primary dos partition.
1570          * If the machine has any logical drives then we also expect
1571          * to find a partition marked as an extended DOS partition.
1572          *
1573          * Sometimes we'll find multiple partitions marked as DOS drives.
1574          * The Solaris fdisk program allows these partitions
1575          * to be created, but Windows fdisk no longer does.  We still need
1576          * to support these, though, since Windows does.  We also need to fix
1577          * our fdisk to behave like the Windows version.
1578          *
1579          * It turns out that some off-the-shelf media have *only* an
1580          * Extended partition, so we need to deal with that case as well.
1581          *
1582          * Only a single (the first) Extended or Boot Partition will
1583          * be recognized.  Any others will be ignored.
1584          */
1585         for (i = 0; i < FD_NUMPART; i++) {
1586                 DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1587                     uint_t, (uint_t)dosp[i].systid,
1588                     uint_t, LE_32(dosp[i].relsect),
1589                     uint_t, LE_32(dosp[i].numsect));
1590 
1591                 if (isDosDrive(dosp[i].systid)) {
1592                         if (primaryPart < 0) {
1593                                 logicalDriveCount++;
1594                                 primaryPart = i;
1595                         } else {
1596                                 extraDrives[numExtraDrives++] = i;
1597                         }
1598                         continue;
1599                 }
1600                 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1601                         extendedPart = i;
1602                         continue;
1603                 }
1604                 if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1605                         bootPart = i;
1606                         continue;
1607                 }
1608         }
1609 
1610         if (ldrive == BOOT_PARTITION_DRIVE) {
1611                 if (bootPart < 0) {
1612                         noLogicalDrive(ldrive);
1613                         return (EINVAL);
1614                 }
1615                 startsec = LE_32(dosp[bootPart].relsect);
1616                 mediasize = LE_32(dosp[bootPart].numsect);
1617                 goto found;
1618         }
1619 
1620         if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1621                 startsec = LE_32(dosp[primaryPart].relsect);
1622                 mediasize = LE_32(dosp[primaryPart].numsect);
1623                 goto found;
1624         }
1625 
1626         /*
1627          * We are not looking for the C: drive (or the primary drive
1628          * was not found), so we had better have an extended partition
1629          * or extra drives in the Master FDISK table.
1630          */
1631         if ((extendedPart < 0) && (numExtraDrives == 0)) {
1632                 cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1633                 noLogicalDrive(ldrive);
1634                 return (EINVAL);
1635         }
1636 
1637         if (extendedPart >= 0) {
1638                 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1639                 xnumsect = LE_32(dosp[extendedPart].numsect);
1640                 do {
1641                         /*
1642                          *  If the seek would not cause us to change
1643                          *  position on the drive, then we're out of
1644                          *  extended partitions to examine.
1645                          */
1646                         if (diskblk == lastseek)
1647                                 break;
1648                         logicalDriveCount += numDrives;
1649                         /*
1650                          *  Seek the next extended partition, and find
1651                          *  logical drives within it.
1652                          */
1653                         brelse(*bp);
1654                         /*
1655                          * bread() block numbers are multiples of DEV_BSIZE
1656                          * but the device sector size (the unit of partitioning)
1657                          * might be larger than that; pcfs_get_device_info()
1658                          * has calculated the multiplicator for us.
1659                          */
1660                         *bp = bread(dev,
1661                             pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1662                         if ((*bp)->b_flags & B_ERROR) {
1663                                 return (EIO);
1664                         }
1665 
1666                         lastseek = diskblk;
1667                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1668                         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1669                                 cmn_err(CE_NOTE, "!pcfs: "
1670                                     "extended partition table signature err, "
1671                                     "device (%x.%x):%d, LBA %u",
1672                                     getmajor(dev), getminor(dev), ldrive,
1673                                     (uint_t)pc_dbdaddr(fsp, diskblk));
1674                                 return (EINVAL);
1675                         }
1676                         /*
1677                          *  Count up drives, and track where the next
1678                          *  extended partition is in case we need it.  We
1679                          *  are expecting only one extended partition.  If
1680                          *  there is more than one we'll only go to the
1681                          *  first one we see, but warn about ignoring.
1682                          */
1683                         numDrives = 0;
1684                         for (i = 0; i < FD_NUMPART; i++) {
1685                                 DTRACE_PROBE4(extendedpart,
1686                                     struct pcfs *, fsp,
1687                                     uint_t, (uint_t)dosp[i].systid,
1688                                     uint_t, LE_32(dosp[i].relsect),
1689                                     uint_t, LE_32(dosp[i].numsect));
1690                                 if (isDosDrive(dosp[i].systid)) {
1691                                         extndDrives[numDrives++] = i;
1692                                 } else if (isDosExtended(dosp[i].systid)) {
1693                                         if (diskblk != lastseek) {
1694                                                 /*
1695                                                  * Already found an extended
1696                                                  * partition in this table.
1697                                                  */
1698                                                 cmn_err(CE_NOTE,
1699                                                     "!pcfs: ignoring unexpected"
1700                                                     " additional extended"
1701                                                     " partition");
1702                                         } else {
1703                                                 diskblk = xstartsect +
1704                                                     LE_32(dosp[i].relsect);
1705                                         }
1706                                 }
1707                         }
1708                 } while (ldrive > logicalDriveCount + numDrives);
1709 
1710                 ASSERT(numDrives <= FD_NUMPART);
1711 
1712                 if (ldrive <= logicalDriveCount + numDrives) {
1713                         /*
1714                          * The number of logical drives we've found thus
1715                          * far is enough to get us to the one we were
1716                          * searching for.
1717                          */
1718                         driveIndex = logicalDriveCount + numDrives - ldrive;
1719                         mediasize =
1720                             LE_32(dosp[extndDrives[driveIndex]].numsect);
1721                         startsec =
1722                             LE_32(dosp[extndDrives[driveIndex]].relsect) +
1723                             lastseek;
1724                         if (startsec > (xstartsect + xnumsect)) {
1725                                 cmn_err(CE_NOTE, "!pcfs: extended partition "
1726                                     "values bad");
1727                                 return (EINVAL);
1728                         }
1729                         goto found;
1730                 } else {
1731                         /*
1732                          * We ran out of extended dos partition
1733                          * drives.  The only hope now is to go
1734                          * back to extra drives defined in the master
1735                          * fdisk table.  But we overwrote that table
1736                          * already, so we must load it in again.
1737                          */
1738                         logicalDriveCount += numDrives;
1739                         brelse(*bp);
1740                         ASSERT(fsp->pcfs_dosstart == 0);
1741                         *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1742                             fsp->pcfs_secsize);
1743                         if ((*bp)->b_flags & B_ERROR) {
1744                                 return (EIO);
1745                         }
1746                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1747                 }
1748         }
1749         /*
1750          *  Still haven't found the drive, is it an extra
1751          *  drive defined in the main FDISK table?
1752          */
1753         if (ldrive <= logicalDriveCount + numExtraDrives) {
1754                 driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1755                 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1756                 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1757                 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1758                 goto found;
1759         }
1760         /*
1761          *  Still haven't found the drive, and there is
1762          *  nowhere else to look.
1763          */
1764         noLogicalDrive(ldrive);
1765         return (EINVAL);
1766 
1767 found:
1768         /*
1769          * We need this value in units of sectorsize, because PCFS' internal
1770          * offset calculations go haywire for > 512Byte sectors unless all
1771          * pcfs_.*start values are in units of sectors.
1772          * So, assign before the capacity check (that's done in DEV_BSIZE)
1773          */
1774         fsp->pcfs_dosstart = startsec;
1775 
1776         /*
1777          * convert from device sectors to proper units:
1778          *      - starting sector: DEV_BSIZE (as argument to bread())
1779          *      - media size: Bytes
1780          */
1781         startsec = pc_dbdaddr(fsp, startsec);
1782         mediasize *= fsp->pcfs_secsize;
1783 
1784         /*
1785          * some additional validation / warnings in case the partition table
1786          * and the actual media capacity are not in accordance ...
1787          */
1788         if (fsp->pcfs_mediasize != 0) {
1789                 diskaddr_t startoff =
1790                     (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1791 
1792                 if (startoff >= fsp->pcfs_mediasize ||
1793                     startoff + mediasize > fsp->pcfs_mediasize) {
1794                         cmn_err(CE_WARN,
1795                             "!pcfs: partition size (LBA start %u, %lld bytes, "
1796                             "device (%x.%x):%d) smaller than "
1797                             "mediasize (%lld bytes).\n"
1798                             "filesystem may be truncated, access errors "
1799                             "may result.\n",
1800                             (uint_t)startsec, (long long)mediasize,
1801                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1802                             fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1803                 }
1804         } else {
1805                 fsp->pcfs_mediasize = mediasize;
1806         }
1807 
1808         return (0);
1809 }
1810 
1811 
1812 static fattype_t
1813 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1814 {
1815         uint32_t ncl = fsp->pcfs_ncluster;
1816 
1817         if (ncl <= 4096) {
1818                 if (bpb_get_FatSz16(bpb) == 0)
1819                         return (FAT_UNKNOWN);
1820 
1821                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1822                     bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1823                         return (FAT12);
1824                 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1825                         return (FAT12);
1826                 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1827                         return (FAT16);
1828 
1829                 switch (bpb_get_Media(bpb)) {
1830                         case SS8SPT:
1831                         case DS8SPT:
1832                         case SS9SPT:
1833                         case DS9SPT:
1834                         case DS18SPT:
1835                         case DS9_15SPT:
1836                                 /*
1837                                  * Is this reliable - all floppies are FAT12 ?
1838                                  */
1839                                 return (FAT12);
1840                         case MD_FIXED:
1841                                 /*
1842                                  * Is this reliable - disks are always FAT16 ?
1843                                  */
1844                                 return (FAT16);
1845                         default:
1846                                 break;
1847                 }
1848         } else if (ncl <= 65536) {
1849                 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1850                         return (FAT32);
1851                 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1852                         return (FAT32);
1853                 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1854                         return (FAT32);
1855 
1856                 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1857                         return (FAT16);
1858                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1859                         return (FAT16);
1860         }
1861 
1862         /*
1863          * We don't know
1864          */
1865         return (FAT_UNKNOWN);
1866 }
1867 
1868 /*
1869  * Check to see if the BPB we found is correct.
1870  *
1871  * This looks far more complicated that it needs to be for pure structural
1872  * validation. The reason for this is that parseBPB() is also used for
1873  * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1874  * BPB fields have 'known good' values, even if we do not reject the BPB
1875  * when attempting to mount the filesystem.
1876  */
1877 static int
1878 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1879 {
1880         fattype_t type;
1881 
1882         uint32_t        ncl;    /* number of clusters in file area */
1883         uint32_t        rec;
1884         uint32_t        reserved;
1885         uint32_t        fsisec, bkbootsec;
1886         blkcnt_t        totsec, totsec16, totsec32, datasec;
1887         size_t          fatsec, fatsec16, fatsec32, rdirsec;
1888         size_t          secsize;
1889         len_t           mediasize;
1890         uint64_t        validflags = 0;
1891 
1892         if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1893                 validflags |= BPB_BPBSIG_OK;
1894 
1895         rec = bpb_get_RootEntCnt(bpb);
1896         reserved = bpb_get_RsvdSecCnt(bpb);
1897         fsisec = bpb_get_FSInfo32(bpb);
1898         bkbootsec = bpb_get_BkBootSec32(bpb);
1899         totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1900         totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1901         fatsec16 = bpb_get_FatSz16(bpb);
1902         fatsec32 = bpb_get_FatSz32(bpb);
1903 
1904         totsec = totsec16 ? totsec16 : totsec32;
1905         fatsec = fatsec16 ? fatsec16 : fatsec32;
1906 
1907         secsize = bpb_get_BytesPerSec(bpb);
1908         if (!VALID_SECSIZE(secsize))
1909                 secsize = fsp->pcfs_secsize;
1910         if (secsize != fsp->pcfs_secsize) {
1911                 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1912                     getmajor(fsp->pcfs_xdev),
1913                     getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1914                 PC_DPRINTF2(3, "!BPB secsize %d != "
1915                     "autodetected media block size %d\n",
1916                     (int)secsize, (int)fsp->pcfs_secsize);
1917                 if (fsp->pcfs_ldrive) {
1918                         /*
1919                          * We've already attempted to parse the partition
1920                          * table. If the block size used for that don't match
1921                          * the PCFS sector size, we're hosed one way or the
1922                          * other. Just try what happens.
1923                          */
1924                         secsize = fsp->pcfs_secsize;
1925                         PC_DPRINTF1(3,
1926                             "!pcfs: Using autodetected secsize %d\n",
1927                             (int)secsize);
1928                 } else {
1929                         /*
1930                          * This allows mounting lofi images of PCFS partitions
1931                          * with sectorsize != DEV_BSIZE. We can't parse the
1932                          * partition table on whole-disk images unless the
1933                          * (undocumented) "secsize=..." mount option is used,
1934                          * but at least this allows us to mount if we have
1935                          * an image of a partition.
1936                          */
1937                         PC_DPRINTF1(3,
1938                             "!pcfs: Using BPB secsize %d\n", (int)secsize);
1939                 }
1940         }
1941 
1942         if (fsp->pcfs_mediasize == 0) {
1943                 mediasize = (len_t)totsec * (len_t)secsize;
1944                 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1945                     "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1946                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1947                     fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1948         } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1949                 cmn_err(CE_WARN,
1950                     "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1951                     "FAT BPB mediasize (%lld Bytes).\n"
1952                     "truncated filesystem on device (%x.%x):%d, access errors "
1953                     "possible.\n",
1954                     (long long)fsp->pcfs_mediasize,
1955                     (long long)(totsec * (blkcnt_t)secsize),
1956                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1957                     fsp->pcfs_ldrive);
1958                 mediasize = fsp->pcfs_mediasize;
1959         } else {
1960                 /*
1961                  * This is actually ok. A FAT needs not occupy the maximum
1962                  * space available in its partition, it can be shorter.
1963                  */
1964                 mediasize = (len_t)totsec * (len_t)secsize;
1965         }
1966 
1967         /*
1968          * Since we let just about anything pass through this function,
1969          * fence against divide-by-zero here.
1970          */
1971         if (secsize)
1972                 rdirsec = roundup(rec * 32, secsize) / secsize;
1973         else
1974                 rdirsec = 0;
1975 
1976         /*
1977          * This assignment is necessary before pc_dbdaddr() can first be
1978          * used. Must initialize the value here.
1979          */
1980         fsp->pcfs_secsize = secsize;
1981         fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
1982 
1983         fsp->pcfs_mediasize = mediasize;
1984 
1985         fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
1986         fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
1987         fsp->pcfs_mediadesc = bpb_get_Media(bpb);
1988         fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
1989         fsp->pcfs_rdirsec = rdirsec;
1990 
1991         /*
1992          * Remember: All PCFS offset calculations in sectors. Before I/O
1993          * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
1994          * necessary so that media with > 512Byte sector sizes work correctly.
1995          */
1996         fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
1997         fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
1998         fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
1999         datasec = totsec -
2000             (blkcnt_t)fatsec * fsp->pcfs_numfat -
2001             (blkcnt_t)rdirsec -
2002             (blkcnt_t)reserved;
2003 
2004         DTRACE_PROBE4(fatgeometry,
2005             blkcnt_t, totsec, size_t, fatsec,
2006             size_t, rdirsec, blkcnt_t, datasec);
2007 
2008         /*
2009          * UINT32_MAX is an underflow check - we calculate in "blkcnt_t" which
2010          * is 64bit in order to be able to catch "impossible" sector counts.
2011          * A sector count in FAT must fit 32bit unsigned int.
2012          */
2013         if (totsec != 0 &&
2014             (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2015             (len_t)totsec * (len_t)secsize <= mediasize &&
2016             datasec < totsec && datasec <= UINT32_MAX)
2017                 validflags |= BPB_TOTSEC_OK;
2018 
2019         if (mediasize >= (len_t)datasec * (len_t)secsize)
2020                 validflags |= BPB_MEDIASZ_OK;
2021 
2022         if (VALID_SECSIZE(secsize))
2023                 validflags |= BPB_SECSIZE_OK;
2024         if (VALID_SPCL(fsp->pcfs_spcl))
2025                 validflags |= BPB_SECPERCLUS_OK;
2026         if (VALID_CLSIZE(fsp->pcfs_clsize))
2027                 validflags |= BPB_CLSIZE_OK;
2028         if (VALID_NUMFATS(fsp->pcfs_numfat))
2029                 validflags |= BPB_NUMFAT_OK;
2030         if (VALID_RSVDSEC(reserved) && reserved < totsec)
2031                 validflags |= BPB_RSVDSECCNT_OK;
2032         if (VALID_MEDIA(fsp->pcfs_mediadesc))
2033                 validflags |= BPB_MEDIADESC_OK;
2034         if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2035                 validflags |= BPB_BOOTSIG16_OK;
2036         if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2037                 validflags |= BPB_BOOTSIG32_OK;
2038         if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2039                 validflags |= BPB_FSTYPSTR16_OK;
2040         if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2041                 validflags |= BPB_FSTYPSTR32_OK;
2042         if (VALID_OEMNAME(bpb_OEMName(bpb)))
2043                 validflags |= BPB_OEMNAME_OK;
2044         if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2045                 validflags |= BPB_BKBOOTSEC_OK;
2046         if (fsisec > 0 && fsisec <= reserved)
2047                 validflags |= BPB_FSISEC_OK;
2048         if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2049                 validflags |= BPB_JMPBOOT_OK;
2050         if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2051                 validflags |= BPB_FSVER_OK;
2052         if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2053                 validflags |= BPB_VOLLAB16_OK;
2054         if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2055                 validflags |= BPB_VOLLAB32_OK;
2056         if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2057                 validflags |= BPB_EXTFLAGS_OK;
2058 
2059         /*
2060          * Try to determine which FAT format to use.
2061          *
2062          * Calculate the number of clusters in order to determine
2063          * the type of FAT we are looking at.  This is the only
2064          * recommended way of determining FAT type, though there
2065          * are other hints in the data, this is the best way.
2066          *
2067          * Since we let just about "anything" pass through this function
2068          * without early exits, fence against divide-by-zero here.
2069          *
2070          * datasec was already validated against UINT32_MAX so we know
2071          * the result will not overflow the 32bit calculation.
2072          */
2073         if (fsp->pcfs_spcl)
2074                 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2075         else
2076                 ncl = 0;
2077 
2078         fsp->pcfs_ncluster = ncl;
2079 
2080         /*
2081          * From the Microsoft FAT specification:
2082          * In the following example, when it says <, it does not mean <=.
2083          * Note also that the numbers are correct.  The first number for
2084          * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2085          * and the '<' signs are not wrong.
2086          *
2087          * We "specialdetect" the corner cases, and use at least one "extra"
2088          * criterion to decide whether it's FAT16 or FAT32 if the cluster
2089          * count is dangerously close to the boundaries.
2090          */
2091 
2092         if (ncl <= PCF_FIRSTCLUSTER) {
2093                 type = FAT_UNKNOWN;
2094         } else if (ncl < 4085) {
2095                 type = FAT12;
2096         } else if (ncl <= 4096) {
2097                 type = FAT_QUESTIONABLE;
2098         } else if (ncl < 65525) {
2099                 type = FAT16;
2100         } else if (ncl <= 65536) {
2101                 type = FAT_QUESTIONABLE;
2102         } else if (ncl < PCF_LASTCLUSTER32) {
2103                 type = FAT32;
2104         } else {
2105                 type = FAT_UNKNOWN;
2106         }
2107 
2108         DTRACE_PROBE4(parseBPB__initial,
2109             struct pcfs *, fsp, unsigned char *, bpb,
2110             int, validflags, fattype_t, type);
2111 
2112 recheck:
2113         fsp->pcfs_fatsec = fatsec;
2114 
2115         /* Do some final sanity checks for each specific type of FAT */
2116         switch (type) {
2117                 case FAT12:
2118                         if (rec != 0)
2119                                 validflags |= BPB_ROOTENTCNT_OK;
2120                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2121                             bpb_get_TotSec16(bpb) == 0)
2122                                 validflags |= BPB_TOTSEC16_OK;
2123                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2124                             bpb_get_TotSec32(bpb) == 0)
2125                                 validflags |= BPB_TOTSEC32_OK;
2126                         if (bpb_get_FatSz16(bpb) == fatsec)
2127                                 validflags |= BPB_FATSZ16_OK;
2128                         if (fatsec * secsize >= ncl * 3 / 2)
2129                                 validflags |= BPB_FATSZ_OK;
2130                         if (ncl < 4085)
2131                                 validflags |= BPB_NCLUSTERS_OK;
2132 
2133                         fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2134                         fsp->pcfs_rootblksize =
2135                             fsp->pcfs_rdirsec * secsize;
2136                         fsp->pcfs_fsistart = 0;
2137 
2138                         if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2139                                 type = FAT_UNKNOWN;
2140                         break;
2141                 case FAT16:
2142                         if (rec != 0)
2143                                 validflags |= BPB_ROOTENTCNT_OK;
2144                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2145                             bpb_get_TotSec16(bpb) == 0)
2146                                 validflags |= BPB_TOTSEC16_OK;
2147                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2148                             bpb_get_TotSec32(bpb) == 0)
2149                                 validflags |= BPB_TOTSEC32_OK;
2150                         if (bpb_get_FatSz16(bpb) == fatsec)
2151                                 validflags |= BPB_FATSZ16_OK;
2152                         if (fatsec * secsize >= ncl * 2)
2153                                 validflags |= BPB_FATSZ_OK;
2154                         if (ncl >= 4085 && ncl < 65525)
2155                                 validflags |= BPB_NCLUSTERS_OK;
2156 
2157                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2158                         fsp->pcfs_rootblksize =
2159                             fsp->pcfs_rdirsec * secsize;
2160                         fsp->pcfs_fsistart = 0;
2161 
2162                         if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2163                                 type = FAT_UNKNOWN;
2164                         break;
2165                 case FAT32:
2166                         if (rec == 0)
2167                                 validflags |= BPB_ROOTENTCNT_OK;
2168                         if (bpb_get_TotSec16(bpb) == 0)
2169                                 validflags |= BPB_TOTSEC16_OK;
2170                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2171                                 validflags |= BPB_TOTSEC32_OK;
2172                         if (bpb_get_FatSz16(bpb) == 0)
2173                                 validflags |= BPB_FATSZ16_OK;
2174                         if (bpb_get_FatSz32(bpb) == fatsec)
2175                                 validflags |= BPB_FATSZ32_OK;
2176                         if (fatsec * secsize >= ncl * 4)
2177                                 validflags |= BPB_FATSZ_OK;
2178                         if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2179                                 validflags |= BPB_NCLUSTERS_OK;
2180 
2181                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2182                         fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2183                         fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2184                         if (validflags & BPB_FSISEC_OK)
2185                                 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2186                         fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2187                         if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2188                                 validflags |= BPB_ROOTCLUSTER_OK;
2189 
2190                         /*
2191                          * Current PCFS code only works if 'pcfs_rdirstart'
2192                          * contains the root cluster number on FAT32.
2193                          * That's a mis-use and would better be changed.
2194                          */
2195                         fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2196 
2197                         if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2198                                 type = FAT_UNKNOWN;
2199                         break;
2200                 case FAT_QUESTIONABLE:
2201                         type = secondaryBPBChecks(fsp, bpb, secsize);
2202                         goto recheck;
2203                 default:
2204                         ASSERT(type == FAT_UNKNOWN);
2205                         break;
2206         }
2207 
2208         ASSERT(type != FAT_QUESTIONABLE);
2209 
2210         fsp->pcfs_fattype = type;
2211 
2212         if (valid)
2213                 *valid = validflags;
2214 
2215         DTRACE_PROBE4(parseBPB__final,
2216             struct pcfs *, fsp, unsigned char *, bpb,
2217             int, validflags, fattype_t, type);
2218 
2219         if (type != FAT_UNKNOWN) {
2220                 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2221                 ASSERT(ISP2(secsize / DEV_BSIZE));
2222                 return (1);
2223         }
2224 
2225         return (0);
2226 }
2227 
2228 
2229 /*
2230  * Detect the device's native block size (sector size).
2231  *
2232  * Test whether the device is:
2233  *      - a floppy device from a known controller type via DKIOCINFO
2234  *      - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2235  *      - a PCMCIA sram memory card (pseudofloppy) using pcram(7d)
2236  *      - a USB floppy drive (identified by drive geometry)
2237  *
2238  * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2239  * to minimize risks due to slow I/O and user hotplugging / device ejection.
2240  *
2241  * This might be a bit wasteful on kernel stack space; if anyone's
2242  * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2243  */
2244 static void
2245 pcfs_device_getinfo(struct pcfs *fsp)
2246 {
2247         dev_t                   rdev = fsp->pcfs_xdev;
2248         int                     error;
2249         union {
2250                 struct dk_minfo         mi;
2251                 struct dk_cinfo         ci;
2252                 struct dk_geom          gi;
2253                 struct fd_char          fc;
2254         } arg;                          /* save stackspace ... */
2255         intptr_t argp = (intptr_t)&arg;
2256         ldi_handle_t            lh;
2257         ldi_ident_t             li;
2258         int isfloppy, isremoveable, ishotpluggable;
2259         cred_t                  *cr = CRED();
2260 
2261         if (ldi_ident_from_dev(rdev, &li))
2262                 goto out;
2263 
2264         error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2265         ldi_ident_release(li);
2266         if (error)
2267                 goto out;
2268 
2269         /*
2270          * Not sure if this could possibly happen. It'd be a bit like
2271          * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2272          * expecting it, needs some thought if triggered ...
2273          */
2274         ASSERT(fsp->pcfs_xdev == rdev);
2275 
2276         /*
2277          * Check for removeable/hotpluggable media.
2278          */
2279         if (ldi_ioctl(lh, DKIOCREMOVABLE,
2280             (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2281                 isremoveable = 0;
2282         }
2283         if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2284             (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2285                 ishotpluggable = 0;
2286         }
2287 
2288         /*
2289          * Make sure we don't use "half-initialized" values if the ioctls fail.
2290          */
2291         if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2292                 bzero(&arg, sizeof (arg));
2293                 fsp->pcfs_mediasize = 0;
2294         } else {
2295                 fsp->pcfs_mediasize =
2296                     (len_t)arg.mi.dki_lbsize *
2297                     (len_t)arg.mi.dki_capacity;
2298         }
2299 
2300         if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2301                 if (fsp->pcfs_secsize == 0) {
2302                         fsp->pcfs_secsize = arg.mi.dki_lbsize;
2303                         fsp->pcfs_sdshift =
2304                             ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2305                 } else {
2306                         PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2307                             "%d, device (%x.%x), different from user-provided "
2308                             "%d. User override - ignoring autodetect result.\n",
2309                             arg.mi.dki_lbsize,
2310                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2311                             fsp->pcfs_secsize);
2312                 }
2313         } else if (arg.mi.dki_lbsize) {
2314                 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2315                     "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2316                     "Ignoring autodetect result.\n",
2317                     arg.mi.dki_lbsize,
2318                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2319         }
2320 
2321         /*
2322          * We treat the following media types as a floppy by default.
2323          */
2324         isfloppy =
2325             (arg.mi.dki_media_type == DK_FLOPPY ||
2326             arg.mi.dki_media_type == DK_ZIP ||
2327             arg.mi.dki_media_type == DK_JAZ);
2328 
2329         /*
2330          * if this device understands fdio(7I) requests it's
2331          * obviously a floppy drive.
2332          */
2333         if (!isfloppy &&
2334             !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2335                 isfloppy = 1;
2336 
2337         /*
2338          * some devices (PCMCIA pseudofloppies) we like to treat
2339          * as floppies, but they don't understand fdio(7I) requests.
2340          */
2341         if (!isfloppy &&
2342             !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2343             (arg.ci.dki_ctype == DKC_WDC2880 ||
2344             arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2345             arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2346             arg.ci.dki_ctype == DKC_INTEL82077 ||
2347             (arg.ci.dki_ctype == DKC_PCMCIA_MEM &&
2348             arg.ci.dki_flags & DKI_PCMCIA_PFD)))
2349                 isfloppy = 1;
2350 
2351         /*
2352          * This is the "final fallback" test - media with
2353          * 2 heads and 80 cylinders are assumed to be floppies.
2354          * This is normally true for USB floppy drives ...
2355          */
2356         if (!isfloppy &&
2357             !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2358             (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2359                 isfloppy = 1;
2360 
2361         /*
2362          * This is similar to the "old" PCFS code that sets this flag
2363          * just based on the media descriptor being 0xf8 (MD_FIXED).
2364          * Should be re-worked. We really need some specialcasing for
2365          * removeable media.
2366          */
2367         if (!isfloppy) {
2368                 fsp->pcfs_flags |= PCFS_NOCHK;
2369         }
2370 
2371         /*
2372          * We automatically disable access time updates if the medium is
2373          * removeable and/or hotpluggable, and the admin did not explicitly
2374          * request access time updates (via the "atime" mount option).
2375          * The majority of flash-based media should fit this category.
2376          * Minimizing write access extends the lifetime of your memory stick !
2377          */
2378         if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2379             (isremoveable || ishotpluggable | isfloppy)) {
2380                 fsp->pcfs_flags |= PCFS_NOATIME;
2381         }
2382 
2383         (void) ldi_close(lh, FREAD, cr);
2384 out:
2385         if (fsp->pcfs_secsize == 0) {
2386                 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2387                     "device (%x.%x) failed, no user-provided fallback. "
2388                     "Using %d bytes.\n",
2389                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2390                     DEV_BSIZE);
2391                 fsp->pcfs_secsize = DEV_BSIZE;
2392                 fsp->pcfs_sdshift = 0;
2393         }
2394         ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2395         ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2396 }
2397 
2398 /*
2399  * Get the FAT type for the DOS medium.
2400  *
2401  * -------------------------
2402  * According to Microsoft:
2403  *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2404  * count of clusters on the volume and nothing else.
2405  * -------------------------
2406  *
2407  */
2408 static int
2409 pc_getfattype(struct pcfs *fsp)
2410 {
2411         int error = 0;
2412         buf_t *bp = NULL;
2413         struct vnode *devvp = fsp->pcfs_devvp;
2414         dev_t   dev = devvp->v_rdev;
2415 
2416         /*
2417          * Detect the native block size of the medium, and attempt to
2418          * detect whether the medium is removeable.
2419          * We do treat removeable media (floppies, PCMCIA memory cards,
2420          * USB and FireWire disks) differently wrt. to the frequency
2421          * and synchronicity of FAT updates.
2422          * We need to know the media block size in order to be able to
2423          * parse the partition table.
2424          */
2425         pcfs_device_getinfo(fsp);
2426 
2427         /*
2428          * Unpartitioned media (floppies and some removeable devices)
2429          * don't have a partition table, the FAT BPB is at disk block 0.
2430          * Start out by reading block 0.
2431          */
2432         fsp->pcfs_dosstart = 0;
2433         bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2434 
2435         if (error = geterror(bp))
2436                 goto out;
2437 
2438         /*
2439          * If a logical drive number is requested, parse the partition table
2440          * and attempt to locate it. Otherwise, proceed immediately to the
2441          * BPB check. findTheDrive(), if successful, returns the disk block
2442          * number where the requested partition starts in "startsec".
2443          */
2444         if (fsp->pcfs_ldrive != 0) {
2445                 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2446                     "device (%x,%x):%d to find BPB\n",
2447                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2448 
2449                 if (error = findTheDrive(fsp, &bp))
2450                         goto out;
2451 
2452                 ASSERT(fsp->pcfs_dosstart != 0);
2453 
2454                 brelse(bp);
2455                 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2456                     fsp->pcfs_secsize);
2457                 if (error = geterror(bp))
2458                         goto out;
2459         }
2460 
2461         /*
2462          * Validate the BPB and fill in the instance structure.
2463          */
2464         if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2465                 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2466                     "device (%x.%x):%d, disk LBA %u\n",
2467                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2468                     (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2469                 error = EINVAL;
2470                 goto out;
2471         }
2472 
2473         ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2474 
2475 out:
2476         /*
2477          * Release the buffer used
2478          */
2479         if (bp != NULL)
2480                 brelse(bp);
2481         return (error);
2482 }
2483 
2484 
2485 /*
2486  * Get the file allocation table.
2487  * If there is an old FAT, invalidate it.
2488  */
2489 int
2490 pc_getfat(struct pcfs *fsp)
2491 {
2492         struct buf *bp = NULL;
2493         uchar_t *fatp = NULL;
2494         uchar_t *fat_changemap = NULL;
2495         int error;
2496         int fat_changemapsize;
2497         int flags = 0;
2498         int nfat;
2499         int altfat_mustmatch = 0;
2500         int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2501 
2502         if (fsp->pcfs_fatp) {
2503                 /*
2504                  * There is a FAT in core.
2505                  * If there are open file pcnodes or we have modified it or
2506                  * it hasn't timed out yet use the in core FAT.
2507                  * Otherwise invalidate it and get a new one
2508                  */
2509 #ifdef notdef
2510                 if (fsp->pcfs_frefs ||
2511                     (fsp->pcfs_flags & PCFS_FATMOD) ||
2512                     (gethrestime_sec() < fsp->pcfs_fattime)) {
2513                         return (0);
2514                 } else {
2515                         mutex_enter(&pcfslock);
2516                         pc_invalfat(fsp);
2517                         mutex_exit(&pcfslock);
2518                 }
2519 #endif /* notdef */
2520                 return (0);
2521         }
2522 
2523         /*
2524          * Get FAT and check it for validity
2525          */
2526         fatp = kmem_alloc(fatsize, KM_SLEEP);
2527         error = pc_readfat(fsp, fatp);
2528         if (error) {
2529                 flags = B_ERROR;
2530                 goto out;
2531         }
2532         fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2533         fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2534         fsp->pcfs_fatp = fatp;
2535         fsp->pcfs_fat_changemapsize = fat_changemapsize;
2536         fsp->pcfs_fat_changemap = fat_changemap;
2537 
2538         /*
2539          * The only definite signature check is that the
2540          * media descriptor byte should match the first byte
2541          * of the FAT block.
2542          */
2543         if (fatp[0] != fsp->pcfs_mediadesc) {
2544                 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2545                     "media descriptor %x, FAT[0] lowbyte %x\n",
2546                     (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2547                 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2548                 altfat_mustmatch = 1;
2549         }
2550 
2551         /*
2552          * Get alternate FATs and check for consistency
2553          * This is an inlined version of pc_readfat().
2554          * Since we're only comparing FAT and alternate FAT,
2555          * there's no reason to let pc_readfat() copy data out
2556          * of the buf. Instead, compare in-situ, one cluster
2557          * at a time.
2558          */
2559         for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2560                 size_t startsec;
2561                 size_t off;
2562 
2563                 startsec = pc_dbdaddr(fsp,
2564                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2565 
2566                 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2567                         daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2568                             pc_cltodb(fsp, pc_lblkno(fsp, off)));
2569 
2570                         bp = bread(fsp->pcfs_xdev, fatblk,
2571                             MIN(fsp->pcfs_clsize, fatsize - off));
2572                         if (bp->b_flags & (B_ERROR | B_STALE)) {
2573                                 cmn_err(CE_NOTE,
2574                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2575                                     " read error at offset %ld on device"
2576                                     " (%x.%x):%d",
2577                                     nfat, (void *)(uintptr_t)startsec, off,
2578                                     getmajor(fsp->pcfs_xdev),
2579                                     getminor(fsp->pcfs_xdev),
2580                                     fsp->pcfs_ldrive);
2581                                 flags = B_ERROR;
2582                                 error = EIO;
2583                                 goto out;
2584                         }
2585                         bp->b_flags |= B_STALE | B_AGE;
2586                         if (bcmp(bp->b_un.b_addr, fatp + off,
2587                             MIN(fsp->pcfs_clsize, fatsize - off))) {
2588                                 cmn_err(CE_NOTE,
2589                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2590                                     " corrupted at offset %ld on device"
2591                                     " (%x.%x):%d",
2592                                     nfat, (void *)(uintptr_t)startsec, off,
2593                                     getmajor(fsp->pcfs_xdev),
2594                                     getminor(fsp->pcfs_xdev),
2595                                     fsp->pcfs_ldrive);
2596                                 if (altfat_mustmatch) {
2597                                         flags = B_ERROR;
2598                                         error = EIO;
2599                                         goto out;
2600                                 }
2601                         }
2602                         brelse(bp);
2603                         bp = NULL;      /* prevent double release */
2604                 }
2605         }
2606 
2607         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2608         fsp->pcfs_fatjustread = 1;
2609 
2610         /*
2611          * Retrieve FAT32 fsinfo sector.
2612          * A failure to read this is not fatal to accessing the volume.
2613          * It simply means operations that count or search free blocks
2614          * will have to do a full FAT walk, vs. a possibly quicker lookup
2615          * of the summary information.
2616          * Hence, we log a message but return success overall after this point.
2617          */
2618         if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2619                 struct fat_od_fsi *fsinfo_disk;
2620 
2621                 bp = bread(fsp->pcfs_xdev,
2622                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2623                 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2624                 if (bp->b_flags & (B_ERROR | B_STALE) ||
2625                     !FSISIG_OK(fsinfo_disk)) {
2626                         cmn_err(CE_NOTE,
2627                             "!pcfs: error reading fat32 fsinfo from "
2628                             "device (%x.%x):%d, block %lld",
2629                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2630                             fsp->pcfs_ldrive,
2631                             (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2632                         fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2633                         fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2634                         fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2635                 } else {
2636                         bp->b_flags |= B_STALE | B_AGE;
2637                         fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2638                         fsp->pcfs_fsinfo.fs_free_clusters =
2639                             LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2640                         fsp->pcfs_fsinfo.fs_next_free =
2641                             LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2642                 }
2643                 brelse(bp);
2644                 bp = NULL;
2645         }
2646 
2647         if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2648                 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2649         else
2650                 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2651 
2652         return (0);
2653 
2654 out:
2655         cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2656         if (bp)
2657                 brelse(bp);
2658         if (fatp)
2659                 kmem_free(fatp, fatsize);
2660         if (fat_changemap)
2661                 kmem_free(fat_changemap, fat_changemapsize);
2662 
2663         if (flags) {
2664                 pc_mark_irrecov(fsp);
2665         }
2666         return (error);
2667 }