1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "@(#)pc_vfsops.c        1.104   07/10/25 SMI"
  27 
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/kmem.h>
  31 #include <sys/user.h>
  32 #include <sys/proc.h>
  33 #include <sys/cred.h>
  34 #include <sys/disp.h>
  35 #include <sys/buf.h>
  36 #include <sys/vfs.h>
  37 #include <sys/vfs_opreg.h>
  38 #include <sys/vnode.h>
  39 #include <sys/fdio.h>
  40 #include <sys/file.h>
  41 #include <sys/uio.h>
  42 #include <sys/conf.h>
  43 #include <sys/statvfs.h>
  44 #include <sys/mount.h>
  45 #include <sys/pathname.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/debug.h>
  48 #include <sys/sysmacros.h>
  49 #include <sys/conf.h>
  50 #include <sys/mkdev.h>
  51 #include <sys/swap.h>
  52 #include <sys/sunddi.h>
  53 #include <sys/sunldi.h>
  54 #include <sys/dktp/fdisk.h>
  55 #include <sys/fs/pc_label.h>
  56 #include <sys/fs/pc_fs.h>
  57 #include <sys/fs/pc_dir.h>
  58 #include <sys/fs/pc_node.h>
  59 #include <fs/fs_subr.h>
  60 #include <sys/modctl.h>
  61 #include <sys/dkio.h>
  62 #include <sys/open.h>
  63 #include <sys/mntent.h>
  64 #include <sys/policy.h>
  65 #include <sys/atomic.h>
  66 #include <sys/sdt.h>
  67 
/*
 * The majority of PC media use a 512-byte sector size, but
 * occasionally you will run across a 1k sector size.
 * For media with a 1k sector size, fd_strategy() requires
 * the I/O size to be a 1k multiple; so when the sector size
 * is not yet known, always read 1k.
 */
#define PC_SAFESECSIZE  (PC_SECSIZE * 2)

static int pcfs_pseudo_floppy(dev_t);

/*
 * pcfsinit() is the initialization routine named in the vfsdef below.
 * The pcfs_mount() ... pcfs_freevfs() routines (other than the
 * pc_syncfsnodes() helper declared among them) are the VFS operations
 * that pcfsinit() installs through its operations template.
 */
static int pcfsinit(int, char *);
static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
        struct cred *);
static int pcfs_unmount(struct vfs *, int, struct cred *);
static int pcfs_root(struct vfs *, struct vnode **);
static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
static int pc_syncfsnodes(struct pcfs *);
static int pcfs_sync(struct vfs *, short, struct cred *);
static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
static void pcfs_freevfs(vfs_t *vfsp);

/* FAT read/write helpers (defined outside the part of the file shown). */
static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
static int pc_writefat(struct pcfs *fsp, daddr_t start);

/*
 * Helpers used by pcfs_mount(): pc_getfattype() locates and checks the
 * FAT BPB, pcfs_parse_mntopts() transfers mount options into the instance.
 */
static int pc_getfattype(struct pcfs *fsp);
static void pcfs_parse_mntopts(struct pcfs *fsp, struct mounta *uap);
  95 
  96 
  97 /*
  98  * pcfs mount options table
  99  */
 100 
 101 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
 102 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
 103 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
 104 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
 105 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
 106 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
 107 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
 108 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
 109 
 110 static mntopt_t mntopts[] = {
 111 /*
 112  *      option name     cancel option   default arg     flags   opt data
 113  */
 114         { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
 115         { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
 116         { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
 117         { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
 118         { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
 119         { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
 120         { MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
 121         { MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
 122         { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
 123         { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
 124 };
 125 
 126 static mntopts_t pcfs_mntopts = {
 127         sizeof (mntopts) / sizeof (mntopt_t),
 128         mntopts
 129 };
 130 
/*
 * Debug level knob. It is not referenced in the part of the file shown
 * here - presumably consumed by debug output macros elsewhere (TODO confirm).
 */
int pcfsdebuglevel = 0;

/*
 * pcfslock:    protects the list of mounted pc filesystems "pc_mounttab".
 * pcfs_lock:   (inside per filesystem structure "pcfs")
 *              per filesystem lock. Most of the vfsops and vnodeops are
 *              protected by this lock.
 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 *
 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 *
 * pcfs_mountcount:     used to prevent module unloads while there is still
 *                      pcfs state from a former mount hanging around. With
 *                      forced umount support, the filesystem module must not
 *                      be allowed to go away before the last VFS_FREEVFS()
 *                      call has been made.
 *                      Since this is just an atomic counter, there's no need
 *                      for locking.
 */
kmutex_t        pcfslock;
krwlock_t       pcnodes_lock;
uint32_t        pcfs_mountcount;

/* Filesystem type index passed to pcfsinit() by the framework. */
static int pcfstype;
 155 
/*
 * Filesystem definition: version, filesystem name, initialization
 * routine, VSW_* capability flags and the mount option prototype table.
 */
static vfsdef_t vfw = {
        VFSDEF_VERSION,
        "pcfs",
        pcfsinit,
        VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS,
        &pcfs_mntopts
};

extern struct mod_ops mod_fsops;

/*
 * Loadable-module description of the filesystem: module operations,
 * descriptive string and the filesystem definition above.
 */
static struct modlfs modlfs = {
        &mod_fsops,
        "PC filesystem v1.2",
        &vfw
};

/* Module linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
        MODREV_1,
        &modlfs,
        NULL
};
 177 
 178 int
 179 _init(void)
 180 {
 181         int     error;
 182 
 183 #if !defined(lint)
 184         /* make sure the on-disk structures are sane */
 185         ASSERT(sizeof (struct pcdir) == 32);
 186         ASSERT(sizeof (struct pcdir_lfn) == 32);
 187 #endif
 188         mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
 189         rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
 190         error = mod_install(&modlinkage);
 191         if (error) {
 192                 mutex_destroy(&pcfslock);
 193                 rw_destroy(&pcnodes_lock);
 194         }
 195         return (error);
 196 }
 197 
 198 int
 199 _fini(void)
 200 {
 201         int     error;
 202 
 203         /*
 204          * If a forcedly unmounted instance is still hanging around,
 205          * we cannot allow the module to be unloaded because that would
 206          * cause panics once the VFS framework decides it's time to call
 207          * into VFS_FREEVFS().
 208          */
 209         if (pcfs_mountcount)
 210                 return (EBUSY);
 211 
 212         error = mod_remove(&modlinkage);
 213         if (error)
 214                 return (error);
 215         mutex_destroy(&pcfslock);
 216         rw_destroy(&pcnodes_lock);
 217         /*
 218          * Tear down the operations vectors
 219          */
 220         (void) vfs_freevfsops_by_type(pcfstype);
 221         vn_freevnodeops(pcfs_fvnodeops);
 222         vn_freevnodeops(pcfs_dvnodeops);
 223         return (0);
 224 }
 225 
 226 int
 227 _info(struct modinfo *modinfop)
 228 {
 229         return (mod_info(&modlinkage, modinfop));
 230 }
 231 
 232 /* ARGSUSED1 */
 233 static int
 234 pcfsinit(int fstype, char *name)
 235 {
 236         static const fs_operation_def_t pcfs_vfsops_template[] = {
 237                 VFSNAME_MOUNT,          { .vfs_mount = pcfs_mount },
 238                 VFSNAME_UNMOUNT,        { .vfs_unmount = pcfs_unmount },
 239                 VFSNAME_ROOT,           { .vfs_root = pcfs_root },
 240                 VFSNAME_STATVFS,        { .vfs_statvfs = pcfs_statvfs },
 241                 VFSNAME_SYNC,           { .vfs_sync = pcfs_sync },
 242                 VFSNAME_VGET,           { .vfs_vget = pcfs_vget },
 243                 VFSNAME_FREEVFS,        { .vfs_freevfs = pcfs_freevfs },
 244                 NULL,                   NULL
 245         };
 246         int error;
 247 
 248         error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
 249         if (error != 0) {
 250                 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
 251                 return (error);
 252         }
 253 
 254         error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
 255         if (error != 0) {
 256                 (void) vfs_freevfsops_by_type(fstype);
 257                 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
 258                 return (error);
 259         }
 260 
 261         error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
 262         if (error != 0) {
 263                 (void) vfs_freevfsops_by_type(fstype);
 264                 vn_freevnodeops(pcfs_fvnodeops);
 265                 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
 266                 return (error);
 267         }
 268 
 269         pcfstype = fstype;
 270         (void) pc_init();
 271         pcfs_mountcount = 0;
 272         return (0);
 273 }
 274 
/*
 * Head of the list of mounted PCFS instances, linked through pcfs_nxt
 * and protected by pcfslock.
 */
static struct pcfs *pc_mounttab = NULL;

/* Defined elsewhere; not referenced in the part of the file shown here. */
extern struct pcfs_args pc_tz;

/*
 *  Define some special logical drives we use internal to this file.
 *
 *  BOOT_PARTITION_DRIVE is selected by the ":boot" device name suffix.
 *  UNPARTITIONED_DRIVE is used when the device node is named directly
 *  or a floppy-style ":a" / ":b" suffix is given.
 *  Drive letter 'c' and drive number 1 both map to logical drive 1,
 *  the value of PRIMARY_DOS_DRIVE.
 */
#define BOOT_PARTITION_DRIVE    99
#define PRIMARY_DOS_DRIVE       1
#define UNPARTITIONED_DRIVE     0
 285 
 286 static int
 287 pcfs_device_identify(
 288         struct vfs *vfsp,
 289         struct mounta *uap,
 290         struct cred *cr,
 291         int *dos_ldrive,
 292         dev_t *xdev)
 293 {
 294         struct pathname special;
 295         char *c;
 296         struct vnode *bvp;
 297         int oflag, aflag;
 298         int error;
 299 
 300         /*
 301          * Resolve path name of special file being mounted.
 302          */
 303         if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
 304                 return (error);
 305         }
 306 
 307         *dos_ldrive = -1;
 308 
 309         if (error =
 310             lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &bvp)) {
 311                 /*
 312                  * If there's no device node, the name specified most likely
 313                  * maps to a PCFS-style "partition specifier" to select a
 314                  * harddisk primary/logical partition. Disable floppy-specific
 315                  * checks in such cases unless an explicit :A or :B is
 316                  * requested.
 317                  */
 318 
 319                 /*
 320                  * Split the pathname string at the last ':' separator.
 321                  * If there's no ':' in the device name, or the ':' is the
 322                  * last character in the string, the name is invalid and
 323                  * the error from the previous lookup will be returned.
 324                  */
 325                 c = strrchr(special.pn_path, ':');
 326                 if (c == NULL || strlen(c) == 0)
 327                         goto devlookup_done;
 328 
 329                 *c++ = '\0';
 330 
 331                 /*
 332                  * PCFS partition name suffixes can be:
 333                  *      - "boot" to indicate the X86BOOT partition
 334                  *      - a drive letter [c-z] for the "DOS logical drive"
 335                  *      - a drive number 1..24 for the "DOS logical drive"
 336                  *      - a "floppy name letter", 'a' or 'b' (just strip this)
 337                  */
 338                 if (strcasecmp(c, "boot") == 0) {
 339                         /*
 340                          * The Solaris boot partition is requested.
 341                          */
 342                         *dos_ldrive = BOOT_PARTITION_DRIVE;
 343                 } else if (strspn(c, "0123456789") == strlen(c)) {
 344                         /*
 345                          * All digits - parse the partition number.
 346                          */
 347                         long drvnum = 0;
 348 
 349                         if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
 350                                 /*
 351                                  * A number alright - in the allowed range ?
 352                                  */
 353                                 if (drvnum > 24 || drvnum == 0)
 354                                         error = ENXIO;
 355                         }
 356                         if (error)
 357                                 goto devlookup_done;
 358                         *dos_ldrive = (int)drvnum;
 359                 } else if (strlen(c) == 1) {
 360                         /*
 361                          * A single trailing character was specified.
 362                          *      - [c-zC-Z] means a harddisk partition, and
 363                          *        we retrieve the partition number.
 364                          *      - [abAB] means a floppy drive, so we swallow
 365                          *        the "drive specifier" and test later
 366                          *        whether the physical device is a floppy or
 367                          *        PCMCIA pseudofloppy (sram card).
 368                          */
 369                         *c = tolower(*c);
 370                         if (*c == 'a' || *c == 'b') {
 371                                 *dos_ldrive = UNPARTITIONED_DRIVE;
 372                         } else if (*c < 'c' || *c > 'z') {
 373                                 error = ENXIO;
 374                                 goto devlookup_done;
 375                         } else {
 376                                 *dos_ldrive = 1 + *c - 'c';
 377                         }
 378                 } else {
 379                         /*
 380                          * Can't parse this - pass through previous error.
 381                          */
 382                         goto devlookup_done;
 383                 }
 384 
 385 
 386                 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
 387                     NULLVPP, &bvp);
 388         } else {
 389                 *dos_ldrive = UNPARTITIONED_DRIVE;
 390         }
 391 devlookup_done:
 392         pn_free(&special);
 393         if (error)
 394                 return (error);
 395 
 396         ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
 397 
 398         *xdev = bvp->v_rdev;
 399 
 400         /*
 401          * Verify caller's permission to open the device special file.
 402          */
 403         if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
 404             ((uap->flags & MS_RDONLY) != 0)) {
 405                 oflag = FREAD;
 406                 aflag = VREAD;
 407         } else {
 408                 oflag = FREAD | FWRITE;
 409                 aflag = VREAD | VWRITE;
 410         }
 411 
 412         if (bvp->v_type != VBLK)
 413                 error = ENOTBLK;
 414         else if (getmajor(*xdev) >= devcnt)
 415                 error = ENXIO;
 416 
 417         if ((error != 0) ||
 418             (error = VOP_ACCESS(bvp, aflag, 0, cr, NULL)) != 0 ||
 419             (error = secpolicy_spec_open(cr, bvp, oflag)) != 0) {
 420                 VN_RELE(bvp);
 421                 return (error);
 422         }
 423 
 424         VN_RELE(bvp);
 425         return (0);
 426 }
 427 
 428 static int
 429 pcfs_device_ismounted(
 430         struct vfs *vfsp,
 431         int dos_ldrive,
 432         dev_t xdev,
 433         int *remounting,
 434         dev_t *pseudodev)
 435 {
 436         struct pcfs *fsp;
 437         int remount = *remounting;
 438 
 439         /*
 440          * Ensure that this logical drive isn't already mounted, unless
 441          * this is a REMOUNT request.
 442          * Note: The framework will perform this check if the "...:c"
 443          * PCFS-style "logical drive" syntax has not been used and an
 444          * actually existing physical device is backing this filesystem.
 445          * Once all block device drivers support PC-style partitioning,
 446          * this codeblock can be dropped.
 447          */
 448         *pseudodev = xdev;
 449 
 450         if (dos_ldrive) {
 451                 mutex_enter(&pcfslock);
 452                 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
 453                         if (fsp->pcfs_xdev == xdev &&
 454                             fsp->pcfs_ldrive == dos_ldrive) {
 455                                 mutex_exit(&pcfslock);
 456                                 if (remount) {
 457                                         return (0);
 458                                 } else {
 459                                         return (EBUSY);
 460                                 }
 461                         }
 462                 /*
 463                  * Assign a unique device number for the vfs
 464                  * The old way (getudev() + a constantly incrementing
 465                  * major number) was wrong because it changes vfs_dev
 466                  * across mounts and reboots, which breaks nfs file handles.
 467                  * UFS just uses the real dev_t. We can't do that because
 468                  * of the way pcfs opens fdisk partitons (the :c and :d
 469                  * partitions are on the same dev_t). Though that _might_
 470                  * actually be ok, since the file handle contains an
 471                  * absolute block number, it's probably better to make them
 472                  * different. So I think we should retain the original
 473                  * dev_t, but come up with a different minor number based
 474                  * on the logical drive that will _always_ come up the same.
 475                  * For now, we steal the upper 6 bits.
 476                  */
 477 #ifdef notdef
 478                 /* what should we do here? */
 479                 if (((getminor(xdev) >> 12) & 0x3F) != 0)
 480                         printf("whoops - upper bits used!\n");
 481 #endif
 482                 *pseudodev = makedevice(getmajor(xdev),
 483                     ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
 484                 if (vfs_devmounting(*pseudodev, vfsp)) {
 485                         mutex_exit(&pcfslock);
 486                         return (EBUSY);
 487                 }
 488                 if (vfs_devismounted(*pseudodev)) {
 489                         mutex_exit(&pcfslock);
 490                         if (remount) {
 491                                 return (0);
 492                         } else {
 493                                 return (EBUSY);
 494                         }
 495                 }
 496                 mutex_exit(&pcfslock);
 497         } else {
 498                 *pseudodev = xdev;
 499                 if (vfs_devmounting(*pseudodev, vfsp)) {
 500                         return (EBUSY);
 501                 }
 502                 if (vfs_devismounted(*pseudodev))
 503                         if (remount) {
 504                                 return (0);
 505                         } else {
 506                                 return (EBUSY);
 507                         }
 508         }
 509 
 510         /*
 511          * This is not a remount. Even if MS_REMOUNT was requested,
 512          * the caller needs to proceed as it would on an ordinary
 513          * mount.
 514          */
 515         *remounting = 0;
 516 
 517         ASSERT(*pseudodev);
 518         return (0);
 519 }
 520 
 521 /*
 522  * Get the PCFS-specific mount options from the VFS framework.
 523  * For "timezone" and "secsize", we need to parse the number
 524  * ourselves and ensure its validity.
 525  * Note: "secsize" is deliberately undocumented at this time,
 526  * it's a workaround for devices (particularly: lofi image files)
 527  * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
 528  */
 529 static void
 530 pcfs_parse_mntopts(struct pcfs *fsp, struct mounta *uap)
 531 {
 532         char *c;
 533         char *endptr;
 534         long l;
 535         struct vfs *vfsp = fsp->pcfs_vfs;
 536 
 537         ASSERT(fsp->pcfs_secondswest == 0);
 538         ASSERT(fsp->pcfs_secsize == 0);
 539 
 540         if (uap->flags & MS_RDONLY) {
 541                 vfsp->vfs_flag |= VFS_RDONLY;
 542                 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 543         }
 544 
 545         if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
 546                 fsp->pcfs_flags |= PCFS_HIDDEN;
 547         if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
 548                 fsp->pcfs_flags |= PCFS_FOLDCASE;
 549         if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
 550                 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
 551         if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
 552                 fsp->pcfs_flags |= PCFS_NOATIME;
 553 
 554         if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
 555                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 556                     endptr == c + strlen(c)) {
 557                         /*
 558                          * A number alright - in the allowed range ?
 559                          */
 560                         if (l <= -12*3600 || l >= 12*3600) {
 561                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 562                                     "'timezone' mount option - %ld "
 563                                     "is out of range. Assuming 0.", l);
 564                                 l = 0;
 565                         }
 566                 } else {
 567                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 568                             "'timezone' mount option - argument %s "
 569                             "is not a valid number. Assuming 0.", c);
 570                         l = 0;
 571                 }
 572                 fsp->pcfs_secondswest = l;
 573         }
 574 
 575         /*
 576          * The "secsize=..." mount option is a workaround for the lack of
 577          * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
 578          * partition table of a disk image and it has been partitioned with
 579          * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
 580          * images.
 581          * That should really be fixed in lofi ... this is a workaround.
 582          */
 583         if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
 584                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 585                     endptr == c + strlen(c)) {
 586                         /*
 587                          * A number alright - a valid sector size as well ?
 588                          */
 589                         if (!VALID_SECSIZE(l)) {
 590                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 591                                     "'secsize' mount option - %ld is "
 592                                     "unsupported. Autodetecting.", l);
 593                                 l = 0;
 594                         }
 595                 } else {
 596                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 597                             "'secsize' mount option - argument %s "
 598                             "is not a valid number. Autodetecting.", c);
 599                         l = 0;
 600                 }
 601                 fsp->pcfs_secsize = l;
 602                 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
 603         }
 604 }
 605 
 606 /*
 607  * vfs operations
 608  */
 609 
 610 /*
 611  * pcfs_mount - backend for VFS_MOUNT() on PCFS.
 612  */
 613 static int
 614 pcfs_mount(
 615         struct vfs *vfsp,
 616         struct vnode *mvp,
 617         struct mounta *uap,
 618         struct cred *cr)
 619 {
 620         struct pcfs *fsp;
 621         struct vnode *devvp;
 622         dev_t pseudodev;
 623         dev_t xdev;
 624         int dos_ldrive = 0;
 625         int error;
 626         int remounting;
 627 
 628         if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 629                 return (error);
 630 
 631         if (mvp->v_type != VDIR)
 632                 return (ENOTDIR);
 633 
 634         mutex_enter(&mvp->v_lock);
 635         if ((uap->flags & MS_REMOUNT) == 0 &&
 636             (uap->flags & MS_OVERLAY) == 0 &&
 637             (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 638                 mutex_exit(&mvp->v_lock);
 639                 return (EBUSY);
 640         }
 641         mutex_exit(&mvp->v_lock);
 642 
 643         /*
 644          * PCFS doesn't do mount arguments anymore - everything's a mount
 645          * option these days. In order not to break existing callers, we
 646          * don't reject it yet, just warn that the data (if any) is ignored.
 647          */
 648         if (uap->datalen != 0)
 649                 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
 650                     "mount argument structures instead of mount options. "
 651                     "Ignoring mount(2) 'dataptr' argument.");
 652 
 653         /*
 654          * For most filesystems, this is just a lookupname() on the
 655          * mount pathname string. PCFS historically has to do its own
 656          * partition table parsing because not all Solaris architectures
 657          * support all styles of partitioning that PC media can have, and
 658          * hence PCFS understands "device names" that don't map to actual
 659          * physical device nodes. Parsing the "PCFS syntax" for device
 660          * names is done in pcfs_device_identify() - see there.
 661          *
 662          * Once all block device drivers that can host FAT filesystems have
 663          * been enhanced to create device nodes for all PC-style partitions,
 664          * this code can go away.
 665          */
 666         if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
 667                 return (error);
 668 
 669         /*
 670          * As with looking up the actual device to mount, PCFS cannot rely
 671          * on just the checks done by vfs_ismounted() whether a given device
 672          * is mounted already. The additional check against the "PCFS syntax"
 673          * is done in  pcfs_device_ismounted().
 674          */
 675         remounting = (uap->flags & MS_REMOUNT);
 676 
 677         if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
 678             &pseudodev))
 679                 return (error);
 680 
 681         if (remounting)
 682                 return (0);
 683 
 684         /*
 685          * Mount the filesystem.
 686          * An instance structure is required before the attempt to locate
 687          * and parse the FAT BPB. This is because mount options may change
 688          * the behaviour of the filesystem type matching code. Precreate
 689          * it and fill it in to a degree that allows parsing the mount
 690          * options.
 691          */
 692         devvp = makespecvp(xdev, VBLK);
 693         if (IS_SWAPVP(devvp)) {
 694                 VN_RELE(devvp);
 695                 return (EBUSY);
 696         }
 697         error = VOP_OPEN(&devvp,
 698             (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
 699         if (error) {
 700                 VN_RELE(devvp);
 701                 return (error);
 702         }
 703 
 704         fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
 705         fsp->pcfs_vfs = vfsp;
 706         fsp->pcfs_xdev = xdev;
 707         fsp->pcfs_devvp = devvp;
 708         fsp->pcfs_ldrive = dos_ldrive;
 709         mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
 710         vfsp->vfs_data = fsp;
 711         vfsp->vfs_dev = pseudodev;
 712         vfsp->vfs_fstype = pcfstype;
 713         vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
 714         vfsp->vfs_bcount = 0;
 715         vfsp->vfs_bsize = fsp->pcfs_clsize;
 716 
 717         pcfs_parse_mntopts(fsp, uap);
 718 
 719         /*
 720          * This is the actual "mount" - the PCFS superblock check.
 721          *
 722          * Find the requested logical drive and the FAT BPB therein.
 723          * Check device type and flag the instance if media is removeable.
 724          *
 725          * Initializes most members of the filesystem instance structure.
 726          * Returns EINVAL if no valid BPB can be found. Other errors may
 727          * occur after I/O failures, or when invalid / unparseable partition
 728          * tables are encountered.
 729          */
 730         if (error = pc_getfattype(fsp))
 731                 goto errout;
 732 
 733         /*
 734          * Validate that we can access the FAT and that it is, to the
 735          * degree we can verify here, self-consistent.
 736          */
 737         if (error = pc_verify(fsp))
 738                 goto errout;
 739 
 740         /*
 741          * Record the time of the mount, to return as an "approximate"
 742          * timestamp for the FAT root directory. Since FAT roots don't
 743          * have timestamps, this is less confusing to the user than
 744          * claiming "zero" / Jan/01/1970.
 745          */
 746         gethrestime(&fsp->pcfs_mounttime);
 747 
 748         /*
 749          * Fix up the mount options. Because "noatime" is made default on
 750          * removeable media only, a fixed disk will have neither "atime"
 751          * nor "noatime" set. We set the options explicitly depending on
 752          * the PCFS_NOATIME flag, to inform the user of what applies.
 753          * Mount option cancellation will take care that the mutually
 754          * exclusive 'other' is cleared.
 755          */
 756         vfs_setmntopt(vfsp,
 757             fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
 758             NULL, 0);
 759 
 760         /*
 761          * All clear - insert the FS instance into PCFS' list.
 762          */
 763         mutex_enter(&pcfslock);
 764         fsp->pcfs_nxt = pc_mounttab;
 765         pc_mounttab = fsp;
 766         mutex_exit(&pcfslock);
 767         atomic_inc_32(&pcfs_mountcount);
 768         return (0);
 769 
 770 errout:
 771         (void) VOP_CLOSE(devvp,
 772             vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
 773             1, (offset_t)0, cr, NULL);
 774         VN_RELE(devvp);
 775         mutex_destroy(&fsp->pcfs_lock);
 776         kmem_free(fsp, sizeof (*fsp));
 777         return (error);
 778 
 779 }
 780 
 781 static int
 782 pcfs_unmount(
 783         struct vfs *vfsp,
 784         int flag,
 785         struct cred *cr)
 786 {
 787         struct pcfs *fsp, *fsp1;
 788 
 789         if (secpolicy_fs_unmount(cr, vfsp) != 0)
 790                 return (EPERM);
 791 
 792         fsp = VFSTOPCFS(vfsp);
 793 
 794         /*
 795          * We don't have to lock fsp because the VVFSLOCK in vfs layer will
 796          * prevent lookuppn from crossing the mount point.
 797          * If this is not a forced umount request and there's ongoing I/O,
 798          * don't allow the mount to proceed.
 799          */
 800         if (flag & MS_FORCE)
 801                 vfsp->vfs_flag |= VFS_UNMOUNTED;
 802         else if (fsp->pcfs_nrefs)
 803                 return (EBUSY);
 804 
 805         mutex_enter(&pcfslock);
 806 
 807         /*
 808          * If this is a forced umount request or if the fs instance has
 809          * been marked as beyond recovery, allow the umount to proceed
 810          * regardless of state. pc_diskchanged() forcibly releases all
 811          * inactive vnodes/pcnodes.
 812          */
 813         if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
 814                 rw_enter(&pcnodes_lock, RW_WRITER);
 815                 pc_diskchanged(fsp);
 816                 rw_exit(&pcnodes_lock);
 817         }
 818 
 819         /* now there should be no pcp node on pcfhead or pcdhead. */
 820 
 821         if (fsp == pc_mounttab) {
 822                 pc_mounttab = fsp->pcfs_nxt;
 823         } else {
 824                 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
 825                         if (fsp1->pcfs_nxt == fsp)
 826                                 fsp1->pcfs_nxt = fsp->pcfs_nxt;
 827         }
 828 
 829         mutex_exit(&pcfslock);
 830 
 831         /*
 832          * Since we support VFS_FREEVFS(), there's no need to
 833          * free the fsp right now. The framework will tell us
 834          * when the right time to do so has arrived by calling
 835          * into pcfs_freevfs.
 836          */
 837         return (0);
 838 }
 839 
 840 /*
 841  * find root of pcfs
 842  */
 843 static int
 844 pcfs_root(
 845         struct vfs *vfsp,
 846         struct vnode **vpp)
 847 {
 848         struct pcfs *fsp;
 849         struct pcnode *pcp;
 850         int error;
 851 
 852         fsp = VFSTOPCFS(vfsp);
 853         if (error = pc_lockfs(fsp, 0, 0))
 854                 return (error);
 855 
 856         pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
 857         pc_unlockfs(fsp);
 858         *vpp = PCTOV(pcp);
 859         pcp->pc_flags |= PC_EXTERNAL;
 860         return (0);
 861 }
 862 
 863 /*
 864  * Get file system statistics.
 865  */
 866 static int
 867 pcfs_statvfs(
 868         struct vfs *vfsp,
 869         struct statvfs64 *sp)
 870 {
 871         struct pcfs *fsp;
 872         int error;
 873         dev32_t d32;
 874 
 875         fsp = VFSTOPCFS(vfsp);
 876         error = pc_getfat(fsp);
 877         if (error)
 878                 return (error);
 879         bzero(sp, sizeof (*sp));
 880         sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
 881         sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
 882         sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
 883         sp->f_files = (fsfilcnt64_t)-1;
 884         sp->f_ffree = (fsfilcnt64_t)-1;
 885         sp->f_favail = (fsfilcnt64_t)-1;
 886 #ifdef notdef
 887         (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
 888 #endif /* notdef */
 889         (void) cmpldev(&d32, vfsp->vfs_dev);
 890         sp->f_fsid = d32;
 891         (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
 892         sp->f_flag = vf_to_stf(vfsp->vfs_flag);
 893         sp->f_namemax = PCFNAMESIZE;
 894         return (0);
 895 }
 896 
 897 static int
 898 pc_syncfsnodes(struct pcfs *fsp)
 899 {
 900         struct pchead *hp;
 901         struct pcnode *pcp;
 902         int error;
 903 
 904         if (error = pc_lockfs(fsp, 0, 0))
 905                 return (error);
 906 
 907         if (!(error = pc_syncfat(fsp))) {
 908                 hp = pcfhead;
 909                 while (hp < & pcfhead [ NPCHASH ]) {
 910                         rw_enter(&pcnodes_lock, RW_READER);
 911                         pcp = hp->pch_forw;
 912                         while (pcp != (struct pcnode *)hp) {
 913                                 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
 914                                         if (error = pc_nodesync(pcp))
 915                                                 break;
 916                                 pcp = pcp -> pc_forw;
 917                         }
 918                         rw_exit(&pcnodes_lock);
 919                         if (error)
 920                                 break;
 921                         hp++;
 922                 }
 923         }
 924         pc_unlockfs(fsp);
 925         return (error);
 926 }
 927 
 928 /*
 929  * Flush any pending I/O.
 930  */
 931 /*ARGSUSED*/
 932 static int
 933 pcfs_sync(
 934         struct vfs *vfsp,
 935         short flag,
 936         struct cred *cr)
 937 {
 938         struct pcfs *fsp;
 939         int error = 0;
 940 
 941         /* this prevents the filesystem from being umounted. */
 942         mutex_enter(&pcfslock);
 943         if (vfsp != NULL) {
 944                 fsp = VFSTOPCFS(vfsp);
 945                 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
 946                         error = pc_syncfsnodes(fsp);
 947                 } else {
 948                         rw_enter(&pcnodes_lock, RW_WRITER);
 949                         pc_diskchanged(fsp);
 950                         rw_exit(&pcnodes_lock);
 951                         error = EIO;
 952                 }
 953         } else {
 954                 fsp = pc_mounttab;
 955                 while (fsp != NULL) {
 956                         if (fsp->pcfs_flags & PCFS_IRRECOV) {
 957                                 rw_enter(&pcnodes_lock, RW_WRITER);
 958                                 pc_diskchanged(fsp);
 959                                 rw_exit(&pcnodes_lock);
 960                                 error = EIO;
 961                                 break;
 962                         }
 963                         error = pc_syncfsnodes(fsp);
 964                         if (error) break;
 965                         fsp = fsp->pcfs_nxt;
 966                 }
 967         }
 968         mutex_exit(&pcfslock);
 969         return (error);
 970 }
 971 
 972 int
 973 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
 974 {
 975         int err;
 976 
 977         if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
 978                 return (EIO);
 979 
 980         if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
 981                 fsp->pcfs_count++;
 982         } else {
 983                 mutex_enter(&fsp->pcfs_lock);
 984                 if (fsp->pcfs_flags & PCFS_LOCKED)
 985                         panic("pc_lockfs");
 986                 /*
 987                  * We check the IRRECOV bit again just in case somebody
 988                  * snuck past the initial check but then got held up before
 989                  * they could grab the lock.  (And in the meantime someone
 990                  * had grabbed the lock and set the bit)
 991                  */
 992                 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
 993                         if ((err = pc_getfat(fsp))) {
 994                                 mutex_exit(&fsp->pcfs_lock);
 995                                 return (err);
 996                         }
 997                 }
 998                 fsp->pcfs_flags |= PCFS_LOCKED;
 999                 fsp->pcfs_owner = curthread;
1000                 fsp->pcfs_count++;
1001         }
1002         return (0);
1003 }
1004 
1005 void
1006 pc_unlockfs(struct pcfs *fsp)
1007 {
1008 
1009         if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1010                 panic("pc_unlockfs");
1011         if (--fsp->pcfs_count < 0)
1012                 panic("pc_unlockfs: count");
1013         if (fsp->pcfs_count == 0) {
1014                 fsp->pcfs_flags &= ~PCFS_LOCKED;
1015                 fsp->pcfs_owner = 0;
1016                 mutex_exit(&fsp->pcfs_lock);
1017         }
1018 }
1019 
1020 int
1021 pc_syncfat(struct pcfs *fsp)
1022 {
1023         struct buf *bp;
1024         int nfat;
1025         int     error = 0;
1026         struct fat_od_fsi *fsinfo_disk;
1027 
1028         if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1029             !(fsp->pcfs_flags & PCFS_FATMOD))
1030                 return (0);
1031         /*
1032          * write out all copies of FATs
1033          */
1034         fsp->pcfs_flags &= ~PCFS_FATMOD;
1035         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1036         for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1037                 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1038                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1039                 if (error) {
1040                         pc_mark_irrecov(fsp);
1041                         return (EIO);
1042                 }
1043         }
1044         pc_clear_fatchanges(fsp);
1045 
1046         /*
1047          * Write out fsinfo sector.
1048          */
1049         if (IS_FAT32(fsp)) {
1050                 bp = bread(fsp->pcfs_xdev,
1051                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1052                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1053                         error = geterror(bp);
1054                 }
1055                 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1056                 if (!error && FSISIG_OK(fsinfo_disk)) {
1057                         fsinfo_disk->fsi_incore.fs_free_clusters =
1058                             LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1059                         fsinfo_disk->fsi_incore.fs_next_free =
1060                             LE_32(FSINFO_UNKNOWN);
1061                         bwrite2(bp);
1062                         error = geterror(bp);
1063                 }
1064                 brelse(bp);
1065                 if (error) {
1066                         pc_mark_irrecov(fsp);
1067                         return (EIO);
1068                 }
1069         }
1070         return (0);
1071 }
1072 
1073 void
1074 pc_invalfat(struct pcfs *fsp)
1075 {
1076         struct pcfs *xfsp;
1077         int mount_cnt = 0;
1078 
1079         if (fsp->pcfs_fatp == (uchar_t *)0)
1080                 panic("pc_invalfat");
1081         /*
1082          * Release FAT
1083          */
1084         kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1085         fsp->pcfs_fatp = NULL;
1086         kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1087         fsp->pcfs_fat_changemap = NULL;
1088         /*
1089          * Invalidate all the blocks associated with the device.
1090          * Not needed if stateless.
1091          */
1092         for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1093                 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1094                         mount_cnt++;
1095 
1096         if (!mount_cnt)
1097                 binval(fsp->pcfs_xdev);
1098         /*
1099          * close mounted device
1100          */
1101         (void) VOP_CLOSE(fsp->pcfs_devvp,
1102             (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1103             1, (offset_t)0, CRED(), NULL);
1104 }
1105 
1106 void
1107 pc_badfs(struct pcfs *fsp)
1108 {
1109         cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1110             getmajor(fsp->pcfs_devvp->v_rdev),
1111             getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1112 }
1113 
1114 /*
1115  * The problem with supporting NFS on the PCFS filesystem is that there
1116  * is no good place to keep the generation number. The only possible
1117  * place is inside a directory entry. There are a few words that we
1118  * don't use - they store NT & OS/2 attributes, and the creation/last access
1119  * time of the file - but it seems wrong to use them. In addition, directory
1120  * entries come and go. If a directory is removed completely, its directory
1121  * blocks are freed and the generation numbers are lost. Whereas in ufs,
1122  * inode blocks are dedicated for inodes, so the generation numbers are
1123  * permanently kept on the disk.
1124  */
1125 static int
1126 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1127 {
1128         struct pcnode *pcp;
1129         struct pc_fid *pcfid;
1130         struct pcfs *fsp;
1131         struct pcdir *ep;
1132         daddr_t eblkno;
1133         int eoffset;
1134         struct buf *bp;
1135         int error;
1136         pc_cluster32_t  cn;
1137 
1138         pcfid = (struct pc_fid *)fidp;
1139         fsp = VFSTOPCFS(vfsp);
1140 
1141         error = pc_lockfs(fsp, 0, 0);
1142         if (error) {
1143                 *vpp = NULL;
1144                 return (error);
1145         }
1146 
1147         if (pcfid->pcfid_block == 0) {
1148                 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1149                 pcp->pc_flags |= PC_EXTERNAL;
1150                 *vpp = PCTOV(pcp);
1151                 pc_unlockfs(fsp);
1152                 return (0);
1153         }
1154         eblkno = pcfid->pcfid_block;
1155         eoffset = pcfid->pcfid_offset;
1156 
1157         if ((pc_dbtocl(fsp,
1158             eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1159             (eoffset > fsp->pcfs_clsize)) {
1160                 pc_unlockfs(fsp);
1161                 *vpp = NULL;
1162                 return (EINVAL);
1163         }
1164 
1165         if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1166             < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1167                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1168                     fsp->pcfs_clsize);
1169         } else {
1170                 /*
1171                  * This is an access "backwards" into the FAT12/FAT16
1172                  * root directory. A better code structure would
1173                  * significantly improve maintainability here ...
1174                  */
1175                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1176                     (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1177         }
1178         if (bp->b_flags & (B_ERROR | B_STALE)) {
1179                 error = geterror(bp);
1180                 brelse(bp);
1181                 if (error)
1182                         pc_mark_irrecov(fsp);
1183                 *vpp = NULL;
1184                 pc_unlockfs(fsp);
1185                 return (error);
1186         }
1187         ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1188         /*
1189          * Ok, if this is a valid file handle that we gave out,
1190          * then simply ensuring that the creation time matches,
1191          * the entry has not been deleted, and it has a valid first
1192          * character should be enough.
1193          *
1194          * Unfortunately, verifying that the <blkno, offset> _still_
1195          * refers to a directory entry is not easy, since we'd have
1196          * to search _all_ directories starting from root to find it.
1197          * That's a high price to pay just in case somebody is forging
1198          * file handles. So instead we verify that as much of the
1199          * entry is valid as we can:
1200          *
1201          * 1. The starting cluster is 0 (unallocated) or valid
1202          * 2. It is not an LFN entry
1203          * 3. It is not hidden (unless mounted as such)
1204          * 4. It is not the label
1205          */
1206         cn = pc_getstartcluster(fsp, ep);
1207         /*
1208          * if the starting cluster is valid, but not valid according
1209          * to pc_validcl(), force it to be to simplify the following if.
1210          */
1211         if (cn == 0)
1212                 cn = PCF_FIRSTCLUSTER;
1213         if (IS_FAT32(fsp)) {
1214                 if (cn >= PCF_LASTCLUSTER32)
1215                         cn = PCF_FIRSTCLUSTER;
1216         } else {
1217                 if (cn >= PCF_LASTCLUSTER)
1218                         cn = PCF_FIRSTCLUSTER;
1219         }
1220         if ((!pc_validcl(fsp, cn)) ||
1221             (PCDL_IS_LFN(ep)) ||
1222             (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1223             ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1224                 bp->b_flags |= B_STALE | B_AGE;
1225                 brelse(bp);
1226                 pc_unlockfs(fsp);
1227                 return (EINVAL);
1228         }
1229         if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1230             (ep->pcd_filename[0] != PCD_ERASED) &&
1231             (pc_validchar(ep->pcd_filename[0]) ||
1232             (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1233                 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1234                 pcp->pc_flags |= PC_EXTERNAL;
1235                 *vpp = PCTOV(pcp);
1236         } else {
1237                 *vpp = NULL;
1238         }
1239         bp->b_flags |= B_STALE | B_AGE;
1240         brelse(bp);
1241         pc_unlockfs(fsp);
1242         return (0);
1243 }
1244 
1245 /*
1246  * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1247  * a meg), so we can't bread() it all in at once. This routine reads a
1248  * fat a chunk at a time.
1249  */
1250 static int
1251 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1252 {
1253         struct buf *bp;
1254         size_t off;
1255         size_t readsize;
1256         daddr_t diskblk;
1257         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1258         daddr_t start = fsp->pcfs_fatstart;
1259 
1260         readsize = fsp->pcfs_clsize;
1261         for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1262                 if (readsize > (fatsize - off))
1263                         readsize = fatsize - off;
1264                 diskblk = pc_dbdaddr(fsp, start +
1265                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1266                 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1267                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1268                         brelse(bp);
1269                         return (EIO);
1270                 }
1271                 bp->b_flags |= B_STALE | B_AGE;
1272                 bcopy(bp->b_un.b_addr, fatp, readsize);
1273                 brelse(bp);
1274         }
1275         return (0);
1276 }
1277 
1278 /*
1279  * We write the FAT out a _lot_, in order to make sure that it
1280  * is up-to-date. But on a FAT32 system (large drive, small clusters)
1281  * the FAT might be a couple of megabytes, and writing it all out just
1282  * because we created or deleted a small file is painful (especially
1283  * since we do it for each alternate FAT too). So instead, for FAT16 and
1284  * FAT32 we only write out the bit that has changed. We don't clear
1285  * the 'updated' fields here because the caller might be writing out
1286  * several FATs, so the caller must use pc_clear_fatchanges() after
1287  * all FATs have been updated.
1288  * This function doesn't take "start" from fsp->pcfs_dosstart because
1289  * callers can use it to write either the primary or any of the alternate
1290  * FAT tables.
1291  */
1292 static int
1293 pc_writefat(struct pcfs *fsp, daddr_t start)
1294 {
1295         struct buf *bp;
1296         size_t off;
1297         size_t writesize;
1298         int     error;
1299         uchar_t *fatp = fsp->pcfs_fatp;
1300         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1301 
1302         writesize = fsp->pcfs_clsize;
1303         for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1304                 if (writesize > (fatsize - off))
1305                         writesize = fatsize - off;
1306                 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1307                         continue;
1308                 }
1309                 bp = ngeteblk(writesize);
1310                 bp->b_edev = fsp->pcfs_xdev;
1311                 bp->b_dev = cmpdev(bp->b_edev);
1312                 bp->b_blkno = pc_dbdaddr(fsp, start +
1313                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1314                 bcopy(fatp, bp->b_un.b_addr, writesize);
1315                 bwrite2(bp);
1316                 error = geterror(bp);
1317                 brelse(bp);
1318                 if (error) {
1319                         return (error);
1320                 }
1321         }
1322         return (0);
1323 }
1324 
1325 /*
1326  * Mark the FAT cluster that 'cn' is stored in as modified.
1327  */
1328 void
1329 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1330 {
1331         pc_cluster32_t  bn;
1332         size_t          size;
1333 
1334         /* which fat block is the cluster number stored in? */
1335         if (IS_FAT32(fsp)) {
1336                 size = sizeof (pc_cluster32_t);
1337                 bn = pc_lblkno(fsp, cn * size);
1338                 fsp->pcfs_fat_changemap[bn] = 1;
1339         } else if (IS_FAT16(fsp)) {
1340                 size = sizeof (pc_cluster16_t);
1341                 bn = pc_lblkno(fsp, cn * size);
1342                 fsp->pcfs_fat_changemap[bn] = 1;
1343         } else {
1344                 offset_t off;
1345                 pc_cluster32_t nbn;
1346 
1347                 ASSERT(IS_FAT12(fsp));
1348                 off = cn + (cn >> 1);
1349                 bn = pc_lblkno(fsp, off);
1350                 fsp->pcfs_fat_changemap[bn] = 1;
1351                 /* does this field wrap into the next fat cluster? */
1352                 nbn = pc_lblkno(fsp, off + 1);
1353                 if (nbn != bn) {
1354                         fsp->pcfs_fat_changemap[nbn] = 1;
1355                 }
1356         }
1357 }
1358 
1359 /*
1360  * return whether the FAT cluster 'bn' is updated and needs to
1361  * be written out.
1362  */
1363 int
1364 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1365 {
1366         return (fsp->pcfs_fat_changemap[bn] == 1);
1367 }
1368 
1369 /*
1370  * Implementation of VFS_FREEVFS() to support forced umounts.
1371  * This is called by the vfs framework after umount, to trigger
1372  * the release of any resources still associated with the given
1373  * vfs_t once the need to keep them has gone away.
1374  */
1375 void
1376 pcfs_freevfs(vfs_t *vfsp)
1377 {
1378         struct pcfs *fsp = VFSTOPCFS(vfsp);
1379 
1380         mutex_enter(&pcfslock);
1381         /*
1382          * Purging the FAT closes the device - can't do any more
1383          * I/O after this.
1384          */
1385         if (fsp->pcfs_fatp != (uchar_t *)0)
1386                 pc_invalfat(fsp);
1387         mutex_exit(&pcfslock);
1388 
1389         VN_RELE(fsp->pcfs_devvp);
1390         mutex_destroy(&fsp->pcfs_lock);
1391         kmem_free(fsp, sizeof (*fsp));
1392 
1393         /*
1394          * Allow _fini() to succeed now, if so desired.
1395          */
1396         atomic_dec_32(&pcfs_mountcount);
1397 }
1398 
1399 
1400 /*
1401  * PC-style partition parsing and FAT BPB identification/validation code.
1402  * The partition parsers here assume:
1403  *      - a FAT filesystem will be in a partition that has one of a set of
1404  *        recognized partition IDs
1405  *      - the user wants the 'numbering' (C:, D:, ...) that one would get
1406  *        on MSDOS 6.x.
1407  *        That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1408  *        will not factor in the enumeration.
1409  * These days, such assumptions should be revisited. FAT is no longer the
1410  * only game in 'PC town'.
1411  */
1412 /*
1413  * isDosDrive()
1414  *      Boolean function.  Give it the systid field for an fdisk partition
1415  *      and it decides if that's a systid that describes a DOS drive.  We
1416  *      use systid values defined in sys/dktp/fdisk.h.
1417  */
1418 static int
1419 isDosDrive(uchar_t checkMe)
1420 {
1421         return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1422             (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1423             (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1424             (checkMe == DIAGPART));
1425 }
1426 
1427 
1428 /*
1429  * isDosExtended()
1430  *      Boolean function.  Give it the systid field for an fdisk partition
1431  *      and it decides if that's a systid that describes an extended DOS
1432  *      partition.
1433  */
1434 static int
1435 isDosExtended(uchar_t checkMe)
1436 {
1437         return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1438 }
1439 
1440 
1441 /*
1442  * isBootPart()
1443  *      Boolean function.  Give it the systid field for an fdisk partition
1444  *      and it decides if that's a systid that describes a Solaris boot
1445  *      partition.
1446  */
1447 static int
1448 isBootPart(uchar_t checkMe)
1449 {
1450         return (checkMe == X86BOOT);
1451 }
1452 
1453 
1454 /*
1455  * noLogicalDrive()
1456  *      Display error message about not being able to find a logical
1457  *      drive.
1458  */
1459 static void
1460 noLogicalDrive(int ldrive)
1461 {
1462         if (ldrive == BOOT_PARTITION_DRIVE) {
1463                 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1464         } else {
1465                 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1466         }
1467 }
1468 
1469 
1470 /*
1471  * findTheDrive()
1472  *      Discover offset of the requested logical drive, and return
1473  *      that offset (startSector), the systid of that drive (sysid),
1474  *      and a buffer pointer (bp), with the buffer contents being
1475  *      the first sector of the logical drive (i.e., the sector that
1476  *      contains the BPB for that drive).
1477  *
1478  * Note: this code is not capable of addressing >2TB disks, as it uses
1479  *       daddr_t not diskaddr_t, some of the calculations would overflow
1480  */
/*
 * COPY_PTBL()
 *      Copy the FD_NUMPART partition entries found in the 'parts' member
 *      of the struct mboot at 'mbr' into the struct ipart array 'ptblp'.
 */
#define COPY_PTBL(mbr, ptblp)                                   \
        bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),        \
            FD_NUMPART * sizeof (struct ipart))
1484 
1485 static int
1486 findTheDrive(struct pcfs *fsp, buf_t **bp)
1487 {
1488         int ldrive = fsp->pcfs_ldrive;
1489         dev_t dev = fsp->pcfs_devvp->v_rdev;
1490 
1491         struct ipart dosp[FD_NUMPART];  /* incore fdisk partition structure */
1492         daddr_t lastseek = 0;           /* Disk block we sought previously */
1493         daddr_t diskblk = 0;            /* Disk block to get */
1494         daddr_t xstartsect;             /* base of Extended DOS partition */
1495         int logicalDriveCount = 0;      /* Count of logical drives seen */
1496         int extendedPart = -1;          /* index of extended dos partition */
1497         int primaryPart = -1;           /* index of primary dos partition */
1498         int bootPart = -1;              /* index of a Solaris boot partition */
1499         int xnumsect = -1;              /* length of extended DOS partition */
1500         int driveIndex;                 /* computed FDISK table index */
1501         daddr_t startsec;
1502         len_t mediasize;
1503         int i;
1504         /*
1505          * Count of drives in the current extended partition's
1506          * FDISK table, and indexes of the drives themselves.
1507          */
1508         int extndDrives[FD_NUMPART];
1509         int numDrives = 0;
1510 
1511         /*
1512          * Count of drives (beyond primary) in master boot record's
1513          * FDISK table, and indexes of the drives themselves.
1514          */
1515         int extraDrives[FD_NUMPART];
1516         int numExtraDrives = 0;
1517 
1518         /*
1519          * "ldrive == 0" should never happen, as this is a request to
1520          * mount the physical device (and ignore partitioning). The code
1521          * in pcfs_mount() should have made sure that a logical drive number
1522          * is at least 1, meaning we're looking for drive "C:". It is not
1523          * safe (and a bug in the callers of this function) to request logical
1524          * drive number 0; we could ASSERT() but a graceful EIO is a more
1525          * polite way.
1526          */
1527         if (ldrive == 0) {
1528                 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1529                 noLogicalDrive(ldrive);
1530                 return (EIO);
1531         }
1532 
1533         /*
1534          *  Copy from disk block into memory aligned structure for fdisk usage.
1535          */
1536         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1537 
1538         /*
1539          * This check is ok because a FAT BPB and a master boot record (MBB)
1540          * have the same signature, in the same position within the block.
1541          */
1542         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1543                 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1544                     "device (%x.%x):%d\n",
1545                     getmajor(dev), getminor(dev), ldrive);
1546                 return (EINVAL);
1547         }
1548 
1549         /*
1550          * Get a summary of what is in the Master FDISK table.
1551          * Normally we expect to find one partition marked as a DOS drive.
1552          * This partition is the one Windows calls the primary dos partition.
1553          * If the machine has any logical drives then we also expect
1554          * to find a partition marked as an extended DOS partition.
1555          *
1556          * Sometimes we'll find multiple partitions marked as DOS drives.
1557          * The Solaris fdisk program allows these partitions
1558          * to be created, but Windows fdisk no longer does.  We still need
1559          * to support these, though, since Windows does.  We also need to fix
1560          * our fdisk to behave like the Windows version.
1561          *
1562          * It turns out that some off-the-shelf media have *only* an
1563          * Extended partition, so we need to deal with that case as well.
1564          *
1565          * Only a single (the first) Extended or Boot Partition will
1566          * be recognized.  Any others will be ignored.
1567          */
1568         for (i = 0; i < FD_NUMPART; i++) {
1569                 DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1570                     uint_t, (uint_t)dosp[i].systid,
1571                     uint_t, LE_32(dosp[i].relsect),
1572                     uint_t, LE_32(dosp[i].numsect));
1573 
1574                 if (isDosDrive(dosp[i].systid)) {
1575                         if (primaryPart < 0) {
1576                                 logicalDriveCount++;
1577                                 primaryPart = i;
1578                         } else {
1579                                 extraDrives[numExtraDrives++] = i;
1580                         }
1581                         continue;
1582                 }
1583                 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1584                         extendedPart = i;
1585                         continue;
1586                 }
1587                 if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1588                         bootPart = i;
1589                         continue;
1590                 }
1591         }
1592 
1593         if (ldrive == BOOT_PARTITION_DRIVE) {
1594                 if (bootPart < 0) {
1595                         noLogicalDrive(ldrive);
1596                         return (EINVAL);
1597                 }
1598                 startsec = LE_32(dosp[bootPart].relsect);
1599                 mediasize = LE_32(dosp[bootPart].numsect);
1600                 goto found;
1601         }
1602 
1603         if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1604                 startsec = LE_32(dosp[primaryPart].relsect);
1605                 mediasize = LE_32(dosp[primaryPart].numsect);
1606                 goto found;
1607         }
1608 
1609         /*
1610          * We are not looking for the C: drive (or the primary drive
1611          * was not found), so we had better have an extended partition
1612          * or extra drives in the Master FDISK table.
1613          */
1614         if ((extendedPart < 0) && (numExtraDrives == 0)) {
1615                 cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1616                 noLogicalDrive(ldrive);
1617                 return (EINVAL);
1618         }
1619 
1620         if (extendedPart >= 0) {
1621                 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1622                 xnumsect = LE_32(dosp[extendedPart].numsect);
1623                 do {
1624                         /*
1625                          *  If the seek would not cause us to change
1626                          *  position on the drive, then we're out of
1627                          *  extended partitions to examine.
1628                          */
1629                         if (diskblk == lastseek)
1630                                 break;
1631                         logicalDriveCount += numDrives;
1632                         /*
1633                          *  Seek the next extended partition, and find
1634                          *  logical drives within it.
1635                          */
1636                         brelse(*bp);
1637                         /*
1638                          * bread() block numbers are multiples of DEV_BSIZE
1639                          * but the device sector size (the unit of partitioning)
1640                          * might be larger than that; pcfs_get_device_info()
1641                          * has calculated the multiplicator for us.
1642                          */
1643                         *bp = bread(dev,
1644                             pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1645                         if ((*bp)->b_flags & B_ERROR) {
1646                                 return (EIO);
1647                         }
1648 
1649                         lastseek = diskblk;
1650                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1651                         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1652                                 cmn_err(CE_NOTE, "!pcfs: "
1653                                     "extended partition table signature err, "
1654                                     "device (%x.%x):%d, LBA %u",
1655                                     getmajor(dev), getminor(dev), ldrive,
1656                                     (uint_t)pc_dbdaddr(fsp, diskblk));
1657                                 return (EINVAL);
1658                         }
1659                         /*
1660                          *  Count up drives, and track where the next
1661                          *  extended partition is in case we need it.  We
1662                          *  are expecting only one extended partition.  If
1663                          *  there is more than one we'll only go to the
1664                          *  first one we see, but warn about ignoring.
1665                          */
1666                         numDrives = 0;
1667                         for (i = 0; i < FD_NUMPART; i++) {
1668                                 DTRACE_PROBE4(extendedpart,
1669                                     struct pcfs *, fsp,
1670                                     uint_t, (uint_t)dosp[i].systid,
1671                                     uint_t, LE_32(dosp[i].relsect),
1672                                     uint_t, LE_32(dosp[i].numsect));
1673                                 if (isDosDrive(dosp[i].systid)) {
1674                                         extndDrives[numDrives++] = i;
1675                                 } else if (isDosExtended(dosp[i].systid)) {
1676                                         if (diskblk != lastseek) {
1677                                                 /*
1678                                                  * Already found an extended
1679                                                  * partition in this table.
1680                                                  */
1681                                                 cmn_err(CE_NOTE,
1682                                                     "!pcfs: ignoring unexpected"
1683                                                     " additional extended"
1684                                                     " partition");
1685                                         } else {
1686                                                 diskblk = xstartsect +
1687                                                     LE_32(dosp[i].relsect);
1688                                         }
1689                                 }
1690                         }
1691                 } while (ldrive > logicalDriveCount + numDrives);
1692 
1693                 ASSERT(numDrives <= FD_NUMPART);
1694 
1695                 if (ldrive <= logicalDriveCount + numDrives) {
1696                         /*
1697                          * The number of logical drives we've found thus
1698                          * far is enough to get us to the one we were
1699                          * searching for.
1700                          */
1701                         driveIndex = logicalDriveCount + numDrives - ldrive;
1702                         mediasize =
1703                             LE_32(dosp[extndDrives[driveIndex]].numsect);
1704                         startsec =
1705                             LE_32(dosp[extndDrives[driveIndex]].relsect) +
1706                             lastseek;
1707                         if (startsec > (xstartsect + xnumsect)) {
1708                                 cmn_err(CE_NOTE, "!pcfs: extended partition "
1709                                     "values bad");
1710                                 return (EINVAL);
1711                         }
1712                         goto found;
1713                 } else {
1714                         /*
1715                          * We ran out of extended dos partition
1716                          * drives.  The only hope now is to go
1717                          * back to extra drives defined in the master
1718                          * fdisk table.  But we overwrote that table
1719                          * already, so we must load it in again.
1720                          */
1721                         logicalDriveCount += numDrives;
1722                         brelse(*bp);
1723                         ASSERT(fsp->pcfs_dosstart == 0);
1724                         *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1725                             fsp->pcfs_secsize);
1726                         if ((*bp)->b_flags & B_ERROR) {
1727                                 return (EIO);
1728                         }
1729                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1730                 }
1731         }
1732         /*
1733          *  Still haven't found the drive, is it an extra
1734          *  drive defined in the main FDISK table?
1735          */
1736         if (ldrive <= logicalDriveCount + numExtraDrives) {
1737                 driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1738                 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1739                 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1740                 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1741                 goto found;
1742         }
1743         /*
1744          *  Still haven't found the drive, and there is
1745          *  nowhere else to look.
1746          */
1747         noLogicalDrive(ldrive);
1748         return (EINVAL);
1749 
1750 found:
1751         /*
1752          * We need this value in units of sectorsize, because PCFS' internal
1753          * offset calculations go haywire for > 512Byte sectors unless all
1754          * pcfs_.*start values are in units of sectors.
1755          * So, assign before the capacity check (that's done in DEV_BSIZE)
1756          */
1757         fsp->pcfs_dosstart = startsec;
1758 
1759         /*
1760          * convert from device sectors to proper units:
1761          *      - starting sector: DEV_BSIZE (as argument to bread())
1762          *      - media size: Bytes
1763          */
1764         startsec = pc_dbdaddr(fsp, startsec);
1765         mediasize *= fsp->pcfs_secsize;
1766 
1767         /*
1768          * some additional validation / warnings in case the partition table
1769          * and the actual media capacity are not in accordance ...
1770          */
1771         if (fsp->pcfs_mediasize != 0) {
1772                 diskaddr_t startoff =
1773                     (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1774 
1775                 if (startoff >= fsp->pcfs_mediasize ||
1776                     startoff + mediasize > fsp->pcfs_mediasize) {
1777                         cmn_err(CE_WARN,
1778                             "!pcfs: partition size (LBA start %u, %lld bytes, "
1779                             "device (%x.%x):%d) smaller than "
1780                             "mediasize (%lld bytes).\n"
1781                             "filesystem may be truncated, access errors "
1782                             "may result.\n",
1783                             (uint_t)startsec, (long long)mediasize,
1784                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1785                             fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1786                 }
1787         } else {
1788                 fsp->pcfs_mediasize = mediasize;
1789         }
1790 
1791         return (0);
1792 }
1793 
1794 
1795 static fattype_t
1796 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1797 {
1798         uint32_t ncl = fsp->pcfs_ncluster;
1799 
1800         if (ncl <= 4096) {
1801                 if (bpb_get_FatSz16(bpb) == 0)
1802                         return (FAT_UNKNOWN);
1803 
1804                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1805                     bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1806                         return (FAT12);
1807                 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1808                         return (FAT12);
1809                 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1810                         return (FAT16);
1811 
1812                 switch (bpb_get_Media(bpb)) {
1813                         case SS8SPT:
1814                         case DS8SPT:
1815                         case SS9SPT:
1816                         case DS9SPT:
1817                         case DS18SPT:
1818                         case DS9_15SPT:
1819                                 /*
1820                                  * Is this reliable - all floppies are FAT12 ?
1821                                  */
1822                                 return (FAT12);
1823                         case MD_FIXED:
1824                                 /*
1825                                  * Is this reliable - disks are always FAT16 ?
1826                                  */
1827                                 return (FAT16);
1828                         default:
1829                                 break;
1830                 }
1831         } else if (ncl <= 65536) {
1832                 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1833                         return (FAT32);
1834                 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1835                         return (FAT32);
1836                 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1837                         return (FAT32);
1838 
1839                 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1840                         return (FAT16);
1841                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1842                         return (FAT16);
1843         }
1844 
1845         /*
1846          * We don't know
1847          */
1848         return (FAT_UNKNOWN);
1849 }
1850 
1851 /*
1852  * Check to see if the BPB we found is correct.
1853  *
1854  * This looks far more complicated that it needs to be for pure structural
1855  * validation. The reason for this is that parseBPB() is also used for
1856  * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1857  * BPB fields have 'known good' values, even if we do not reject the BPB
1858  * when attempting to mount the filesystem.
1859  */
1860 static int
1861 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1862 {
1863         fattype_t type;
1864 
1865         uint32_t        ncl;    /* number of clusters in file area */
1866         uint32_t        rec;
1867         uint32_t        reserved;
1868         uint32_t        fsisec, bkbootsec;
1869         blkcnt_t        totsec, totsec16, totsec32, datasec;
1870         size_t          fatsec, fatsec16, fatsec32, rdirsec;
1871         size_t          secsize;
1872         len_t           mediasize;
1873         uint64_t        validflags = 0;
1874 
1875         if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1876                 validflags |= BPB_BPBSIG_OK;
1877 
1878         rec = bpb_get_RootEntCnt(bpb);
1879         reserved = bpb_get_RsvdSecCnt(bpb);
1880         fsisec = bpb_get_FSInfo32(bpb);
1881         bkbootsec = bpb_get_BkBootSec32(bpb);
1882         totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1883         totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1884         fatsec16 = bpb_get_FatSz16(bpb);
1885         fatsec32 = bpb_get_FatSz32(bpb);
1886 
1887         totsec = totsec16 ? totsec16 : totsec32;
1888         fatsec = fatsec16 ? fatsec16 : fatsec32;
1889 
1890         secsize = bpb_get_BytesPerSec(bpb);
1891         if (!VALID_SECSIZE(secsize))
1892                 secsize = fsp->pcfs_secsize;
1893         if (secsize != fsp->pcfs_secsize) {
1894                 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1895                     getmajor(fsp->pcfs_xdev),
1896                     getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1897                 PC_DPRINTF2(3, "!BPB secsize %d != "
1898                     "autodetected media block size %d\n",
1899                     (int)secsize, (int)fsp->pcfs_secsize);
1900                 if (fsp->pcfs_ldrive) {
1901                         /*
1902                          * We've already attempted to parse the partition
1903                          * table. If the block size used for that don't match
1904                          * the PCFS sector size, we're hosed one way or the
1905                          * other. Just try what happens.
1906                          */
1907                         secsize = fsp->pcfs_secsize;
1908                         PC_DPRINTF1(3,
1909                             "!pcfs: Using autodetected secsize %d\n",
1910                             (int)secsize);
1911                 } else {
1912                         /*
1913                          * This allows mounting lofi images of PCFS partitions
1914                          * with sectorsize != DEV_BSIZE. We can't parse the
1915                          * partition table on whole-disk images unless the
1916                          * (undocumented) "secsize=..." mount option is used,
1917                          * but at least this allows us to mount if we have
1918                          * an image of a partition.
1919                          */
1920                         PC_DPRINTF1(3,
1921                             "!pcfs: Using BPB secsize %d\n", (int)secsize);
1922                 }
1923         }
1924 
1925         if (fsp->pcfs_mediasize == 0) {
1926                 mediasize = (len_t)totsec * (len_t)secsize;
1927                 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1928                     "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1929                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1930                     fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1931         } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1932                 cmn_err(CE_WARN,
1933                     "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1934                     "FAT BPB mediasize (%lld Bytes).\n"
1935                     "truncated filesystem on device (%x.%x):%d, access errors "
1936                     "possible.\n",
1937                     (long long)fsp->pcfs_mediasize,
1938                     (long long)(totsec * (blkcnt_t)secsize),
1939                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1940                     fsp->pcfs_ldrive);
1941                 mediasize = fsp->pcfs_mediasize;
1942         } else {
1943                 /*
1944                  * This is actually ok. A FAT needs not occupy the maximum
1945                  * space available in its partition, it can be shorter.
1946                  */
1947                 mediasize = (len_t)totsec * (len_t)secsize;
1948         }
1949 
1950         /*
1951          * Since we let just about anything pass through this function,
1952          * fence against divide-by-zero here.
1953          */
1954         if (secsize)
1955                 rdirsec = roundup(rec * 32, secsize) / secsize;
1956         else
1957                 rdirsec = 0;
1958 
1959         /*
1960          * This assignment is necessary before pc_dbdaddr() can first be
1961          * used. Must initialize the value here.
1962          */
1963         fsp->pcfs_secsize = secsize;
1964         fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
1965 
1966         fsp->pcfs_mediasize = mediasize;
1967 
1968         fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
1969         fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
1970         fsp->pcfs_mediadesc = bpb_get_Media(bpb);
1971         fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
1972         fsp->pcfs_rdirsec = rdirsec;
1973 
1974         /*
1975          * Remember: All PCFS offset calculations in sectors. Before I/O
1976          * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
1977          * necessary so that media with > 512Byte sector sizes work correctly.
1978          */
1979         fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
1980         fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
1981         fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
1982         datasec = totsec -
1983             (blkcnt_t)fatsec * fsp->pcfs_numfat -
1984             (blkcnt_t)rdirsec -
1985             (blkcnt_t)reserved;
1986 
1987         DTRACE_PROBE4(fatgeometry,
1988             blkcnt_t, totsec, size_t, fatsec,
1989             size_t, rdirsec, blkcnt_t, datasec);
1990 
1991         /*
1992          * UINT32_MAX is an underflow check - we calculate in "blkcnt_t" which
1993          * is 64bit in order to be able to catch "impossible" sector counts.
1994          * A sector count in FAT must fit 32bit unsigned int.
1995          */
1996         if (totsec != 0 &&
1997             (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
1998             (len_t)totsec * (len_t)secsize <= mediasize &&
1999             datasec < totsec && datasec <= UINT32_MAX)
2000                 validflags |= BPB_TOTSEC_OK;
2001 
2002         if (mediasize >= (len_t)datasec * (len_t)secsize)
2003                 validflags |= BPB_MEDIASZ_OK;
2004 
2005         if (VALID_SECSIZE(secsize))
2006                 validflags |= BPB_SECSIZE_OK;
2007         if (VALID_SPCL(fsp->pcfs_spcl))
2008                 validflags |= BPB_SECPERCLUS_OK;
2009         if (VALID_CLSIZE(fsp->pcfs_clsize))
2010                 validflags |= BPB_CLSIZE_OK;
2011         if (VALID_NUMFATS(fsp->pcfs_numfat))
2012                 validflags |= BPB_NUMFAT_OK;
2013         if (VALID_RSVDSEC(reserved) && reserved < totsec)
2014                 validflags |= BPB_RSVDSECCNT_OK;
2015         if (VALID_MEDIA(fsp->pcfs_mediadesc))
2016                 validflags |= BPB_MEDIADESC_OK;
2017         if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2018                 validflags |= BPB_BOOTSIG16_OK;
2019         if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2020                 validflags |= BPB_BOOTSIG32_OK;
2021         if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2022                 validflags |= BPB_FSTYPSTR16_OK;
2023         if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2024                 validflags |= BPB_FSTYPSTR32_OK;
2025         if (VALID_OEMNAME(bpb_OEMName(bpb)))
2026                 validflags |= BPB_OEMNAME_OK;
2027         if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2028                 validflags |= BPB_BKBOOTSEC_OK;
2029         if (fsisec > 0 && fsisec <= reserved)
2030                 validflags |= BPB_FSISEC_OK;
2031         if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2032                 validflags |= BPB_JMPBOOT_OK;
2033         if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2034                 validflags |= BPB_FSVER_OK;
2035         if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2036                 validflags |= BPB_VOLLAB16_OK;
2037         if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2038                 validflags |= BPB_VOLLAB32_OK;
2039         if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2040                 validflags |= BPB_EXTFLAGS_OK;
2041 
2042         /*
2043          * Try to determine which FAT format to use.
2044          *
2045          * Calculate the number of clusters in order to determine
2046          * the type of FAT we are looking at.  This is the only
2047          * recommended way of determining FAT type, though there
2048          * are other hints in the data, this is the best way.
2049          *
2050          * Since we let just about "anything" pass through this function
2051          * without early exits, fence against divide-by-zero here.
2052          *
2053          * datasec was already validated against UINT32_MAX so we know
2054          * the result will not overflow the 32bit calculation.
2055          */
2056         if (fsp->pcfs_spcl)
2057                 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2058         else
2059                 ncl = 0;
2060 
2061         fsp->pcfs_ncluster = ncl;
2062 
2063         /*
2064          * From the Microsoft FAT specification:
2065          * In the following example, when it says <, it does not mean <=.
2066          * Note also that the numbers are correct.  The first number for
2067          * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2068          * and the '<' signs are not wrong.
2069          *
2070          * We "specialdetect" the corner cases, and use at least one "extra"
2071          * criterion to decide whether it's FAT16 or FAT32 if the cluster
2072          * count is dangerously close to the boundaries.
2073          */
2074 
2075         if (ncl <= PCF_FIRSTCLUSTER) {
2076                 type = FAT_UNKNOWN;
2077         } else if (ncl < 4085) {
2078                 type = FAT12;
2079         } else if (ncl <= 4096) {
2080                 type = FAT_QUESTIONABLE;
2081         } else if (ncl < 65525) {
2082                 type = FAT16;
2083         } else if (ncl <= 65536) {
2084                 type = FAT_QUESTIONABLE;
2085         } else if (ncl < PCF_LASTCLUSTER32) {
2086                 type = FAT32;
2087         } else {
2088                 type = FAT_UNKNOWN;
2089         }
2090 
2091         DTRACE_PROBE4(parseBPB__initial,
2092             struct pcfs *, fsp, unsigned char *, bpb,
2093             int, validflags, fattype_t, type);
2094 
2095 recheck:
2096         fsp->pcfs_fatsec = fatsec;
2097 
2098         /* Do some final sanity checks for each specific type of FAT */
2099         switch (type) {
2100                 case FAT12:
2101                         if (rec != 0)
2102                                 validflags |= BPB_ROOTENTCNT_OK;
2103                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2104                             bpb_get_TotSec16(bpb) == 0)
2105                                 validflags |= BPB_TOTSEC16_OK;
2106                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2107                             bpb_get_TotSec32(bpb) == 0)
2108                                 validflags |= BPB_TOTSEC32_OK;
2109                         if (bpb_get_FatSz16(bpb) == fatsec)
2110                                 validflags |= BPB_FATSZ16_OK;
2111                         if (fatsec * secsize >= ncl * 3 / 2)
2112                                 validflags |= BPB_FATSZ_OK;
2113                         if (ncl < 4085)
2114                                 validflags |= BPB_NCLUSTERS_OK;
2115 
2116                         fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2117                         fsp->pcfs_rootblksize =
2118                             fsp->pcfs_rdirsec * secsize;
2119                         fsp->pcfs_fsistart = 0;
2120 
2121                         if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2122                                 type = FAT_UNKNOWN;
2123                         break;
2124                 case FAT16:
2125                         if (rec != 0)
2126                                 validflags |= BPB_ROOTENTCNT_OK;
2127                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2128                             bpb_get_TotSec16(bpb) == 0)
2129                                 validflags |= BPB_TOTSEC16_OK;
2130                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2131                             bpb_get_TotSec32(bpb) == 0)
2132                                 validflags |= BPB_TOTSEC32_OK;
2133                         if (bpb_get_FatSz16(bpb) == fatsec)
2134                                 validflags |= BPB_FATSZ16_OK;
2135                         if (fatsec * secsize >= ncl * 2)
2136                                 validflags |= BPB_FATSZ_OK;
2137                         if (ncl >= 4085 && ncl < 65525)
2138                                 validflags |= BPB_NCLUSTERS_OK;
2139 
2140                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2141                         fsp->pcfs_rootblksize =
2142                             fsp->pcfs_rdirsec * secsize;
2143                         fsp->pcfs_fsistart = 0;
2144 
2145                         if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2146                                 type = FAT_UNKNOWN;
2147                         break;
2148                 case FAT32:
2149                         if (rec == 0)
2150                                 validflags |= BPB_ROOTENTCNT_OK;
2151                         if (bpb_get_TotSec16(bpb) == 0)
2152                                 validflags |= BPB_TOTSEC16_OK;
2153                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2154                                 validflags |= BPB_TOTSEC32_OK;
2155                         if (bpb_get_FatSz16(bpb) == 0)
2156                                 validflags |= BPB_FATSZ16_OK;
2157                         if (bpb_get_FatSz32(bpb) == fatsec)
2158                                 validflags |= BPB_FATSZ32_OK;
2159                         if (fatsec * secsize >= ncl * 4)
2160                                 validflags |= BPB_FATSZ_OK;
2161                         if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2162                                 validflags |= BPB_NCLUSTERS_OK;
2163 
2164                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2165                         fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2166                         fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2167                         if (validflags & BPB_FSISEC_OK)
2168                                 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2169                         fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2170                         if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2171                                 validflags |= BPB_ROOTCLUSTER_OK;
2172 
2173                         /*
2174                          * Current PCFS code only works if 'pcfs_rdirstart'
2175                          * contains the root cluster number on FAT32.
2176                          * That's a mis-use and would better be changed.
2177                          */
2178                         fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2179 
2180                         if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2181                                 type = FAT_UNKNOWN;
2182                         break;
2183                 case FAT_QUESTIONABLE:
2184                         type = secondaryBPBChecks(fsp, bpb, secsize);
2185                         goto recheck;
2186                 default:
2187                         ASSERT(type == FAT_UNKNOWN);
2188                         break;
2189         }
2190 
2191         ASSERT(type != FAT_QUESTIONABLE);
2192 
2193         fsp->pcfs_fattype = type;
2194 
2195         if (valid)
2196                 *valid = validflags;
2197 
2198         DTRACE_PROBE4(parseBPB__final,
2199             struct pcfs *, fsp, unsigned char *, bpb,
2200             int, validflags, fattype_t, type);
2201 
2202         if (type != FAT_UNKNOWN) {
2203                 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2204                 ASSERT(ISP2(secsize / DEV_BSIZE));
2205                 return (1);
2206         }
2207 
2208         return (0);
2209 }
2210 
2211 
2212 /*
2213  * Detect the device's native block size (sector size).
2214  *
2215  * Test whether the device is:
2216  *      - a floppy device from a known controller type via DKIOCINFO
2217  *      - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2218  *      - a PCMCIA sram memory card (pseudofloppy) using pcram(7d)
2219  *      - a USB floppy drive (identified by drive geometry)
2220  *
2221  * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2222  * to minimize risks due to slow I/O and user hotplugging / device ejection.
2223  *
2224  * This might be a bit wasteful on kernel stack space; if anyone's
2225  * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2226  */
2227 static void
2228 pcfs_device_getinfo(struct pcfs *fsp)
2229 {
2230         dev_t                   rdev = fsp->pcfs_xdev;
2231         int                     error;
2232         union {
2233                 struct dk_minfo         mi;
2234                 struct dk_cinfo         ci;
2235                 struct dk_geom          gi;
2236                 struct fd_char          fc;
2237         } arg;                          /* save stackspace ... */
2238         intptr_t argp = (intptr_t)&arg;
2239         ldi_handle_t            lh;
2240         ldi_ident_t             li;
2241         int isfloppy, isremoveable, ishotpluggable;
2242         cred_t                  *cr = CRED();
2243 
2244         if (ldi_ident_from_dev(rdev, &li))
2245                 goto out;
2246 
2247         error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2248         ldi_ident_release(li);
2249         if (error)
2250                 goto out;
2251 
2252         /*
2253          * Not sure if this could possibly happen. It'd be a bit like
2254          * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2255          * expecting it, needs some thought if triggered ...
2256          */
2257         ASSERT(fsp->pcfs_xdev == rdev);
2258 
2259         /*
2260          * Check for removeable/hotpluggable media.
2261          */
2262         if (ldi_ioctl(lh, DKIOCREMOVABLE,
2263             (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2264                 isremoveable = 0;
2265         }
2266         if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2267             (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2268                 ishotpluggable = 0;
2269         }
2270 
2271         /*
2272          * Make sure we don't use "half-initialized" values if the ioctls fail.
2273          */
2274         if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2275                 bzero(&arg, sizeof (arg));
2276                 fsp->pcfs_mediasize = 0;
2277         } else {
2278                 fsp->pcfs_mediasize =
2279                     (len_t)arg.mi.dki_lbsize *
2280                     (len_t)arg.mi.dki_capacity;
2281         }
2282 
2283         if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2284                 if (fsp->pcfs_secsize == 0) {
2285                         fsp->pcfs_secsize = arg.mi.dki_lbsize;
2286                         fsp->pcfs_sdshift =
2287                             ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2288                 } else {
2289                         PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2290                             "%d, device (%x.%x), different from user-provided "
2291                             "%d. User override - ignoring autodetect result.\n",
2292                             arg.mi.dki_lbsize,
2293                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2294                             fsp->pcfs_secsize);
2295                 }
2296         } else if (arg.mi.dki_lbsize) {
2297                 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2298                     "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2299                     "Ignoring autodetect result.\n",
2300                     arg.mi.dki_lbsize,
2301                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2302         }
2303 
2304         /*
2305          * We treat the following media types as a floppy by default.
2306          */
2307         isfloppy =
2308             (arg.mi.dki_media_type == DK_FLOPPY ||
2309             arg.mi.dki_media_type == DK_ZIP ||
2310             arg.mi.dki_media_type == DK_JAZ);
2311 
2312         /*
2313          * if this device understands fdio(7I) requests it's
2314          * obviously a floppy drive.
2315          */
2316         if (!isfloppy &&
2317             !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2318                 isfloppy = 1;
2319 
2320         /*
2321          * some devices (PCMCIA pseudofloppies) we like to treat
2322          * as floppies, but they don't understand fdio(7I) requests.
2323          */
2324         if (!isfloppy &&
2325             !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2326             (arg.ci.dki_ctype == DKC_WDC2880 ||
2327             arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2328             arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2329             arg.ci.dki_ctype == DKC_INTEL82077 ||
2330             (arg.ci.dki_ctype == DKC_PCMCIA_MEM &&
2331             arg.ci.dki_flags & DKI_PCMCIA_PFD)))
2332                 isfloppy = 1;
2333 
2334         /*
2335          * This is the "final fallback" test - media with
2336          * 2 heads and 80 cylinders are assumed to be floppies.
2337          * This is normally true for USB floppy drives ...
2338          */
2339         if (!isfloppy &&
2340             !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2341             (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2342                 isfloppy = 1;
2343 
2344         /*
2345          * This is similar to the "old" PCFS code that sets this flag
2346          * just based on the media descriptor being 0xf8 (MD_FIXED).
2347          * Should be re-worked. We really need some specialcasing for
2348          * removeable media.
2349          */
2350         if (!isfloppy) {
2351                 fsp->pcfs_flags |= PCFS_NOCHK;
2352         }
2353 
2354         /*
2355          * We automatically disable access time updates if the medium is
2356          * removeable and/or hotpluggable, and the admin did not explicitly
2357          * request access time updates (via the "atime" mount option).
2358          * The majority of flash-based media should fit this category.
2359          * Minimizing write access extends the lifetime of your memory stick !
2360          */
2361         if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2362             (isremoveable || ishotpluggable | isfloppy)) {
2363                 fsp->pcfs_flags |= PCFS_NOATIME;
2364         }
2365 
2366         (void) ldi_close(lh, FREAD, cr);
2367 out:
2368         if (fsp->pcfs_secsize == 0) {
2369                 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2370                     "device (%x.%x) failed, no user-provided fallback. "
2371                     "Using %d bytes.\n",
2372                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2373                     DEV_BSIZE);
2374                 fsp->pcfs_secsize = DEV_BSIZE;
2375                 fsp->pcfs_sdshift = 0;
2376         }
2377         ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2378         ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2379 }
2380 
2381 /*
2382  * Get the FAT type for the DOS medium.
2383  *
2384  * -------------------------
2385  * According to Microsoft:
2386  *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2387  * count of clusters on the volume and nothing else.
2388  * -------------------------
2389  *
2390  */
2391 static int
2392 pc_getfattype(struct pcfs *fsp)
2393 {
2394         int error = 0;
2395         buf_t *bp = NULL;
2396         struct vnode *devvp = fsp->pcfs_devvp;
2397         dev_t   dev = devvp->v_rdev;
2398 
2399         /*
2400          * Detect the native block size of the medium, and attempt to
2401          * detect whether the medium is removeable.
2402          * We do treat removeable media (floppies, PCMCIA memory cards,
2403          * USB and FireWire disks) differently wrt. to the frequency
2404          * and synchronicity of FAT updates.
2405          * We need to know the media block size in order to be able to
2406          * parse the partition table.
2407          */
2408         pcfs_device_getinfo(fsp);
2409 
2410         /*
2411          * Unpartitioned media (floppies and some removeable devices)
2412          * don't have a partition table, the FAT BPB is at disk block 0.
2413          * Start out by reading block 0.
2414          */
2415         fsp->pcfs_dosstart = 0;
2416         bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2417 
2418         if (error = geterror(bp))
2419                 goto out;
2420 
2421         /*
2422          * If a logical drive number is requested, parse the partition table
2423          * and attempt to locate it. Otherwise, proceed immediately to the
2424          * BPB check. findTheDrive(), if successful, returns the disk block
2425          * number where the requested partition starts in "startsec".
2426          */
2427         if (fsp->pcfs_ldrive != 0) {
2428                 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2429                     "device (%x,%x):%d to find BPB\n",
2430                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2431 
2432                 if (error = findTheDrive(fsp, &bp))
2433                         goto out;
2434 
2435                 ASSERT(fsp->pcfs_dosstart != 0);
2436 
2437                 brelse(bp);
2438                 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2439                     fsp->pcfs_secsize);
2440                 if (error = geterror(bp))
2441                         goto out;
2442         }
2443 
2444         /*
2445          * Validate the BPB and fill in the instance structure.
2446          */
2447         if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2448                 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2449                     "device (%x.%x):%d, disk LBA %u\n",
2450                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2451                     (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2452                 error = EINVAL;
2453                 goto out;
2454         }
2455 
2456         ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2457 
2458 out:
2459         /*
2460          * Release the buffer used
2461          */
2462         if (bp != NULL)
2463                 brelse(bp);
2464         return (error);
2465 }
2466 
2467 
2468 /*
2469  * Get the file allocation table.
2470  * If there is an old FAT, invalidate it.
2471  */
2472 int
2473 pc_getfat(struct pcfs *fsp)
2474 {
2475         struct buf *bp = NULL;
2476         uchar_t *fatp = NULL;
2477         uchar_t *fat_changemap = NULL;
2478         int error;
2479         int fat_changemapsize;
2480         int flags = 0;
2481         int nfat;
2482         int altfat_mustmatch = 0;
2483         int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2484 
2485         if (fsp->pcfs_fatp) {
2486                 /*
2487                  * There is a FAT in core.
2488                  * If there are open file pcnodes or we have modified it or
2489                  * it hasn't timed out yet use the in core FAT.
2490                  * Otherwise invalidate it and get a new one
2491                  */
2492 #ifdef notdef
2493                 if (fsp->pcfs_frefs ||
2494                     (fsp->pcfs_flags & PCFS_FATMOD) ||
2495                     (gethrestime_sec() < fsp->pcfs_fattime)) {
2496                         return (0);
2497                 } else {
2498                         mutex_enter(&pcfslock);
2499                         pc_invalfat(fsp);
2500                         mutex_exit(&pcfslock);
2501                 }
2502 #endif /* notdef */
2503                 return (0);
2504         }
2505 
2506         /*
2507          * Get FAT and check it for validity
2508          */
2509         fatp = kmem_alloc(fatsize, KM_SLEEP);
2510         error = pc_readfat(fsp, fatp);
2511         if (error) {
2512                 flags = B_ERROR;
2513                 goto out;
2514         }
2515         fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2516         fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2517         fsp->pcfs_fatp = fatp;
2518         fsp->pcfs_fat_changemapsize = fat_changemapsize;
2519         fsp->pcfs_fat_changemap = fat_changemap;
2520 
2521         /*
2522          * The only definite signature check is that the
2523          * media descriptor byte should match the first byte
2524          * of the FAT block.
2525          */
2526         if (fatp[0] != fsp->pcfs_mediadesc) {
2527                 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2528                     "media descriptor %x, FAT[0] lowbyte %x\n",
2529                     (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2530                 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2531                 altfat_mustmatch = 1;
2532         }
2533 
2534         /*
2535          * Get alternate FATs and check for consistency
2536          * This is an inlined version of pc_readfat().
2537          * Since we're only comparing FAT and alternate FAT,
2538          * there's no reason to let pc_readfat() copy data out
2539          * of the buf. Instead, compare in-situ, one cluster
2540          * at a time.
2541          */
2542         for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2543                 size_t startsec;
2544                 size_t off;
2545 
2546                 startsec = pc_dbdaddr(fsp,
2547                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2548 
2549                 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2550                         daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2551                             pc_cltodb(fsp, pc_lblkno(fsp, off)));
2552 
2553                         bp = bread(fsp->pcfs_xdev, fatblk,
2554                             MIN(fsp->pcfs_clsize, fatsize - off));
2555                         if (bp->b_flags & (B_ERROR | B_STALE)) {
2556                                 cmn_err(CE_NOTE,
2557                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2558                                     " read error at offset %ld on device"
2559                                     " (%x.%x):%d",
2560                                     nfat, (void *)(uintptr_t)startsec, off,
2561                                     getmajor(fsp->pcfs_xdev),
2562                                     getminor(fsp->pcfs_xdev),
2563                                     fsp->pcfs_ldrive);
2564                                 flags = B_ERROR;
2565                                 error = EIO;
2566                                 goto out;
2567                         }
2568                         bp->b_flags |= B_STALE | B_AGE;
2569                         if (bcmp(bp->b_un.b_addr, fatp + off,
2570                             MIN(fsp->pcfs_clsize, fatsize - off))) {
2571                                 cmn_err(CE_NOTE,
2572                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2573                                     " corrupted at offset %ld on device"
2574                                     " (%x.%x):%d",
2575                                     nfat, (void *)(uintptr_t)startsec, off,
2576                                     getmajor(fsp->pcfs_xdev),
2577                                     getminor(fsp->pcfs_xdev),
2578                                     fsp->pcfs_ldrive);
2579                                 if (altfat_mustmatch) {
2580                                         flags = B_ERROR;
2581                                         error = EIO;
2582                                         goto out;
2583                                 }
2584                         }
2585                         brelse(bp);
2586                         bp = NULL;      /* prevent double release */
2587                 }
2588         }
2589 
2590         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2591         fsp->pcfs_fatjustread = 1;
2592 
2593         /*
2594          * Retrieve FAT32 fsinfo sector.
2595          * A failure to read this is not fatal to accessing the volume.
2596          * It simply means operations that count or search free blocks
2597          * will have to do a full FAT walk, vs. a possibly quicker lookup
2598          * of the summary information.
2599          * Hence, we log a message but return success overall after this point.
2600          */
2601         if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2602                 struct fat_od_fsi *fsinfo_disk;
2603 
2604                 bp = bread(fsp->pcfs_xdev,
2605                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2606                 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2607                 if (bp->b_flags & (B_ERROR | B_STALE) ||
2608                     !FSISIG_OK(fsinfo_disk)) {
2609                         cmn_err(CE_NOTE,
2610                             "!pcfs: error reading fat32 fsinfo from "
2611                             "device (%x.%x):%d, block %lld",
2612                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2613                             fsp->pcfs_ldrive,
2614                             (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2615                         fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2616                         fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2617                         fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2618                 } else {
2619                         bp->b_flags |= B_STALE | B_AGE;
2620                         fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2621                         fsp->pcfs_fsinfo.fs_free_clusters =
2622                             LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2623                         fsp->pcfs_fsinfo.fs_next_free =
2624                             LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2625                 }
2626                 brelse(bp);
2627                 bp = NULL;
2628         }
2629 
2630         if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2631                 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2632         else
2633                 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2634 
2635         return (0);
2636 
2637 out:
2638         cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2639         if (bp)
2640                 brelse(bp);
2641         if (fatp)
2642                 kmem_free(fatp, fatsize);
2643         if (fat_changemap)
2644                 kmem_free(fat_changemap, fat_changemapsize);
2645 
2646         if (flags) {
2647                 pc_mark_irrecov(fsp);
2648         }
2649         return (error);
2650 }