1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "@(#)pc_vfsops.c 1.105 08/05/07 SMI"
27
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kmem.h>
31 #include <sys/user.h>
32 #include <sys/proc.h>
33 #include <sys/cred.h>
34 #include <sys/disp.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/vnode.h>
39 #include <sys/fdio.h>
40 #include <sys/file.h>
41 #include <sys/uio.h>
42 #include <sys/conf.h>
43 #include <sys/statvfs.h>
44 #include <sys/mount.h>
45 #include <sys/pathname.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/sysmacros.h>
49 #include <sys/conf.h>
50 #include <sys/mkdev.h>
51 #include <sys/swap.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunldi.h>
54 #include <sys/dktp/fdisk.h>
55 #include <sys/fs/pc_label.h>
56 #include <sys/fs/pc_fs.h>
57 #include <sys/fs/pc_dir.h>
58 #include <sys/fs/pc_node.h>
59 #include <fs/fs_subr.h>
60 #include <sys/modctl.h>
61 #include <sys/dkio.h>
62 #include <sys/open.h>
63 #include <sys/mntent.h>
64 #include <sys/policy.h>
65 #include <sys/atomic.h>
66 #include <sys/sdt.h>
67
68 /*
69 * The majority of PC media use a 512 sector size, but
70 * occasionally you will run across a 1k sector size.
71 * For media with a 1k sector size, fd_strategy() requires
72 * the I/O size to be a 1k multiple; so when the sector size
73 * is not yet known, always read 1k.
74 */
75 #define PC_SAFESECSIZE (PC_SECSIZE * 2)
76
77 static int pcfs_pseudo_floppy(dev_t);
78
79 static int pcfsinit(int, char *);
80 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
81 struct cred *);
82 static int pcfs_unmount(struct vfs *, int, struct cred *);
83 static int pcfs_root(struct vfs *, struct vnode **);
84 static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
85 static int pc_syncfsnodes(struct pcfs *);
86 static int pcfs_sync(struct vfs *, short, struct cred *);
87 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
88 static void pcfs_freevfs(vfs_t *vfsp);
89
90 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
91 static int pc_writefat(struct pcfs *fsp, daddr_t start);
92
93 static int pc_getfattype(struct pcfs *fsp);
94 static void pcfs_parse_mntopts(struct pcfs *fsp, struct mounta *uap);
95
96
97 /*
98 * pcfs mount options table
99 */
100
101 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
102 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
103 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
104 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
105 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
106 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
107 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
108 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
109
110 static mntopt_t mntopts[] = {
111 /*
112 * option name cancel option default arg flags opt data
113 */
114 { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
115 { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
116 { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
117 { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
118 { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
119 { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
120 { MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
121 { MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
122 { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
123 { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
124 };
125
126 static mntopts_t pcfs_mntopts = {
127 sizeof (mntopts) / sizeof (mntopt_t),
128 mntopts
129 };
130
131 int pcfsdebuglevel = 0;
132
/*
 * pcfslock:	protects the list of mounted pc filesystems "pc_mounttab".
 * pcfs_lock:	(inside per filesystem structure "pcfs")
 *		per filesystem lock. Most of the vfsops and vnodeops are
 *		protected by this lock.
 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 *
 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 *
 * pcfs_mountcount:	used to prevent module unloads while there is still
 *			pcfs state from a former mount hanging around. With
 *			forced umount support, the filesystem module must not
 *			be allowed to go away before the last VFS_FREEVFS()
 *			call has been made.
 *			Since this is just an atomic counter, there's no need
 *			for locking.
 */
/* Global locks and the mount counter; initialized in _init(). */
kmutex_t pcfslock;
krwlock_t pcnodes_lock;
uint32_t pcfs_mountcount;

/* Filesystem type index; recorded by pcfsinit() from its fstype argument. */
static int pcfstype;

/*
 * Filesystem definition: names the filesystem "pcfs", supplies pcfsinit()
 * as the initialization routine and publishes the mount option table.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	"pcfs",
	pcfsinit,
	VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS,
	&pcfs_mntopts
};

extern struct mod_ops mod_fsops;

/* Module linkage element tying the filesystem definition to mod_fsops. */
static struct modlfs modlfs = {
	&mod_fsops,
	"PC filesystem v1.2",
	&vfw
};

/* Linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlfs,
	NULL
};
177
178 int
179 _init(void)
180 {
181 int error;
182
183 #if !defined(lint)
184 /* make sure the on-disk structures are sane */
185 ASSERT(sizeof (struct pcdir) == 32);
186 ASSERT(sizeof (struct pcdir_lfn) == 32);
187 #endif
188 mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
189 rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
190 error = mod_install(&modlinkage);
191 if (error) {
192 mutex_destroy(&pcfslock);
193 rw_destroy(&pcnodes_lock);
194 }
195 return (error);
196 }
197
198 int
199 _fini(void)
200 {
201 int error;
202
203 /*
204 * If a forcedly unmounted instance is still hanging around,
205 * we cannot allow the module to be unloaded because that would
206 * cause panics once the VFS framework decides it's time to call
207 * into VFS_FREEVFS().
208 */
209 if (pcfs_mountcount)
210 return (EBUSY);
211
212 error = mod_remove(&modlinkage);
213 if (error)
214 return (error);
215 mutex_destroy(&pcfslock);
216 rw_destroy(&pcnodes_lock);
217 /*
218 * Tear down the operations vectors
219 */
220 (void) vfs_freevfsops_by_type(pcfstype);
221 vn_freevnodeops(pcfs_fvnodeops);
222 vn_freevnodeops(pcfs_dvnodeops);
223 return (0);
224 }
225
226 int
227 _info(struct modinfo *modinfop)
228 {
229 return (mod_info(&modlinkage, modinfop));
230 }
231
232 /* ARGSUSED1 */
233 static int
234 pcfsinit(int fstype, char *name)
235 {
236 static const fs_operation_def_t pcfs_vfsops_template[] = {
237 VFSNAME_MOUNT, { .vfs_mount = pcfs_mount },
238 VFSNAME_UNMOUNT, { .vfs_unmount = pcfs_unmount },
239 VFSNAME_ROOT, { .vfs_root = pcfs_root },
240 VFSNAME_STATVFS, { .vfs_statvfs = pcfs_statvfs },
241 VFSNAME_SYNC, { .vfs_sync = pcfs_sync },
242 VFSNAME_VGET, { .vfs_vget = pcfs_vget },
243 VFSNAME_FREEVFS, { .vfs_freevfs = pcfs_freevfs },
244 NULL, NULL
245 };
246 int error;
247
248 error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
249 if (error != 0) {
250 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
251 return (error);
252 }
253
254 error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
255 if (error != 0) {
256 (void) vfs_freevfsops_by_type(fstype);
257 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
258 return (error);
259 }
260
261 error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
262 if (error != 0) {
263 (void) vfs_freevfsops_by_type(fstype);
264 vn_freevnodeops(pcfs_fvnodeops);
265 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
266 return (error);
267 }
268
269 pcfstype = fstype;
270 (void) pc_init();
271 pcfs_mountcount = 0;
272 return (0);
273 }
274
/*
 * Head of the list of mounted PCFS instances, chained through pcfs_nxt.
 * pcfs_mount() inserts at the head and pcfs_unmount() unlinks, both
 * while holding pcfslock.
 */
static struct pcfs *pc_mounttab = NULL;

extern struct pcfs_args pc_tz;

/*
 * Define some special logical drives we use internal to this file.
 * BOOT_PARTITION_DRIVE is selected by the ":boot" device name suffix;
 * UNPARTITIONED_DRIVE is used for plain device nodes and for the
 * floppy-style ":a" / ":b" suffixes.
 */
#define	BOOT_PARTITION_DRIVE	99
#define	PRIMARY_DOS_DRIVE	1
#define	UNPARTITIONED_DRIVE	0
285
286 static int
287 pcfs_device_identify(
288 struct vfs *vfsp,
289 struct mounta *uap,
290 struct cred *cr,
291 int *dos_ldrive,
292 dev_t *xdev)
293 {
294 struct pathname special;
295 char *c;
296 struct vnode *svp = NULL;
297 struct vnode *lvp = NULL;
298 int oflag, aflag;
299 int error;
300
301 /*
302 * Resolve path name of special file being mounted.
303 */
304 if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
305 return (error);
306 }
307
308 *dos_ldrive = -1;
309
310 if (error =
311 lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
312 /*
313 * If there's no device node, the name specified most likely
314 * maps to a PCFS-style "partition specifier" to select a
315 * harddisk primary/logical partition. Disable floppy-specific
316 * checks in such cases unless an explicit :A or :B is
317 * requested.
318 */
319
320 /*
321 * Split the pathname string at the last ':' separator.
322 * If there's no ':' in the device name, or the ':' is the
323 * last character in the string, the name is invalid and
324 * the error from the previous lookup will be returned.
325 */
326 c = strrchr(special.pn_path, ':');
327 if (c == NULL || strlen(c) == 0)
328 goto devlookup_done;
329
330 *c++ = '\0';
331
332 /*
333 * PCFS partition name suffixes can be:
334 * - "boot" to indicate the X86BOOT partition
335 * - a drive letter [c-z] for the "DOS logical drive"
336 * - a drive number 1..24 for the "DOS logical drive"
337 * - a "floppy name letter", 'a' or 'b' (just strip this)
338 */
339 if (strcasecmp(c, "boot") == 0) {
340 /*
341 * The Solaris boot partition is requested.
342 */
343 *dos_ldrive = BOOT_PARTITION_DRIVE;
344 } else if (strspn(c, "0123456789") == strlen(c)) {
345 /*
346 * All digits - parse the partition number.
347 */
348 long drvnum = 0;
349
350 if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
351 /*
352 * A number alright - in the allowed range ?
353 */
354 if (drvnum > 24 || drvnum == 0)
355 error = ENXIO;
356 }
357 if (error)
358 goto devlookup_done;
359 *dos_ldrive = (int)drvnum;
360 } else if (strlen(c) == 1) {
361 /*
362 * A single trailing character was specified.
363 * - [c-zC-Z] means a harddisk partition, and
364 * we retrieve the partition number.
365 * - [abAB] means a floppy drive, so we swallow
366 * the "drive specifier" and test later
367 * whether the physical device is a floppy or
368 * PCMCIA pseudofloppy (sram card).
369 */
370 *c = tolower(*c);
371 if (*c == 'a' || *c == 'b') {
372 *dos_ldrive = UNPARTITIONED_DRIVE;
373 } else if (*c < 'c' || *c > 'z') {
374 error = ENXIO;
375 goto devlookup_done;
376 } else {
377 *dos_ldrive = 1 + *c - 'c';
378 }
379 } else {
380 /*
381 * Can't parse this - pass through previous error.
382 */
383 goto devlookup_done;
384 }
385
386
387 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
388 NULLVPP, &svp);
389 } else {
390 *dos_ldrive = UNPARTITIONED_DRIVE;
391 }
392 devlookup_done:
393 pn_free(&special);
394 if (error)
395 return (error);
396
397 ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
398
399 /*
400 * Verify caller's permission to open the device special file.
401 */
402 if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
403 ((uap->flags & MS_RDONLY) != 0)) {
404 oflag = FREAD;
405 aflag = VREAD;
406 } else {
407 oflag = FREAD | FWRITE;
408 aflag = VREAD | VWRITE;
409 }
410
411 error = vfs_get_lofi(vfsp, &lvp);
412
413 if (error > 0) {
414 if (error == ENOENT)
415 error = ENODEV;
416 goto out;
417 } else if (error == 0) {
418 *xdev = lvp->v_rdev;
419 } else {
420 *xdev = svp->v_rdev;
421
422 if (svp->v_type != VBLK)
423 error = ENOTBLK;
424
425 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
426 goto out;
427 }
428
429 if (getmajor(*xdev) >= devcnt) {
430 error = ENXIO;
431 goto out;
432 }
433
434 if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
435 goto out;
436
437 out:
438 if (svp != NULL)
439 VN_RELE(svp);
440 if (lvp != NULL)
441 VN_RELE(lvp);
442 return (error);
443 }
444
445 static int
446 pcfs_device_ismounted(
447 struct vfs *vfsp,
448 int dos_ldrive,
449 dev_t xdev,
450 int *remounting,
451 dev_t *pseudodev)
452 {
453 struct pcfs *fsp;
454 int remount = *remounting;
455
456 /*
457 * Ensure that this logical drive isn't already mounted, unless
458 * this is a REMOUNT request.
459 * Note: The framework will perform this check if the "...:c"
460 * PCFS-style "logical drive" syntax has not been used and an
461 * actually existing physical device is backing this filesystem.
462 * Once all block device drivers support PC-style partitioning,
463 * this codeblock can be dropped.
464 */
465 *pseudodev = xdev;
466
467 if (dos_ldrive) {
468 mutex_enter(&pcfslock);
469 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
470 if (fsp->pcfs_xdev == xdev &&
471 fsp->pcfs_ldrive == dos_ldrive) {
472 mutex_exit(&pcfslock);
473 if (remount) {
474 return (0);
475 } else {
476 return (EBUSY);
477 }
478 }
479 /*
480 * Assign a unique device number for the vfs
481 * The old way (getudev() + a constantly incrementing
482 * major number) was wrong because it changes vfs_dev
483 * across mounts and reboots, which breaks nfs file handles.
484 * UFS just uses the real dev_t. We can't do that because
485 * of the way pcfs opens fdisk partitons (the :c and :d
486 * partitions are on the same dev_t). Though that _might_
487 * actually be ok, since the file handle contains an
488 * absolute block number, it's probably better to make them
489 * different. So I think we should retain the original
490 * dev_t, but come up with a different minor number based
491 * on the logical drive that will _always_ come up the same.
492 * For now, we steal the upper 6 bits.
493 */
494 #ifdef notdef
495 /* what should we do here? */
496 if (((getminor(xdev) >> 12) & 0x3F) != 0)
497 printf("whoops - upper bits used!\n");
498 #endif
499 *pseudodev = makedevice(getmajor(xdev),
500 ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
501 if (vfs_devmounting(*pseudodev, vfsp)) {
502 mutex_exit(&pcfslock);
503 return (EBUSY);
504 }
505 if (vfs_devismounted(*pseudodev)) {
506 mutex_exit(&pcfslock);
507 if (remount) {
508 return (0);
509 } else {
510 return (EBUSY);
511 }
512 }
513 mutex_exit(&pcfslock);
514 } else {
515 *pseudodev = xdev;
516 if (vfs_devmounting(*pseudodev, vfsp)) {
517 return (EBUSY);
518 }
519 if (vfs_devismounted(*pseudodev))
520 if (remount) {
521 return (0);
522 } else {
523 return (EBUSY);
524 }
525 }
526
527 /*
528 * This is not a remount. Even if MS_REMOUNT was requested,
529 * the caller needs to proceed as it would on an ordinary
530 * mount.
531 */
532 *remounting = 0;
533
534 ASSERT(*pseudodev);
535 return (0);
536 }
537
538 /*
539 * Get the PCFS-specific mount options from the VFS framework.
540 * For "timezone" and "secsize", we need to parse the number
541 * ourselves and ensure its validity.
542 * Note: "secsize" is deliberately undocumented at this time,
543 * it's a workaround for devices (particularly: lofi image files)
544 * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
545 */
546 static void
547 pcfs_parse_mntopts(struct pcfs *fsp, struct mounta *uap)
548 {
549 char *c;
550 char *endptr;
551 long l;
552 struct vfs *vfsp = fsp->pcfs_vfs;
553
554 ASSERT(fsp->pcfs_secondswest == 0);
555 ASSERT(fsp->pcfs_secsize == 0);
556
557 if (uap->flags & MS_RDONLY) {
558 vfsp->vfs_flag |= VFS_RDONLY;
559 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
560 }
561
562 if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
563 fsp->pcfs_flags |= PCFS_HIDDEN;
564 if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
565 fsp->pcfs_flags |= PCFS_FOLDCASE;
566 if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
567 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
568 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
569 fsp->pcfs_flags |= PCFS_NOATIME;
570
571 if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
572 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
573 endptr == c + strlen(c)) {
574 /*
575 * A number alright - in the allowed range ?
576 */
577 if (l <= -12*3600 || l >= 12*3600) {
578 cmn_err(CE_WARN, "!pcfs: invalid use of "
579 "'timezone' mount option - %ld "
580 "is out of range. Assuming 0.", l);
581 l = 0;
582 }
583 } else {
584 cmn_err(CE_WARN, "!pcfs: invalid use of "
585 "'timezone' mount option - argument %s "
586 "is not a valid number. Assuming 0.", c);
587 l = 0;
588 }
589 fsp->pcfs_secondswest = l;
590 }
591
592 /*
593 * The "secsize=..." mount option is a workaround for the lack of
594 * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
595 * partition table of a disk image and it has been partitioned with
596 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
597 * images.
598 * That should really be fixed in lofi ... this is a workaround.
599 */
600 if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
601 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
602 endptr == c + strlen(c)) {
603 /*
604 * A number alright - a valid sector size as well ?
605 */
606 if (!VALID_SECSIZE(l)) {
607 cmn_err(CE_WARN, "!pcfs: invalid use of "
608 "'secsize' mount option - %ld is "
609 "unsupported. Autodetecting.", l);
610 l = 0;
611 }
612 } else {
613 cmn_err(CE_WARN, "!pcfs: invalid use of "
614 "'secsize' mount option - argument %s "
615 "is not a valid number. Autodetecting.", c);
616 l = 0;
617 }
618 fsp->pcfs_secsize = l;
619 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
620 }
621 }
622
623 /*
624 * vfs operations
625 */
626
627 /*
628 * pcfs_mount - backend for VFS_MOUNT() on PCFS.
629 */
630 static int
631 pcfs_mount(
632 struct vfs *vfsp,
633 struct vnode *mvp,
634 struct mounta *uap,
635 struct cred *cr)
636 {
637 struct pcfs *fsp;
638 struct vnode *devvp;
639 dev_t pseudodev;
640 dev_t xdev;
641 int dos_ldrive = 0;
642 int error;
643 int remounting;
644
645 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
646 return (error);
647
648 if (mvp->v_type != VDIR)
649 return (ENOTDIR);
650
651 mutex_enter(&mvp->v_lock);
652 if ((uap->flags & MS_REMOUNT) == 0 &&
653 (uap->flags & MS_OVERLAY) == 0 &&
654 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
655 mutex_exit(&mvp->v_lock);
656 return (EBUSY);
657 }
658 mutex_exit(&mvp->v_lock);
659
660 /*
661 * PCFS doesn't do mount arguments anymore - everything's a mount
662 * option these days. In order not to break existing callers, we
663 * don't reject it yet, just warn that the data (if any) is ignored.
664 */
665 if (uap->datalen != 0)
666 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
667 "mount argument structures instead of mount options. "
668 "Ignoring mount(2) 'dataptr' argument.");
669
670 /*
671 * For most filesystems, this is just a lookupname() on the
672 * mount pathname string. PCFS historically has to do its own
673 * partition table parsing because not all Solaris architectures
674 * support all styles of partitioning that PC media can have, and
675 * hence PCFS understands "device names" that don't map to actual
676 * physical device nodes. Parsing the "PCFS syntax" for device
677 * names is done in pcfs_device_identify() - see there.
678 *
679 * Once all block device drivers that can host FAT filesystems have
680 * been enhanced to create device nodes for all PC-style partitions,
681 * this code can go away.
682 */
683 if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
684 return (error);
685
686 /*
687 * As with looking up the actual device to mount, PCFS cannot rely
688 * on just the checks done by vfs_ismounted() whether a given device
689 * is mounted already. The additional check against the "PCFS syntax"
690 * is done in pcfs_device_ismounted().
691 */
692 remounting = (uap->flags & MS_REMOUNT);
693
694 if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
695 &pseudodev))
696 return (error);
697
698 if (remounting)
699 return (0);
700
701 /*
702 * Mount the filesystem.
703 * An instance structure is required before the attempt to locate
704 * and parse the FAT BPB. This is because mount options may change
705 * the behaviour of the filesystem type matching code. Precreate
706 * it and fill it in to a degree that allows parsing the mount
707 * options.
708 */
709 devvp = makespecvp(xdev, VBLK);
710 if (IS_SWAPVP(devvp)) {
711 VN_RELE(devvp);
712 return (EBUSY);
713 }
714 error = VOP_OPEN(&devvp,
715 (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
716 if (error) {
717 VN_RELE(devvp);
718 return (error);
719 }
720
721 fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
722 fsp->pcfs_vfs = vfsp;
723 fsp->pcfs_xdev = xdev;
724 fsp->pcfs_devvp = devvp;
725 fsp->pcfs_ldrive = dos_ldrive;
726 mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
727 vfsp->vfs_data = fsp;
728 vfsp->vfs_dev = pseudodev;
729 vfsp->vfs_fstype = pcfstype;
730 vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
731 vfsp->vfs_bcount = 0;
732 vfsp->vfs_bsize = fsp->pcfs_clsize;
733
734 pcfs_parse_mntopts(fsp, uap);
735
736 /*
737 * This is the actual "mount" - the PCFS superblock check.
738 *
739 * Find the requested logical drive and the FAT BPB therein.
740 * Check device type and flag the instance if media is removeable.
741 *
742 * Initializes most members of the filesystem instance structure.
743 * Returns EINVAL if no valid BPB can be found. Other errors may
744 * occur after I/O failures, or when invalid / unparseable partition
745 * tables are encountered.
746 */
747 if (error = pc_getfattype(fsp))
748 goto errout;
749
750 /*
751 * Validate that we can access the FAT and that it is, to the
752 * degree we can verify here, self-consistent.
753 */
754 if (error = pc_verify(fsp))
755 goto errout;
756
757 /*
758 * Record the time of the mount, to return as an "approximate"
759 * timestamp for the FAT root directory. Since FAT roots don't
760 * have timestamps, this is less confusing to the user than
761 * claiming "zero" / Jan/01/1970.
762 */
763 gethrestime(&fsp->pcfs_mounttime);
764
765 /*
766 * Fix up the mount options. Because "noatime" is made default on
767 * removeable media only, a fixed disk will have neither "atime"
768 * nor "noatime" set. We set the options explicitly depending on
769 * the PCFS_NOATIME flag, to inform the user of what applies.
770 * Mount option cancellation will take care that the mutually
771 * exclusive 'other' is cleared.
772 */
773 vfs_setmntopt(vfsp,
774 fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
775 NULL, 0);
776
777 /*
778 * All clear - insert the FS instance into PCFS' list.
779 */
780 mutex_enter(&pcfslock);
781 fsp->pcfs_nxt = pc_mounttab;
782 pc_mounttab = fsp;
783 mutex_exit(&pcfslock);
784 atomic_inc_32(&pcfs_mountcount);
785 return (0);
786
787 errout:
788 (void) VOP_CLOSE(devvp,
789 vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
790 1, (offset_t)0, cr, NULL);
791 VN_RELE(devvp);
792 mutex_destroy(&fsp->pcfs_lock);
793 kmem_free(fsp, sizeof (*fsp));
794 return (error);
795
796 }
797
798 static int
799 pcfs_unmount(
800 struct vfs *vfsp,
801 int flag,
802 struct cred *cr)
803 {
804 struct pcfs *fsp, *fsp1;
805
806 if (secpolicy_fs_unmount(cr, vfsp) != 0)
807 return (EPERM);
808
809 fsp = VFSTOPCFS(vfsp);
810
811 /*
812 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
813 * prevent lookuppn from crossing the mount point.
814 * If this is not a forced umount request and there's ongoing I/O,
815 * don't allow the mount to proceed.
816 */
817 if (flag & MS_FORCE)
818 vfsp->vfs_flag |= VFS_UNMOUNTED;
819 else if (fsp->pcfs_nrefs)
820 return (EBUSY);
821
822 mutex_enter(&pcfslock);
823
824 /*
825 * If this is a forced umount request or if the fs instance has
826 * been marked as beyond recovery, allow the umount to proceed
827 * regardless of state. pc_diskchanged() forcibly releases all
828 * inactive vnodes/pcnodes.
829 */
830 if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
831 rw_enter(&pcnodes_lock, RW_WRITER);
832 pc_diskchanged(fsp);
833 rw_exit(&pcnodes_lock);
834 }
835
836 /* now there should be no pcp node on pcfhead or pcdhead. */
837
838 if (fsp == pc_mounttab) {
839 pc_mounttab = fsp->pcfs_nxt;
840 } else {
841 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
842 if (fsp1->pcfs_nxt == fsp)
843 fsp1->pcfs_nxt = fsp->pcfs_nxt;
844 }
845
846 mutex_exit(&pcfslock);
847
848 /*
849 * Since we support VFS_FREEVFS(), there's no need to
850 * free the fsp right now. The framework will tell us
851 * when the right time to do so has arrived by calling
852 * into pcfs_freevfs.
853 */
854 return (0);
855 }
856
857 /*
858 * find root of pcfs
859 */
860 static int
861 pcfs_root(
862 struct vfs *vfsp,
863 struct vnode **vpp)
864 {
865 struct pcfs *fsp;
866 struct pcnode *pcp;
867 int error;
868
869 fsp = VFSTOPCFS(vfsp);
870 if (error = pc_lockfs(fsp, 0, 0))
871 return (error);
872
873 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
874 pc_unlockfs(fsp);
875 *vpp = PCTOV(pcp);
876 pcp->pc_flags |= PC_EXTERNAL;
877 return (0);
878 }
879
880 /*
881 * Get file system statistics.
882 */
883 static int
884 pcfs_statvfs(
885 struct vfs *vfsp,
886 struct statvfs64 *sp)
887 {
888 struct pcfs *fsp;
889 int error;
890 dev32_t d32;
891
892 fsp = VFSTOPCFS(vfsp);
893 error = pc_getfat(fsp);
894 if (error)
895 return (error);
896 bzero(sp, sizeof (*sp));
897 sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
898 sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
899 sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
900 sp->f_files = (fsfilcnt64_t)-1;
901 sp->f_ffree = (fsfilcnt64_t)-1;
902 sp->f_favail = (fsfilcnt64_t)-1;
903 #ifdef notdef
904 (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
905 #endif /* notdef */
906 (void) cmpldev(&d32, vfsp->vfs_dev);
907 sp->f_fsid = d32;
908 (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
909 sp->f_flag = vf_to_stf(vfsp->vfs_flag);
910 sp->f_namemax = PCFNAMESIZE;
911 return (0);
912 }
913
914 static int
915 pc_syncfsnodes(struct pcfs *fsp)
916 {
917 struct pchead *hp;
918 struct pcnode *pcp;
919 int error;
920
921 if (error = pc_lockfs(fsp, 0, 0))
922 return (error);
923
924 if (!(error = pc_syncfat(fsp))) {
925 hp = pcfhead;
926 while (hp < & pcfhead [ NPCHASH ]) {
927 rw_enter(&pcnodes_lock, RW_READER);
928 pcp = hp->pch_forw;
929 while (pcp != (struct pcnode *)hp) {
930 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
931 if (error = pc_nodesync(pcp))
932 break;
933 pcp = pcp -> pc_forw;
934 }
935 rw_exit(&pcnodes_lock);
936 if (error)
937 break;
938 hp++;
939 }
940 }
941 pc_unlockfs(fsp);
942 return (error);
943 }
944
945 /*
946 * Flush any pending I/O.
947 */
948 /*ARGSUSED*/
949 static int
950 pcfs_sync(
951 struct vfs *vfsp,
952 short flag,
953 struct cred *cr)
954 {
955 struct pcfs *fsp;
956 int error = 0;
957
958 /* this prevents the filesystem from being umounted. */
959 mutex_enter(&pcfslock);
960 if (vfsp != NULL) {
961 fsp = VFSTOPCFS(vfsp);
962 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
963 error = pc_syncfsnodes(fsp);
964 } else {
965 rw_enter(&pcnodes_lock, RW_WRITER);
966 pc_diskchanged(fsp);
967 rw_exit(&pcnodes_lock);
968 error = EIO;
969 }
970 } else {
971 fsp = pc_mounttab;
972 while (fsp != NULL) {
973 if (fsp->pcfs_flags & PCFS_IRRECOV) {
974 rw_enter(&pcnodes_lock, RW_WRITER);
975 pc_diskchanged(fsp);
976 rw_exit(&pcnodes_lock);
977 error = EIO;
978 break;
979 }
980 error = pc_syncfsnodes(fsp);
981 if (error) break;
982 fsp = fsp->pcfs_nxt;
983 }
984 }
985 mutex_exit(&pcfslock);
986 return (error);
987 }
988
989 int
990 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
991 {
992 int err;
993
994 if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
995 return (EIO);
996
997 if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
998 fsp->pcfs_count++;
999 } else {
1000 mutex_enter(&fsp->pcfs_lock);
1001 if (fsp->pcfs_flags & PCFS_LOCKED)
1002 panic("pc_lockfs");
1003 /*
1004 * We check the IRRECOV bit again just in case somebody
1005 * snuck past the initial check but then got held up before
1006 * they could grab the lock. (And in the meantime someone
1007 * had grabbed the lock and set the bit)
1008 */
1009 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1010 if ((err = pc_getfat(fsp))) {
1011 mutex_exit(&fsp->pcfs_lock);
1012 return (err);
1013 }
1014 }
1015 fsp->pcfs_flags |= PCFS_LOCKED;
1016 fsp->pcfs_owner = curthread;
1017 fsp->pcfs_count++;
1018 }
1019 return (0);
1020 }
1021
1022 void
1023 pc_unlockfs(struct pcfs *fsp)
1024 {
1025
1026 if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1027 panic("pc_unlockfs");
1028 if (--fsp->pcfs_count < 0)
1029 panic("pc_unlockfs: count");
1030 if (fsp->pcfs_count == 0) {
1031 fsp->pcfs_flags &= ~PCFS_LOCKED;
1032 fsp->pcfs_owner = 0;
1033 mutex_exit(&fsp->pcfs_lock);
1034 }
1035 }
1036
1037 int
1038 pc_syncfat(struct pcfs *fsp)
1039 {
1040 struct buf *bp;
1041 int nfat;
1042 int error = 0;
1043 struct fat_od_fsi *fsinfo_disk;
1044
1045 if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1046 !(fsp->pcfs_flags & PCFS_FATMOD))
1047 return (0);
1048 /*
1049 * write out all copies of FATs
1050 */
1051 fsp->pcfs_flags &= ~PCFS_FATMOD;
1052 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1053 for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1054 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1055 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1056 if (error) {
1057 pc_mark_irrecov(fsp);
1058 return (EIO);
1059 }
1060 }
1061 pc_clear_fatchanges(fsp);
1062
1063 /*
1064 * Write out fsinfo sector.
1065 */
1066 if (IS_FAT32(fsp)) {
1067 bp = bread(fsp->pcfs_xdev,
1068 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1069 if (bp->b_flags & (B_ERROR | B_STALE)) {
1070 error = geterror(bp);
1071 }
1072 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1073 if (!error && FSISIG_OK(fsinfo_disk)) {
1074 fsinfo_disk->fsi_incore.fs_free_clusters =
1075 LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1076 fsinfo_disk->fsi_incore.fs_next_free =
1077 LE_32(FSINFO_UNKNOWN);
1078 bwrite2(bp);
1079 error = geterror(bp);
1080 }
1081 brelse(bp);
1082 if (error) {
1083 pc_mark_irrecov(fsp);
1084 return (EIO);
1085 }
1086 }
1087 return (0);
1088 }
1089
1090 void
1091 pc_invalfat(struct pcfs *fsp)
1092 {
1093 struct pcfs *xfsp;
1094 int mount_cnt = 0;
1095
1096 if (fsp->pcfs_fatp == (uchar_t *)0)
1097 panic("pc_invalfat");
1098 /*
1099 * Release FAT
1100 */
1101 kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1102 fsp->pcfs_fatp = NULL;
1103 kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1104 fsp->pcfs_fat_changemap = NULL;
1105 /*
1106 * Invalidate all the blocks associated with the device.
1107 * Not needed if stateless.
1108 */
1109 for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1110 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1111 mount_cnt++;
1112
1113 if (!mount_cnt)
1114 binval(fsp->pcfs_xdev);
1115 /*
1116 * close mounted device
1117 */
1118 (void) VOP_CLOSE(fsp->pcfs_devvp,
1119 (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1120 1, (offset_t)0, CRED(), NULL);
1121 }
1122
1123 void
1124 pc_badfs(struct pcfs *fsp)
1125 {
1126 cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1127 getmajor(fsp->pcfs_devvp->v_rdev),
1128 getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1129 }
1130
1131 /*
1132 * The problem with supporting NFS on the PCFS filesystem is that there
1133 * is no good place to keep the generation number. The only possible
1134 * place is inside a directory entry. There are a few words that we
1135 * don't use - they store NT & OS/2 attributes, and the creation/last access
1136 * time of the file - but it seems wrong to use them. In addition, directory
1137 * entries come and go. If a directory is removed completely, its directory
1138 * blocks are freed and the generation numbers are lost. Whereas in ufs,
1139 * inode blocks are dedicated for inodes, so the generation numbers are
1140 * permanently kept on the disk.
1141 */
1142 static int
1143 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1144 {
1145 struct pcnode *pcp;
1146 struct pc_fid *pcfid;
1147 struct pcfs *fsp;
1148 struct pcdir *ep;
1149 daddr_t eblkno;
1150 int eoffset;
1151 struct buf *bp;
1152 int error;
1153 pc_cluster32_t cn;
1154
1155 pcfid = (struct pc_fid *)fidp;
1156 fsp = VFSTOPCFS(vfsp);
1157
1158 error = pc_lockfs(fsp, 0, 0);
1159 if (error) {
1160 *vpp = NULL;
1161 return (error);
1162 }
1163
1164 if (pcfid->pcfid_block == 0) {
1165 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1166 pcp->pc_flags |= PC_EXTERNAL;
1167 *vpp = PCTOV(pcp);
1168 pc_unlockfs(fsp);
1169 return (0);
1170 }
1171 eblkno = pcfid->pcfid_block;
1172 eoffset = pcfid->pcfid_offset;
1173
1174 if ((pc_dbtocl(fsp,
1175 eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1176 (eoffset > fsp->pcfs_clsize)) {
1177 pc_unlockfs(fsp);
1178 *vpp = NULL;
1179 return (EINVAL);
1180 }
1181
1182 if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1183 < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1184 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1185 fsp->pcfs_clsize);
1186 } else {
1187 /*
1188 * This is an access "backwards" into the FAT12/FAT16
1189 * root directory. A better code structure would
1190 * significantly improve maintainability here ...
1191 */
1192 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1193 (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1194 }
1195 if (bp->b_flags & (B_ERROR | B_STALE)) {
1196 error = geterror(bp);
1197 brelse(bp);
1198 if (error)
1199 pc_mark_irrecov(fsp);
1200 *vpp = NULL;
1201 pc_unlockfs(fsp);
1202 return (error);
1203 }
1204 ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1205 /*
1206 * Ok, if this is a valid file handle that we gave out,
1207 * then simply ensuring that the creation time matches,
1208 * the entry has not been deleted, and it has a valid first
1209 * character should be enough.
1210 *
1211 * Unfortunately, verifying that the <blkno, offset> _still_
1212 * refers to a directory entry is not easy, since we'd have
1213 * to search _all_ directories starting from root to find it.
1214 * That's a high price to pay just in case somebody is forging
1215 * file handles. So instead we verify that as much of the
1216 * entry is valid as we can:
1217 *
1218 * 1. The starting cluster is 0 (unallocated) or valid
1219 * 2. It is not an LFN entry
1220 * 3. It is not hidden (unless mounted as such)
1221 * 4. It is not the label
1222 */
1223 cn = pc_getstartcluster(fsp, ep);
1224 /*
1225 * if the starting cluster is valid, but not valid according
1226 * to pc_validcl(), force it to be to simplify the following if.
1227 */
1228 if (cn == 0)
1229 cn = PCF_FIRSTCLUSTER;
1230 if (IS_FAT32(fsp)) {
1231 if (cn >= PCF_LASTCLUSTER32)
1232 cn = PCF_FIRSTCLUSTER;
1233 } else {
1234 if (cn >= PCF_LASTCLUSTER)
1235 cn = PCF_FIRSTCLUSTER;
1236 }
1237 if ((!pc_validcl(fsp, cn)) ||
1238 (PCDL_IS_LFN(ep)) ||
1239 (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1240 ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1241 bp->b_flags |= B_STALE | B_AGE;
1242 brelse(bp);
1243 pc_unlockfs(fsp);
1244 return (EINVAL);
1245 }
1246 if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1247 (ep->pcd_filename[0] != PCD_ERASED) &&
1248 (pc_validchar(ep->pcd_filename[0]) ||
1249 (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1250 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1251 pcp->pc_flags |= PC_EXTERNAL;
1252 *vpp = PCTOV(pcp);
1253 } else {
1254 *vpp = NULL;
1255 }
1256 bp->b_flags |= B_STALE | B_AGE;
1257 brelse(bp);
1258 pc_unlockfs(fsp);
1259 return (0);
1260 }
1261
1262 /*
1263 * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1264 * a meg), so we can't bread() it all in at once. This routine reads a
1265 * fat a chunk at a time.
1266 */
1267 static int
1268 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1269 {
1270 struct buf *bp;
1271 size_t off;
1272 size_t readsize;
1273 daddr_t diskblk;
1274 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1275 daddr_t start = fsp->pcfs_fatstart;
1276
1277 readsize = fsp->pcfs_clsize;
1278 for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1279 if (readsize > (fatsize - off))
1280 readsize = fatsize - off;
1281 diskblk = pc_dbdaddr(fsp, start +
1282 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1283 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1284 if (bp->b_flags & (B_ERROR | B_STALE)) {
1285 brelse(bp);
1286 return (EIO);
1287 }
1288 bp->b_flags |= B_STALE | B_AGE;
1289 bcopy(bp->b_un.b_addr, fatp, readsize);
1290 brelse(bp);
1291 }
1292 return (0);
1293 }
1294
1295 /*
1296 * We write the FAT out a _lot_, in order to make sure that it
1297 * is up-to-date. But on a FAT32 system (large drive, small clusters)
1298 * the FAT might be a couple of megabytes, and writing it all out just
1299 * because we created or deleted a small file is painful (especially
1300 * since we do it for each alternate FAT too). So instead, for FAT16 and
1301 * FAT32 we only write out the bit that has changed. We don't clear
1302 * the 'updated' fields here because the caller might be writing out
1303 * several FATs, so the caller must use pc_clear_fatchanges() after
1304 * all FATs have been updated.
1305 * This function doesn't take "start" from fsp->pcfs_dosstart because
1306 * callers can use it to write either the primary or any of the alternate
1307 * FAT tables.
1308 */
1309 static int
1310 pc_writefat(struct pcfs *fsp, daddr_t start)
1311 {
1312 struct buf *bp;
1313 size_t off;
1314 size_t writesize;
1315 int error;
1316 uchar_t *fatp = fsp->pcfs_fatp;
1317 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1318
1319 writesize = fsp->pcfs_clsize;
1320 for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1321 if (writesize > (fatsize - off))
1322 writesize = fatsize - off;
1323 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1324 continue;
1325 }
1326 bp = ngeteblk(writesize);
1327 bp->b_edev = fsp->pcfs_xdev;
1328 bp->b_dev = cmpdev(bp->b_edev);
1329 bp->b_blkno = pc_dbdaddr(fsp, start +
1330 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1331 bcopy(fatp, bp->b_un.b_addr, writesize);
1332 bwrite2(bp);
1333 error = geterror(bp);
1334 brelse(bp);
1335 if (error) {
1336 return (error);
1337 }
1338 }
1339 return (0);
1340 }
1341
1342 /*
1343 * Mark the FAT cluster that 'cn' is stored in as modified.
1344 */
1345 void
1346 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1347 {
1348 pc_cluster32_t bn;
1349 size_t size;
1350
1351 /* which fat block is the cluster number stored in? */
1352 if (IS_FAT32(fsp)) {
1353 size = sizeof (pc_cluster32_t);
1354 bn = pc_lblkno(fsp, cn * size);
1355 fsp->pcfs_fat_changemap[bn] = 1;
1356 } else if (IS_FAT16(fsp)) {
1357 size = sizeof (pc_cluster16_t);
1358 bn = pc_lblkno(fsp, cn * size);
1359 fsp->pcfs_fat_changemap[bn] = 1;
1360 } else {
1361 offset_t off;
1362 pc_cluster32_t nbn;
1363
1364 ASSERT(IS_FAT12(fsp));
1365 off = cn + (cn >> 1);
1366 bn = pc_lblkno(fsp, off);
1367 fsp->pcfs_fat_changemap[bn] = 1;
1368 /* does this field wrap into the next fat cluster? */
1369 nbn = pc_lblkno(fsp, off + 1);
1370 if (nbn != bn) {
1371 fsp->pcfs_fat_changemap[nbn] = 1;
1372 }
1373 }
1374 }
1375
1376 /*
1377 * return whether the FAT cluster 'bn' is updated and needs to
1378 * be written out.
1379 */
1380 int
1381 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1382 {
1383 return (fsp->pcfs_fat_changemap[bn] == 1);
1384 }
1385
1386 /*
1387 * Implementation of VFS_FREEVFS() to support forced umounts.
1388 * This is called by the vfs framework after umount, to trigger
1389 * the release of any resources still associated with the given
1390 * vfs_t once the need to keep them has gone away.
1391 */
1392 void
1393 pcfs_freevfs(vfs_t *vfsp)
1394 {
1395 struct pcfs *fsp = VFSTOPCFS(vfsp);
1396
1397 mutex_enter(&pcfslock);
1398 /*
1399 * Purging the FAT closes the device - can't do any more
1400 * I/O after this.
1401 */
1402 if (fsp->pcfs_fatp != (uchar_t *)0)
1403 pc_invalfat(fsp);
1404 mutex_exit(&pcfslock);
1405
1406 VN_RELE(fsp->pcfs_devvp);
1407 mutex_destroy(&fsp->pcfs_lock);
1408 kmem_free(fsp, sizeof (*fsp));
1409
1410 /*
1411 * Allow _fini() to succeed now, if so desired.
1412 */
1413 atomic_dec_32(&pcfs_mountcount);
1414 }
1415
1416
1417 /*
1418 * PC-style partition parsing and FAT BPB identification/validation code.
1419 * The partition parsers here assume:
1420 * - a FAT filesystem will be in a partition that has one of a set of
1421 * recognized partition IDs
1422 * - the user wants the 'numbering' (C:, D:, ...) that one would get
1423 * on MSDOS 6.x.
1424 * That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1425 * will not factor in the enumeration.
1426 * These days, such assumptions should be revisited. FAT is no longer the
1427 * only game in 'PC town'.
1428 */
1429 /*
1430 * isDosDrive()
1431 * Boolean function. Give it the systid field for an fdisk partition
1432 * and it decides if that's a systid that describes a DOS drive. We
1433 * use systid values defined in sys/dktp/fdisk.h.
1434 */
1435 static int
1436 isDosDrive(uchar_t checkMe)
1437 {
1438 return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1439 (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1440 (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1441 (checkMe == DIAGPART));
1442 }
1443
1444
1445 /*
1446 * isDosExtended()
1447 * Boolean function. Give it the systid field for an fdisk partition
1448 * and it decides if that's a systid that describes an extended DOS
1449 * partition.
1450 */
1451 static int
1452 isDosExtended(uchar_t checkMe)
1453 {
1454 return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1455 }
1456
1457
1458 /*
1459 * isBootPart()
1460 * Boolean function. Give it the systid field for an fdisk partition
1461 * and it decides if that's a systid that describes a Solaris boot
1462 * partition.
1463 */
1464 static int
1465 isBootPart(uchar_t checkMe)
1466 {
1467 return (checkMe == X86BOOT);
1468 }
1469
1470
1471 /*
1472 * noLogicalDrive()
1473 * Display error message about not being able to find a logical
1474 * drive.
1475 */
1476 static void
1477 noLogicalDrive(int ldrive)
1478 {
1479 if (ldrive == BOOT_PARTITION_DRIVE) {
1480 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1481 } else {
1482 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1483 }
1484 }
1485
1486
/*
 * findTheDrive()
 *	Locate the logical drive fsp->pcfs_ldrive by walking the fdisk
 *	partition table(s) of the device.
 *
 *	fsp	filesystem being mounted.  On success pcfs_dosstart is set
 *		to the first sector of the drive (in units of device
 *		sectors), and pcfs_mediasize is set to the partition size
 *		in bytes if it was zero on entry.
 *	bp	in/out.  On entry *bp must hold the sector carrying the
 *		master partition table.  While extended partitions are
 *		walked, the buffer is released and replaced by the most
 *		recently read partition table sector.
 *
 *	Returns 0 when the drive was found, EIO for a request for logical
 *	drive 0 or a failed read, and EINVAL for a bad partition table
 *	signature, inconsistent extended partition values or a drive that
 *	does not exist.
 *
 * NOTE(review): nothing in this function reads the first sector (BPB) of
 * the drive that was found into *bp; on return *bp still refers to a
 * partition table sector.  After a failed bread() the buffer is left in
 * *bp without being released here.  Presumably the caller re-reads the
 * BPB and releases the buffer - confirm against the caller.
 *
 * Note: this code is not capable of addressing >2TB disks, as it uses
 * daddr_t not diskaddr_t, some of the calculations would overflow
 */
/* Copy the partition table out of a raw MBR sector into 'ptblp'. */
#define	COPY_PTBL(mbr, ptblp)					\
	bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),	\
	    FD_NUMPART * sizeof (struct ipart))

static int
findTheDrive(struct pcfs *fsp, buf_t **bp)
{
	int ldrive = fsp->pcfs_ldrive;
	dev_t dev = fsp->pcfs_devvp->v_rdev;

	struct ipart dosp[FD_NUMPART];	/* incore fdisk partition structure */
	daddr_t lastseek = 0;		/* Disk block we sought previously */
	daddr_t diskblk = 0;		/* Disk block to get */
	daddr_t xstartsect;		/* base of Extended DOS partition */
	int logicalDriveCount = 0;	/* Count of logical drives seen */
	int extendedPart = -1;		/* index of extended dos partition */
	int primaryPart = -1;		/* index of primary dos partition */
	int bootPart = -1;		/* index of a Solaris boot partition */
	int xnumsect = -1;		/* length of extended DOS partition */
	int driveIndex;			/* computed FDISK table index */
	daddr_t startsec;		/* start of the drive that was found */
	len_t mediasize;		/* size of the drive that was found */
	int i;
	/*
	 * Count of drives in the current extended partition's
	 * FDISK table, and indexes of the drives themselves.
	 */
	int extndDrives[FD_NUMPART];
	int numDrives = 0;

	/*
	 * Count of drives (beyond primary) in master boot record's
	 * FDISK table, and indexes of the drives themselves.
	 */
	int extraDrives[FD_NUMPART];
	int numExtraDrives = 0;

	/*
	 * "ldrive == 0" should never happen, as this is a request to
	 * mount the physical device (and ignore partitioning). The code
	 * in pcfs_mount() should have made sure that a logical drive number
	 * is at least 1, meaning we're looking for drive "C:". It is not
	 * safe (and a bug in the callers of this function) to request logical
	 * drive number 0; we could ASSERT() but a graceful EIO is a more
	 * polite way.
	 */
	if (ldrive == 0) {
		cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
		noLogicalDrive(ldrive);
		return (EIO);
	}

	/*
	 * Copy from disk block into memory aligned structure for fdisk usage.
	 */
	COPY_PTBL((*bp)->b_un.b_addr, dosp);

	/*
	 * This check is ok because a FAT BPB and a master boot record (MBB)
	 * have the same signature, in the same position within the block.
	 */
	if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
		cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
		    "device (%x.%x):%d\n",
		    getmajor(dev), getminor(dev), ldrive);
		return (EINVAL);
	}

	/*
	 * Get a summary of what is in the Master FDISK table.
	 * Normally we expect to find one partition marked as a DOS drive.
	 * This partition is the one Windows calls the primary dos partition.
	 * If the machine has any logical drives then we also expect
	 * to find a partition marked as an extended DOS partition.
	 *
	 * Sometimes we'll find multiple partitions marked as DOS drives.
	 * The Solaris fdisk program allows these partitions
	 * to be created, but Windows fdisk no longer does. We still need
	 * to support these, though, since Windows does. We also need to fix
	 * our fdisk to behave like the Windows version.
	 *
	 * It turns out that some off-the-shelf media have *only* an
	 * Extended partition, so we need to deal with that case as well.
	 *
	 * Only a single (the first) Extended or Boot Partition will
	 * be recognized. Any others will be ignored.
	 */
	for (i = 0; i < FD_NUMPART; i++) {
		DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
		    uint_t, (uint_t)dosp[i].systid,
		    uint_t, LE_32(dosp[i].relsect),
		    uint_t, LE_32(dosp[i].numsect));

		if (isDosDrive(dosp[i].systid)) {
			/*
			 * The first DOS drive is the primary one and counts
			 * as logical drive 1; further ones are remembered
			 * as "extra" drives and only numbered later.
			 */
			if (primaryPart < 0) {
				logicalDriveCount++;
				primaryPart = i;
			} else {
				extraDrives[numExtraDrives++] = i;
			}
			continue;
		}
		if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
			extendedPart = i;
			continue;
		}
		if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
			bootPart = i;
			continue;
		}
	}

	/* A request for the boot partition is satisfied right here. */
	if (ldrive == BOOT_PARTITION_DRIVE) {
		if (bootPart < 0) {
			noLogicalDrive(ldrive);
			return (EINVAL);
		}
		startsec = LE_32(dosp[bootPart].relsect);
		mediasize = LE_32(dosp[bootPart].numsect);
		goto found;
	}

	/* So is a request for the primary DOS drive, if there is one. */
	if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
		startsec = LE_32(dosp[primaryPart].relsect);
		mediasize = LE_32(dosp[primaryPart].numsect);
		goto found;
	}

	/*
	 * We are not looking for the C: drive (or the primary drive
	 * was not found), so we had better have an extended partition
	 * or extra drives in the Master FDISK table.
	 */
	if ((extendedPart < 0) && (numExtraDrives == 0)) {
		cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
		noLogicalDrive(ldrive);
		return (EINVAL);
	}

	if (extendedPart >= 0) {
		diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
		xnumsect = LE_32(dosp[extendedPart].numsect);
		do {
			/*
			 * If the seek would not cause us to change
			 * position on the drive, then we're out of
			 * extended partitions to examine.
			 */
			if (diskblk == lastseek)
				break;
			/*
			 * The drives of the table examined in the previous
			 * pass (none on the first pass) were not enough;
			 * account for them before reading the next table.
			 */
			logicalDriveCount += numDrives;
			/*
			 * Seek the next extended partition, and find
			 * logical drives within it.
			 */
			brelse(*bp);
			/*
			 * bread() block numbers are multiples of DEV_BSIZE
			 * but the device sector size (the unit of partitioning)
			 * might be larger than that; pcfs_get_device_info()
			 * has calculated the multiplicator for us.
			 */
			*bp = bread(dev,
			    pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
			if ((*bp)->b_flags & B_ERROR) {
				return (EIO);
			}

			lastseek = diskblk;
			COPY_PTBL((*bp)->b_un.b_addr, dosp);
			if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
				cmn_err(CE_NOTE, "!pcfs: "
				    "extended partition table signature err, "
				    "device (%x.%x):%d, LBA %u",
				    getmajor(dev), getminor(dev), ldrive,
				    (uint_t)pc_dbdaddr(fsp, diskblk));
				return (EINVAL);
			}
			/*
			 * Count up drives, and track where the next
			 * extended partition is in case we need it. We
			 * are expecting only one extended partition. If
			 * there is more than one we'll only go to the
			 * first one we see, but warn about ignoring.
			 */
			numDrives = 0;
			for (i = 0; i < FD_NUMPART; i++) {
				DTRACE_PROBE4(extendedpart,
				    struct pcfs *, fsp,
				    uint_t, (uint_t)dosp[i].systid,
				    uint_t, LE_32(dosp[i].relsect),
				    uint_t, LE_32(dosp[i].numsect));
				if (isDosDrive(dosp[i].systid)) {
					extndDrives[numDrives++] = i;
				} else if (isDosExtended(dosp[i].systid)) {
					/*
					 * diskblk equals lastseek until the
					 * first extended entry of this table
					 * has moved it on.
					 */
					if (diskblk != lastseek) {
						/*
						 * Already found an extended
						 * partition in this table.
						 */
						cmn_err(CE_NOTE,
						    "!pcfs: ignoring unexpected"
						    " additional extended"
						    " partition");
					} else {
						diskblk = xstartsect +
						    LE_32(dosp[i].relsect);
					}
				}
			}
		} while (ldrive > logicalDriveCount + numDrives);

		ASSERT(numDrives <= FD_NUMPART);

		if (ldrive <= logicalDriveCount + numDrives) {
			/*
			 * The number of logical drives we've found thus
			 * far is enough to get us to the one we were
			 * searching for.
			 *
			 * The index counts back from the end of the list:
			 * the highest drive number reached so far maps to
			 * extndDrives[0].
			 */
			driveIndex = logicalDriveCount + numDrives - ldrive;
			mediasize =
			    LE_32(dosp[extndDrives[driveIndex]].numsect);
			/* relsect here is relative to the table just read */
			startsec =
			    LE_32(dosp[extndDrives[driveIndex]].relsect) +
			    lastseek;
			if (startsec > (xstartsect + xnumsect)) {
				cmn_err(CE_NOTE, "!pcfs: extended partition "
				    "values bad");
				return (EINVAL);
			}
			goto found;
		} else {
			/*
			 * We ran out of extended dos partition
			 * drives. The only hope now is to go
			 * back to extra drives defined in the master
			 * fdisk table. But we overwrote that table
			 * already, so we must load it in again.
			 */
			logicalDriveCount += numDrives;
			brelse(*bp);
			ASSERT(fsp->pcfs_dosstart == 0);
			*bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
			    fsp->pcfs_secsize);
			if ((*bp)->b_flags & B_ERROR) {
				return (EIO);
			}
			COPY_PTBL((*bp)->b_un.b_addr, dosp);
		}
	}
	/*
	 * Still haven't found the drive, is it an extra
	 * drive defined in the main FDISK table?
	 */
	if (ldrive <= logicalDriveCount + numExtraDrives) {
		/* same counting-back-from-the-end indexing as above */
		driveIndex = logicalDriveCount + numExtraDrives - ldrive;
		ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
		mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
		startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
		goto found;
	}
	/*
	 * Still haven't found the drive, and there is
	 * nowhere else to look.
	 */
	noLogicalDrive(ldrive);
	return (EINVAL);

found:
	/*
	 * We need this value in units of sectorsize, because PCFS' internal
	 * offset calculations go haywire for > 512Byte sectors unless all
	 * pcfs_.*start values are in units of sectors.
	 * So, assign before the capacity check (that's done in DEV_BSIZE)
	 */
	fsp->pcfs_dosstart = startsec;

	/*
	 * convert from device sectors to proper units:
	 * - starting sector: DEV_BSIZE (as argument to bread())
	 * - media size: Bytes
	 */
	startsec = pc_dbdaddr(fsp, startsec);
	mediasize *= fsp->pcfs_secsize;

	/*
	 * some additional validation / warnings in case the partition table
	 * and the actual media capacity are not in accordance ...
	 */
	if (fsp->pcfs_mediasize != 0) {
		diskaddr_t startoff =
		    (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;

		/*
		 * Only warn, do not fail, when the partition starts or
		 * ends beyond the known media size.
		 */
		if (startoff >= fsp->pcfs_mediasize ||
		    startoff + mediasize > fsp->pcfs_mediasize) {
			cmn_err(CE_WARN,
			    "!pcfs: partition size (LBA start %u, %lld bytes, "
			    "device (%x.%x):%d) smaller than "
			    "mediasize (%lld bytes).\n"
			    "filesystem may be truncated, access errors "
			    "may result.\n",
			    (uint_t)startsec, (long long)mediasize,
			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
			    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
		}
	} else {
		/* media size unknown so far: take it from the partition */
		fsp->pcfs_mediasize = mediasize;
	}

	return (0);
}
1810
1811
1812 static fattype_t
1813 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1814 {
1815 uint32_t ncl = fsp->pcfs_ncluster;
1816
1817 if (ncl <= 4096) {
1818 if (bpb_get_FatSz16(bpb) == 0)
1819 return (FAT_UNKNOWN);
1820
1821 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1822 bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1823 return (FAT12);
1824 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1825 return (FAT12);
1826 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1827 return (FAT16);
1828
1829 switch (bpb_get_Media(bpb)) {
1830 case SS8SPT:
1831 case DS8SPT:
1832 case SS9SPT:
1833 case DS9SPT:
1834 case DS18SPT:
1835 case DS9_15SPT:
1836 /*
1837 * Is this reliable - all floppies are FAT12 ?
1838 */
1839 return (FAT12);
1840 case MD_FIXED:
1841 /*
1842 * Is this reliable - disks are always FAT16 ?
1843 */
1844 return (FAT16);
1845 default:
1846 break;
1847 }
1848 } else if (ncl <= 65536) {
1849 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1850 return (FAT32);
1851 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1852 return (FAT32);
1853 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1854 return (FAT32);
1855
1856 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1857 return (FAT16);
1858 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1859 return (FAT16);
1860 }
1861
1862 /*
1863 * We don't know
1864 */
1865 return (FAT_UNKNOWN);
1866 }
1867
1868 /*
1869 * Check to see if the BPB we found is correct.
1870 *
 * This looks far more complicated than it needs to be for pure structural
1872 * validation. The reason for this is that parseBPB() is also used for
1873 * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1874 * BPB fields have 'known good' values, even if we do not reject the BPB
1875 * when attempting to mount the filesystem.
1876 */
1877 static int
1878 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1879 {
1880 fattype_t type;
1881
1882 uint32_t ncl; /* number of clusters in file area */
1883 uint32_t rec;
1884 uint32_t reserved;
1885 uint32_t fsisec, bkbootsec;
1886 blkcnt_t totsec, totsec16, totsec32, datasec;
1887 size_t fatsec, fatsec16, fatsec32, rdirsec;
1888 size_t secsize;
1889 len_t mediasize;
1890 uint64_t validflags = 0;
1891
1892 if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1893 validflags |= BPB_BPBSIG_OK;
1894
1895 rec = bpb_get_RootEntCnt(bpb);
1896 reserved = bpb_get_RsvdSecCnt(bpb);
1897 fsisec = bpb_get_FSInfo32(bpb);
1898 bkbootsec = bpb_get_BkBootSec32(bpb);
1899 totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1900 totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1901 fatsec16 = bpb_get_FatSz16(bpb);
1902 fatsec32 = bpb_get_FatSz32(bpb);
1903
1904 totsec = totsec16 ? totsec16 : totsec32;
1905 fatsec = fatsec16 ? fatsec16 : fatsec32;
1906
1907 secsize = bpb_get_BytesPerSec(bpb);
1908 if (!VALID_SECSIZE(secsize))
1909 secsize = fsp->pcfs_secsize;
1910 if (secsize != fsp->pcfs_secsize) {
1911 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1912 getmajor(fsp->pcfs_xdev),
1913 getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1914 PC_DPRINTF2(3, "!BPB secsize %d != "
1915 "autodetected media block size %d\n",
1916 (int)secsize, (int)fsp->pcfs_secsize);
1917 if (fsp->pcfs_ldrive) {
1918 /*
1919 * We've already attempted to parse the partition
1920 * table. If the block size used for that don't match
1921 * the PCFS sector size, we're hosed one way or the
1922 * other. Just try what happens.
1923 */
1924 secsize = fsp->pcfs_secsize;
1925 PC_DPRINTF1(3,
1926 "!pcfs: Using autodetected secsize %d\n",
1927 (int)secsize);
1928 } else {
1929 /*
1930 * This allows mounting lofi images of PCFS partitions
1931 * with sectorsize != DEV_BSIZE. We can't parse the
1932 * partition table on whole-disk images unless the
1933 * (undocumented) "secsize=..." mount option is used,
1934 * but at least this allows us to mount if we have
1935 * an image of a partition.
1936 */
1937 PC_DPRINTF1(3,
1938 "!pcfs: Using BPB secsize %d\n", (int)secsize);
1939 }
1940 }
1941
1942 if (fsp->pcfs_mediasize == 0) {
1943 mediasize = (len_t)totsec * (len_t)secsize;
1944 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1945 "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1946 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1947 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1948 } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1949 cmn_err(CE_WARN,
1950 "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1951 "FAT BPB mediasize (%lld Bytes).\n"
1952 "truncated filesystem on device (%x.%x):%d, access errors "
1953 "possible.\n",
1954 (long long)fsp->pcfs_mediasize,
1955 (long long)(totsec * (blkcnt_t)secsize),
1956 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1957 fsp->pcfs_ldrive);
1958 mediasize = fsp->pcfs_mediasize;
1959 } else {
1960 /*
1961 * This is actually ok. A FAT needs not occupy the maximum
1962 * space available in its partition, it can be shorter.
1963 */
1964 mediasize = (len_t)totsec * (len_t)secsize;
1965 }
1966
1967 /*
1968 * Since we let just about anything pass through this function,
1969 * fence against divide-by-zero here.
1970 */
1971 if (secsize)
1972 rdirsec = roundup(rec * 32, secsize) / secsize;
1973 else
1974 rdirsec = 0;
1975
1976 /*
1977 * This assignment is necessary before pc_dbdaddr() can first be
1978 * used. Must initialize the value here.
1979 */
1980 fsp->pcfs_secsize = secsize;
1981 fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
1982
1983 fsp->pcfs_mediasize = mediasize;
1984
1985 fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
1986 fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
1987 fsp->pcfs_mediadesc = bpb_get_Media(bpb);
1988 fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
1989 fsp->pcfs_rdirsec = rdirsec;
1990
1991 /*
1992 * Remember: All PCFS offset calculations in sectors. Before I/O
1993 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
1994 * necessary so that media with > 512Byte sector sizes work correctly.
1995 */
1996 fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
1997 fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
1998 fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
1999 datasec = totsec -
2000 (blkcnt_t)fatsec * fsp->pcfs_numfat -
2001 (blkcnt_t)rdirsec -
2002 (blkcnt_t)reserved;
2003
2004 DTRACE_PROBE4(fatgeometry,
2005 blkcnt_t, totsec, size_t, fatsec,
2006 size_t, rdirsec, blkcnt_t, datasec);
2007
2008 /*
2009 * UINT32_MAX is an underflow check - we calculate in "blkcnt_t" which
2010 * is 64bit in order to be able to catch "impossible" sector counts.
2011 * A sector count in FAT must fit 32bit unsigned int.
2012 */
2013 if (totsec != 0 &&
2014 (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2015 (len_t)totsec * (len_t)secsize <= mediasize &&
2016 datasec < totsec && datasec <= UINT32_MAX)
2017 validflags |= BPB_TOTSEC_OK;
2018
2019 if (mediasize >= (len_t)datasec * (len_t)secsize)
2020 validflags |= BPB_MEDIASZ_OK;
2021
2022 if (VALID_SECSIZE(secsize))
2023 validflags |= BPB_SECSIZE_OK;
2024 if (VALID_SPCL(fsp->pcfs_spcl))
2025 validflags |= BPB_SECPERCLUS_OK;
2026 if (VALID_CLSIZE(fsp->pcfs_clsize))
2027 validflags |= BPB_CLSIZE_OK;
2028 if (VALID_NUMFATS(fsp->pcfs_numfat))
2029 validflags |= BPB_NUMFAT_OK;
2030 if (VALID_RSVDSEC(reserved) && reserved < totsec)
2031 validflags |= BPB_RSVDSECCNT_OK;
2032 if (VALID_MEDIA(fsp->pcfs_mediadesc))
2033 validflags |= BPB_MEDIADESC_OK;
2034 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2035 validflags |= BPB_BOOTSIG16_OK;
2036 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2037 validflags |= BPB_BOOTSIG32_OK;
2038 if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2039 validflags |= BPB_FSTYPSTR16_OK;
2040 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2041 validflags |= BPB_FSTYPSTR32_OK;
2042 if (VALID_OEMNAME(bpb_OEMName(bpb)))
2043 validflags |= BPB_OEMNAME_OK;
2044 if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2045 validflags |= BPB_BKBOOTSEC_OK;
2046 if (fsisec > 0 && fsisec <= reserved)
2047 validflags |= BPB_FSISEC_OK;
2048 if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2049 validflags |= BPB_JMPBOOT_OK;
2050 if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2051 validflags |= BPB_FSVER_OK;
2052 if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2053 validflags |= BPB_VOLLAB16_OK;
2054 if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2055 validflags |= BPB_VOLLAB32_OK;
2056 if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2057 validflags |= BPB_EXTFLAGS_OK;
2058
2059 /*
2060 * Try to determine which FAT format to use.
2061 *
2062 * Calculate the number of clusters in order to determine
2063 * the type of FAT we are looking at. This is the only
2064 * recommended way of determining FAT type, though there
2065 * are other hints in the data, this is the best way.
2066 *
2067 * Since we let just about "anything" pass through this function
2068 * without early exits, fence against divide-by-zero here.
2069 *
2070 * datasec was already validated against UINT32_MAX so we know
2071 * the result will not overflow the 32bit calculation.
2072 */
2073 if (fsp->pcfs_spcl)
2074 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2075 else
2076 ncl = 0;
2077
2078 fsp->pcfs_ncluster = ncl;
2079
2080 /*
2081 * From the Microsoft FAT specification:
2082 * In the following example, when it says <, it does not mean <=.
2083 * Note also that the numbers are correct. The first number for
2084 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2085 * and the '<' signs are not wrong.
2086 *
2087 * We "specialdetect" the corner cases, and use at least one "extra"
2088 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2089 * count is dangerously close to the boundaries.
2090 */
2091
2092 if (ncl <= PCF_FIRSTCLUSTER) {
2093 type = FAT_UNKNOWN;
2094 } else if (ncl < 4085) {
2095 type = FAT12;
2096 } else if (ncl <= 4096) {
2097 type = FAT_QUESTIONABLE;
2098 } else if (ncl < 65525) {
2099 type = FAT16;
2100 } else if (ncl <= 65536) {
2101 type = FAT_QUESTIONABLE;
2102 } else if (ncl < PCF_LASTCLUSTER32) {
2103 type = FAT32;
2104 } else {
2105 type = FAT_UNKNOWN;
2106 }
2107
2108 DTRACE_PROBE4(parseBPB__initial,
2109 struct pcfs *, fsp, unsigned char *, bpb,
2110 int, validflags, fattype_t, type);
2111
2112 recheck:
2113 fsp->pcfs_fatsec = fatsec;
2114
2115 /* Do some final sanity checks for each specific type of FAT */
2116 switch (type) {
2117 case FAT12:
2118 if (rec != 0)
2119 validflags |= BPB_ROOTENTCNT_OK;
2120 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2121 bpb_get_TotSec16(bpb) == 0)
2122 validflags |= BPB_TOTSEC16_OK;
2123 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2124 bpb_get_TotSec32(bpb) == 0)
2125 validflags |= BPB_TOTSEC32_OK;
2126 if (bpb_get_FatSz16(bpb) == fatsec)
2127 validflags |= BPB_FATSZ16_OK;
2128 if (fatsec * secsize >= ncl * 3 / 2)
2129 validflags |= BPB_FATSZ_OK;
2130 if (ncl < 4085)
2131 validflags |= BPB_NCLUSTERS_OK;
2132
2133 fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2134 fsp->pcfs_rootblksize =
2135 fsp->pcfs_rdirsec * secsize;
2136 fsp->pcfs_fsistart = 0;
2137
2138 if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2139 type = FAT_UNKNOWN;
2140 break;
2141 case FAT16:
2142 if (rec != 0)
2143 validflags |= BPB_ROOTENTCNT_OK;
2144 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2145 bpb_get_TotSec16(bpb) == 0)
2146 validflags |= BPB_TOTSEC16_OK;
2147 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2148 bpb_get_TotSec32(bpb) == 0)
2149 validflags |= BPB_TOTSEC32_OK;
2150 if (bpb_get_FatSz16(bpb) == fatsec)
2151 validflags |= BPB_FATSZ16_OK;
2152 if (fatsec * secsize >= ncl * 2)
2153 validflags |= BPB_FATSZ_OK;
2154 if (ncl >= 4085 && ncl < 65525)
2155 validflags |= BPB_NCLUSTERS_OK;
2156
2157 fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2158 fsp->pcfs_rootblksize =
2159 fsp->pcfs_rdirsec * secsize;
2160 fsp->pcfs_fsistart = 0;
2161
2162 if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2163 type = FAT_UNKNOWN;
2164 break;
2165 case FAT32:
2166 if (rec == 0)
2167 validflags |= BPB_ROOTENTCNT_OK;
2168 if (bpb_get_TotSec16(bpb) == 0)
2169 validflags |= BPB_TOTSEC16_OK;
2170 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2171 validflags |= BPB_TOTSEC32_OK;
2172 if (bpb_get_FatSz16(bpb) == 0)
2173 validflags |= BPB_FATSZ16_OK;
2174 if (bpb_get_FatSz32(bpb) == fatsec)
2175 validflags |= BPB_FATSZ32_OK;
2176 if (fatsec * secsize >= ncl * 4)
2177 validflags |= BPB_FATSZ_OK;
2178 if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2179 validflags |= BPB_NCLUSTERS_OK;
2180
2181 fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2182 fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2183 fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2184 if (validflags & BPB_FSISEC_OK)
2185 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2186 fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2187 if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2188 validflags |= BPB_ROOTCLUSTER_OK;
2189
2190 /*
2191 * Current PCFS code only works if 'pcfs_rdirstart'
2192 * contains the root cluster number on FAT32.
2193 * That's a mis-use and would better be changed.
2194 */
2195 fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2196
2197 if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2198 type = FAT_UNKNOWN;
2199 break;
2200 case FAT_QUESTIONABLE:
2201 type = secondaryBPBChecks(fsp, bpb, secsize);
2202 goto recheck;
2203 default:
2204 ASSERT(type == FAT_UNKNOWN);
2205 break;
2206 }
2207
2208 ASSERT(type != FAT_QUESTIONABLE);
2209
2210 fsp->pcfs_fattype = type;
2211
2212 if (valid)
2213 *valid = validflags;
2214
2215 DTRACE_PROBE4(parseBPB__final,
2216 struct pcfs *, fsp, unsigned char *, bpb,
2217 int, validflags, fattype_t, type);
2218
2219 if (type != FAT_UNKNOWN) {
2220 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2221 ASSERT(ISP2(secsize / DEV_BSIZE));
2222 return (1);
2223 }
2224
2225 return (0);
2226 }
2227
2228
2229 /*
2230 * Detect the device's native block size (sector size).
2231 *
2232 * Test whether the device is:
2233 * - a floppy device from a known controller type via DKIOCINFO
2234 * - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2235 * - a PCMCIA sram memory card (pseudofloppy) using pcram(7d)
2236 * - a USB floppy drive (identified by drive geometry)
2237 *
2238 * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2239 * to minimize risks due to slow I/O and user hotplugging / device ejection.
2240 *
2241 * This might be a bit wasteful on kernel stack space; if anyone's
2242 * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2243 */
2244 static void
2245 pcfs_device_getinfo(struct pcfs *fsp)
2246 {
2247 dev_t rdev = fsp->pcfs_xdev;
2248 int error;
2249 union {
2250 struct dk_minfo mi;
2251 struct dk_cinfo ci;
2252 struct dk_geom gi;
2253 struct fd_char fc;
2254 } arg; /* save stackspace ... */
2255 intptr_t argp = (intptr_t)&arg;
2256 ldi_handle_t lh;
2257 ldi_ident_t li;
2258 int isfloppy, isremoveable, ishotpluggable;
2259 cred_t *cr = CRED();
2260
2261 if (ldi_ident_from_dev(rdev, &li))
2262 goto out;
2263
2264 error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2265 ldi_ident_release(li);
2266 if (error)
2267 goto out;
2268
2269 /*
2270 * Not sure if this could possibly happen. It'd be a bit like
2271 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2272 * expecting it, needs some thought if triggered ...
2273 */
2274 ASSERT(fsp->pcfs_xdev == rdev);
2275
2276 /*
2277 * Check for removeable/hotpluggable media.
2278 */
2279 if (ldi_ioctl(lh, DKIOCREMOVABLE,
2280 (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2281 isremoveable = 0;
2282 }
2283 if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2284 (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2285 ishotpluggable = 0;
2286 }
2287
2288 /*
2289 * Make sure we don't use "half-initialized" values if the ioctls fail.
2290 */
2291 if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2292 bzero(&arg, sizeof (arg));
2293 fsp->pcfs_mediasize = 0;
2294 } else {
2295 fsp->pcfs_mediasize =
2296 (len_t)arg.mi.dki_lbsize *
2297 (len_t)arg.mi.dki_capacity;
2298 }
2299
2300 if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2301 if (fsp->pcfs_secsize == 0) {
2302 fsp->pcfs_secsize = arg.mi.dki_lbsize;
2303 fsp->pcfs_sdshift =
2304 ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2305 } else {
2306 PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2307 "%d, device (%x.%x), different from user-provided "
2308 "%d. User override - ignoring autodetect result.\n",
2309 arg.mi.dki_lbsize,
2310 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2311 fsp->pcfs_secsize);
2312 }
2313 } else if (arg.mi.dki_lbsize) {
2314 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2315 "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2316 "Ignoring autodetect result.\n",
2317 arg.mi.dki_lbsize,
2318 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2319 }
2320
2321 /*
2322 * We treat the following media types as a floppy by default.
2323 */
2324 isfloppy =
2325 (arg.mi.dki_media_type == DK_FLOPPY ||
2326 arg.mi.dki_media_type == DK_ZIP ||
2327 arg.mi.dki_media_type == DK_JAZ);
2328
2329 /*
2330 * if this device understands fdio(7I) requests it's
2331 * obviously a floppy drive.
2332 */
2333 if (!isfloppy &&
2334 !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2335 isfloppy = 1;
2336
2337 /*
2338 * some devices (PCMCIA pseudofloppies) we like to treat
2339 * as floppies, but they don't understand fdio(7I) requests.
2340 */
2341 if (!isfloppy &&
2342 !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2343 (arg.ci.dki_ctype == DKC_WDC2880 ||
2344 arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2345 arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2346 arg.ci.dki_ctype == DKC_INTEL82077 ||
2347 (arg.ci.dki_ctype == DKC_PCMCIA_MEM &&
2348 arg.ci.dki_flags & DKI_PCMCIA_PFD)))
2349 isfloppy = 1;
2350
2351 /*
2352 * This is the "final fallback" test - media with
2353 * 2 heads and 80 cylinders are assumed to be floppies.
2354 * This is normally true for USB floppy drives ...
2355 */
2356 if (!isfloppy &&
2357 !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2358 (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2359 isfloppy = 1;
2360
2361 /*
2362 * This is similar to the "old" PCFS code that sets this flag
2363 * just based on the media descriptor being 0xf8 (MD_FIXED).
2364 * Should be re-worked. We really need some specialcasing for
2365 * removeable media.
2366 */
2367 if (!isfloppy) {
2368 fsp->pcfs_flags |= PCFS_NOCHK;
2369 }
2370
2371 /*
2372 * We automatically disable access time updates if the medium is
2373 * removeable and/or hotpluggable, and the admin did not explicitly
2374 * request access time updates (via the "atime" mount option).
2375 * The majority of flash-based media should fit this category.
2376 * Minimizing write access extends the lifetime of your memory stick !
2377 */
2378 if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2379 (isremoveable || ishotpluggable | isfloppy)) {
2380 fsp->pcfs_flags |= PCFS_NOATIME;
2381 }
2382
2383 (void) ldi_close(lh, FREAD, cr);
2384 out:
2385 if (fsp->pcfs_secsize == 0) {
2386 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2387 "device (%x.%x) failed, no user-provided fallback. "
2388 "Using %d bytes.\n",
2389 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2390 DEV_BSIZE);
2391 fsp->pcfs_secsize = DEV_BSIZE;
2392 fsp->pcfs_sdshift = 0;
2393 }
2394 ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2395 ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2396 }
2397
2398 /*
2399 * Get the FAT type for the DOS medium.
2400 *
2401 * -------------------------
2402 * According to Microsoft:
2403 * The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2404 * count of clusters on the volume and nothing else.
2405 * -------------------------
2406 *
2407 */
2408 static int
2409 pc_getfattype(struct pcfs *fsp)
2410 {
2411 int error = 0;
2412 buf_t *bp = NULL;
2413 struct vnode *devvp = fsp->pcfs_devvp;
2414 dev_t dev = devvp->v_rdev;
2415
2416 /*
2417 * Detect the native block size of the medium, and attempt to
2418 * detect whether the medium is removeable.
2419 * We do treat removeable media (floppies, PCMCIA memory cards,
2420 * USB and FireWire disks) differently wrt. to the frequency
2421 * and synchronicity of FAT updates.
2422 * We need to know the media block size in order to be able to
2423 * parse the partition table.
2424 */
2425 pcfs_device_getinfo(fsp);
2426
2427 /*
2428 * Unpartitioned media (floppies and some removeable devices)
2429 * don't have a partition table, the FAT BPB is at disk block 0.
2430 * Start out by reading block 0.
2431 */
2432 fsp->pcfs_dosstart = 0;
2433 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2434
2435 if (error = geterror(bp))
2436 goto out;
2437
2438 /*
2439 * If a logical drive number is requested, parse the partition table
2440 * and attempt to locate it. Otherwise, proceed immediately to the
2441 * BPB check. findTheDrive(), if successful, returns the disk block
2442 * number where the requested partition starts in "startsec".
2443 */
2444 if (fsp->pcfs_ldrive != 0) {
2445 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2446 "device (%x,%x):%d to find BPB\n",
2447 getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2448
2449 if (error = findTheDrive(fsp, &bp))
2450 goto out;
2451
2452 ASSERT(fsp->pcfs_dosstart != 0);
2453
2454 brelse(bp);
2455 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2456 fsp->pcfs_secsize);
2457 if (error = geterror(bp))
2458 goto out;
2459 }
2460
2461 /*
2462 * Validate the BPB and fill in the instance structure.
2463 */
2464 if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2465 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2466 "device (%x.%x):%d, disk LBA %u\n",
2467 getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2468 (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2469 error = EINVAL;
2470 goto out;
2471 }
2472
2473 ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2474
2475 out:
2476 /*
2477 * Release the buffer used
2478 */
2479 if (bp != NULL)
2480 brelse(bp);
2481 return (error);
2482 }
2483
2484
2485 /*
2486 * Get the file allocation table.
2487 * If there is an old FAT, invalidate it.
2488 */
2489 int
2490 pc_getfat(struct pcfs *fsp)
2491 {
2492 struct buf *bp = NULL;
2493 uchar_t *fatp = NULL;
2494 uchar_t *fat_changemap = NULL;
2495 int error;
2496 int fat_changemapsize;
2497 int flags = 0;
2498 int nfat;
2499 int altfat_mustmatch = 0;
2500 int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2501
2502 if (fsp->pcfs_fatp) {
2503 /*
2504 * There is a FAT in core.
2505 * If there are open file pcnodes or we have modified it or
2506 * it hasn't timed out yet use the in core FAT.
2507 * Otherwise invalidate it and get a new one
2508 */
2509 #ifdef notdef
2510 if (fsp->pcfs_frefs ||
2511 (fsp->pcfs_flags & PCFS_FATMOD) ||
2512 (gethrestime_sec() < fsp->pcfs_fattime)) {
2513 return (0);
2514 } else {
2515 mutex_enter(&pcfslock);
2516 pc_invalfat(fsp);
2517 mutex_exit(&pcfslock);
2518 }
2519 #endif /* notdef */
2520 return (0);
2521 }
2522
2523 /*
2524 * Get FAT and check it for validity
2525 */
2526 fatp = kmem_alloc(fatsize, KM_SLEEP);
2527 error = pc_readfat(fsp, fatp);
2528 if (error) {
2529 flags = B_ERROR;
2530 goto out;
2531 }
2532 fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2533 fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2534 fsp->pcfs_fatp = fatp;
2535 fsp->pcfs_fat_changemapsize = fat_changemapsize;
2536 fsp->pcfs_fat_changemap = fat_changemap;
2537
2538 /*
2539 * The only definite signature check is that the
2540 * media descriptor byte should match the first byte
2541 * of the FAT block.
2542 */
2543 if (fatp[0] != fsp->pcfs_mediadesc) {
2544 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2545 "media descriptor %x, FAT[0] lowbyte %x\n",
2546 (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2547 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2548 altfat_mustmatch = 1;
2549 }
2550
2551 /*
2552 * Get alternate FATs and check for consistency
2553 * This is an inlined version of pc_readfat().
2554 * Since we're only comparing FAT and alternate FAT,
2555 * there's no reason to let pc_readfat() copy data out
2556 * of the buf. Instead, compare in-situ, one cluster
2557 * at a time.
2558 */
2559 for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2560 size_t startsec;
2561 size_t off;
2562
2563 startsec = pc_dbdaddr(fsp,
2564 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2565
2566 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2567 daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2568 pc_cltodb(fsp, pc_lblkno(fsp, off)));
2569
2570 bp = bread(fsp->pcfs_xdev, fatblk,
2571 MIN(fsp->pcfs_clsize, fatsize - off));
2572 if (bp->b_flags & (B_ERROR | B_STALE)) {
2573 cmn_err(CE_NOTE,
2574 "!pcfs: alternate FAT #%d (start LBA %p)"
2575 " read error at offset %ld on device"
2576 " (%x.%x):%d",
2577 nfat, (void *)(uintptr_t)startsec, off,
2578 getmajor(fsp->pcfs_xdev),
2579 getminor(fsp->pcfs_xdev),
2580 fsp->pcfs_ldrive);
2581 flags = B_ERROR;
2582 error = EIO;
2583 goto out;
2584 }
2585 bp->b_flags |= B_STALE | B_AGE;
2586 if (bcmp(bp->b_un.b_addr, fatp + off,
2587 MIN(fsp->pcfs_clsize, fatsize - off))) {
2588 cmn_err(CE_NOTE,
2589 "!pcfs: alternate FAT #%d (start LBA %p)"
2590 " corrupted at offset %ld on device"
2591 " (%x.%x):%d",
2592 nfat, (void *)(uintptr_t)startsec, off,
2593 getmajor(fsp->pcfs_xdev),
2594 getminor(fsp->pcfs_xdev),
2595 fsp->pcfs_ldrive);
2596 if (altfat_mustmatch) {
2597 flags = B_ERROR;
2598 error = EIO;
2599 goto out;
2600 }
2601 }
2602 brelse(bp);
2603 bp = NULL; /* prevent double release */
2604 }
2605 }
2606
2607 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2608 fsp->pcfs_fatjustread = 1;
2609
2610 /*
2611 * Retrieve FAT32 fsinfo sector.
2612 * A failure to read this is not fatal to accessing the volume.
2613 * It simply means operations that count or search free blocks
2614 * will have to do a full FAT walk, vs. a possibly quicker lookup
2615 * of the summary information.
2616 * Hence, we log a message but return success overall after this point.
2617 */
2618 if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2619 struct fat_od_fsi *fsinfo_disk;
2620
2621 bp = bread(fsp->pcfs_xdev,
2622 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2623 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2624 if (bp->b_flags & (B_ERROR | B_STALE) ||
2625 !FSISIG_OK(fsinfo_disk)) {
2626 cmn_err(CE_NOTE,
2627 "!pcfs: error reading fat32 fsinfo from "
2628 "device (%x.%x):%d, block %lld",
2629 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2630 fsp->pcfs_ldrive,
2631 (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2632 fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2633 fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2634 fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2635 } else {
2636 bp->b_flags |= B_STALE | B_AGE;
2637 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2638 fsp->pcfs_fsinfo.fs_free_clusters =
2639 LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2640 fsp->pcfs_fsinfo.fs_next_free =
2641 LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2642 }
2643 brelse(bp);
2644 bp = NULL;
2645 }
2646
2647 if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2648 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2649 else
2650 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2651
2652 return (0);
2653
2654 out:
2655 cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2656 if (bp)
2657 brelse(bp);
2658 if (fatp)
2659 kmem_free(fatp, fatsize);
2660 if (fat_changemap)
2661 kmem_free(fat_changemap, fat_changemapsize);
2662
2663 if (flags) {
2664 pc_mark_irrecov(fsp);
2665 }
2666 return (error);
2667 }