Move CallBack Server thread creation, initial processing and destruction to RPC. Clean up some RPC code. Remove extraneous fields from nfs41_cb_info and clean up the code. Change KM_SLEEP in mir_nfs41_callback_thread to KM_NOSLEEP. Fix lint warnings. Incorporate code review comments. Remove unneeded variable.
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
28 * All Rights Reserved 29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/vfs.h> 36 #include <sys/vfs_opreg.h> 37 #include <sys/vnode.h> 38 #include <sys/pathname.h> 39 #include <sys/sysmacros.h> 40 #include <sys/kmem.h> 41 #include <sys/mkdev.h> 42 #include <sys/mount.h> 43 #include <sys/statvfs.h> 44 #include <sys/errno.h> 45 #include <sys/debug.h> 46 #include <sys/cmn_err.h> 47 #include <sys/utsname.h> 48 #include <sys/bootconf.h> 49 #include <sys/modctl.h> 50 #include <sys/acl.h> 51 #include <sys/flock.h> 52 #include <sys/time.h> 53 #include <sys/disp.h> 54 #include <sys/policy.h> 55 #include <sys/socket.h> 56 #include <sys/netconfig.h> 57 #include <sys/dnlc.h> 58 #include <sys/list.h> 59 #include <sys/mntent.h> 60 #include <sys/atomic.h> 61 #include <sys/tsol/label.h> 62 #include <sys/sdt.h> 63 #include <sys/avl.h> 64 65 #include <rpc/types.h> 66 #include <rpc/auth.h> 67 #include <rpc/rpcsec_gss.h> 68 #include <rpc/clnt.h> 69 70 #include <nfs/nfs.h> 71 #include <nfs/nfs_clnt.h> 72 #include <nfs/mount.h> 73 #include <nfs/nfs_acl.h> 74 75 #include <fs/fs_subr.h> 76 77 #include <nfs/nfs4.h> 78 #include <nfs/rnode4.h> 79 #include <nfs/nfs4_clnt.h> 80 #include <nfs/nfs4_clnt_impl.h> 81 #include <sys/fs/autofs.h> 82 83 84 /* 85 * Arguments passed to thread to free data structures from forced unmount. 86 */ 87 88 typedef struct { 89 vfs_t *fm_vfsp; 90 int fm_flag; 91 cred_t *fm_cr; 92 } freemountargs_t; 93 94 static void async_free_mount(vfs_t *, int, cred_t *); 95 static void nfs4_free_mount(vfs_t *, int, cred_t *); 96 static void nfs4_free_mount_thread(freemountargs_t *); 97 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *); 98 99 /* 100 * From rpcsec module (common/rpcsec). 
101 */ 102 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 103 extern void sec_clnt_freeinfo(struct sec_data *); 104 105 /* 106 * The order and contents of this structure must be kept in sync with that of 107 * rfsreqcnt_v4_tmpl in nfs_stats.c 108 */ 109 static char *v40_ops[] = { 110 "null", "compound", "reserved", "access", "close", "commit", 111 "create", "delegpurge", "delegreturn", "getattr", "getfh", "link", 112 "lock", "lockt", "locku", "lookup", "lookupp", "nverify", "open", 113 "openattr", "open_confirm", "open_downgrade", "putfh", "putpubfh", 114 "putrootfh", "read", "readdir", "readlink", "remove", "rename", 115 "renew", "restorefh", "savefh", "secinfo", "setattr", "setclientid", 116 "setclientid_confirm", "verify", "write", "release_lockowner" 117 }; 118 119 /* 120 * The order and contents of this structure must be kept in sync with that of 121 * rfsreqcnt_v41_tmpl in nfs_stats.c 122 */ 123 static char *v41_ops[] = { 124 "null", "compound", "reserved", "access", 125 "close", "commit", "create", "delegpurge", "delegreturn", 126 "getattr", "getfh", "link", "lock", "lockt", "locku", 127 "lookup", "lookupp", "nverify", "open", "openattr", 128 "open_confirm", "open_downgrade", "putfh", "putpubfh", 129 "putrootfh", "read", "readdir", "readlink", "remove", "rename", 130 "renew", "restorefh", "savefh", "secinfo", "setattr", 131 "setclientid", "setclientid_confirm", "verify", "write", 132 "release_lockowner", "backchannel_ctl", "bind_conn_to_session", 133 "exchange_id", "create_session", "destroy_session", 134 "free_stateid", "get_dir_delegation", "getdeviceinfo", 135 "getdevicelist", "layoutcommit", "layoutget", "layoutreturn", 136 "secinfo_no_name", "sequence", "set_ssv", "test_stateid", 137 "want_delegation", "destroy_clientid", "reclaim_complete"}; 138 139 static char **rfsnames_v4[NFS4_MINORVERSMAX + 1] = {v40_ops, v41_ops}; 140 /* 141 * nfs4_max_mount_retry is the number of times the client will redrive a 142 * mount compound 
before giving up and returning failure. The intent is 143 * to redrive mount compounds which fail NFS4ERR_STALE so that if a 144 * component of the server path being mounted goes stale, it can 145 * "recover" by redriving the mount compound (LOOKUP ops). This recovery 146 * code is needed outside of the recovery framework because mount is a 147 * special case. The client doesn't create vnodes/rnodes for components 148 * of the server path being mounted. The recovery code recovers real 149 * client objects, not STALE FHs which map to components of the server 150 * path being mounted. 151 * 152 * We could just fail the mount on the first time, but that would 153 * instantly trigger failover (from nfs4_mount), and the client should 154 * try to re-lookup the STALE FH before doing failover. The easiest way 155 * to "re-lookup" is to simply redrive the mount compound. 156 */ 157 static int nfs4_max_mount_retry = 2; 158 159 uint32_t nfs4_max_minor_version = NFS4_MINORVERSMAX; 160 uint32_t nfs4_min_minor_version = 0; 161 162 /* 163 * nfs4 vfs operations. 
164 */ 165 int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 166 static int nfs4_unmount(vfs_t *, int, cred_t *); 167 static int nfs4_root(vfs_t *, vnode_t **); 168 static int nfs4_statvfs(vfs_t *, struct statvfs64 *); 169 static int nfs4_sync(vfs_t *, short, cred_t *); 170 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *); 171 static int nfs4_mountroot(vfs_t *, whymountroot_t); 172 static void nfs4_freevfs(vfs_t *); 173 174 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *, 175 int, cred_t *, zone_t *); 176 177 vfsops_t *nfs4_vfsops; 178 179 int nfs4_vfsinit(void); 180 void nfs4_vfsfini(void); 181 void nfs4_minorops_init(void); 182 183 static void nfs4setclientid_init(void); 184 static void nfs4setclientid_fini(void); 185 static void destroy_nfs4_server(nfs4_server_t *); 186 static void remove_mi(nfs4_server_t *, mntinfo4_t *); 187 188 extern void nfs4_ephemeral_init(void); 189 extern void nfs4_ephemeral_fini(void); 190 191 /* 192 * Initialize the vfs structure 193 */ 194 195 static int nfs4fstyp; 196 197 198 /* 199 * Debug variable to check for rdma based 200 * transport startup and cleanup. Controlled 201 * through /etc/system. Off by default. 
202 */ 203 extern int rdma_debug; 204 205 extern int nfs41_birpc; 206 207 int 208 nfs4init(int fstyp, char *name) 209 { 210 static const fs_operation_def_t nfs4_vfsops_template[] = { 211 VFSNAME_MOUNT, { .vfs_mount = nfs4_mount }, 212 VFSNAME_UNMOUNT, { .vfs_unmount = nfs4_unmount }, 213 VFSNAME_ROOT, { .vfs_root = nfs4_root }, 214 VFSNAME_STATVFS, { .vfs_statvfs = nfs4_statvfs }, 215 VFSNAME_SYNC, { .vfs_sync = nfs4_sync }, 216 VFSNAME_VGET, { .vfs_vget = nfs4_vget }, 217 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs4_mountroot }, 218 VFSNAME_FREEVFS, { .vfs_freevfs = nfs4_freevfs }, 219 NULL, NULL 220 }; 221 int error; 222 223 nfs4_vfsops = NULL; 224 nfs4_vnodeops = NULL; 225 nfs4_trigger_vnodeops = NULL; 226 227 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops); 228 if (error != 0) { 229 zcmn_err(GLOBAL_ZONEID, CE_WARN, 230 "nfs4init: bad vfs ops template"); 231 goto out; 232 } 233 234 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops); 235 if (error != 0) { 236 zcmn_err(GLOBAL_ZONEID, CE_WARN, 237 "nfs4init: bad vnode ops template"); 238 goto out; 239 } 240 241 error = vn_make_ops("nfs4_trigger", nfs4_trigger_vnodeops_template, 242 &nfs4_trigger_vnodeops); 243 if (error != 0) { 244 zcmn_err(GLOBAL_ZONEID, CE_WARN, 245 "nfs4init: bad trigger vnode ops template"); 246 goto out; 247 } 248 249 nfs4fstyp = fstyp; 250 (void) nfs4_vfsinit(); 251 (void) nfs4_init_dot_entries(); 252 253 nfs4_minorops_init(); 254 255 out: 256 if (error) { 257 if (nfs4_trigger_vnodeops != NULL) 258 vn_freevnodeops(nfs4_trigger_vnodeops); 259 260 if (nfs4_vnodeops != NULL) 261 vn_freevnodeops(nfs4_vnodeops); 262 263 (void) vfs_freevfsops_by_type(fstyp); 264 } 265 266 return (error); 267 } 268 269 void 270 nfs4fini(void) 271 { 272 (void) nfs4_destroy_dot_entries(); 273 nfs4_vfsfini(); 274 } 275 276 void 277 nfs4_minorops_init(void) 278 { 279 int nmops; 280 281 nmops = nfs4_max_minor_version + 1; 282 283 nfs4protosw = (nfs4_minorvers_ops_t **)kmem_alloc( 284 nmops * 
sizeof (nfs4_minorvers_ops_t *), KM_SLEEP); 285 nfs4_protosw_init(nfs4protosw); 286 } 287 288 /* 289 * Create a new sec_data structure to store AUTH_DH related data: 290 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC 291 * flag set for NFS V4 since we are avoiding to contact the rpcbind 292 * daemon and is using the IP time service (IPPORT_TIMESERVER). 293 * 294 * sec_data can be freed by sec_clnt_freeinfo(). 295 */ 296 static struct sec_data * 297 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr, 298 struct knetconfig *knconf) { 299 struct sec_data *secdata; 300 dh_k4_clntdata_t *data; 301 char *pf, *p; 302 303 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0) 304 return (NULL); 305 306 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 307 secdata->flags = 0; 308 309 data = kmem_alloc(sizeof (*data), KM_SLEEP); 310 311 data->syncaddr.maxlen = syncaddr->maxlen; 312 data->syncaddr.len = syncaddr->len; 313 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP); 314 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len); 315 316 /* 317 * duplicate the knconf information for the 318 * new opaque data. 319 */ 320 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 321 *data->knconf = *knconf; 322 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 323 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 324 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 325 bcopy(knconf->knc_proto, p, KNC_STRSIZE); 326 data->knconf->knc_protofmly = pf; 327 data->knconf->knc_proto = p; 328 329 /* move server netname to the sec_data structure */ 330 data->netname = kmem_alloc(nlen, KM_SLEEP); 331 bcopy(netname, data->netname, nlen); 332 data->netnamelen = (int)nlen; 333 334 secdata->secmod = AUTH_DH; 335 secdata->rpcflavor = AUTH_DH; 336 secdata->data = (caddr_t)data; 337 338 return (secdata); 339 } 340 341 /* 342 * Returns (deep) copy of sec_data_t. Allocates all memory required; caller 343 * is responsible for freeing. 
344 */ 345 sec_data_t * 346 copy_sec_data(sec_data_t *fsecdata) { 347 sec_data_t *tsecdata; 348 349 if (fsecdata == NULL) 350 return (NULL); 351 352 if (fsecdata->rpcflavor == AUTH_DH) { 353 dh_k4_clntdata_t *fdata = (dh_k4_clntdata_t *)fsecdata->data; 354 355 if (fdata == NULL) 356 return (NULL); 357 358 tsecdata = (sec_data_t *)create_authdh_data(fdata->netname, 359 fdata->netnamelen, &fdata->syncaddr, fdata->knconf); 360 361 return (tsecdata); 362 } 363 364 tsecdata = kmem_zalloc(sizeof (sec_data_t), KM_SLEEP); 365 366 tsecdata->secmod = fsecdata->secmod; 367 tsecdata->rpcflavor = fsecdata->rpcflavor; 368 tsecdata->flags = fsecdata->flags; 369 tsecdata->uid = fsecdata->uid; 370 371 if (fsecdata->rpcflavor == RPCSEC_GSS) { 372 gss_clntdata_t *gcd = (gss_clntdata_t *)fsecdata->data; 373 374 tsecdata->data = (caddr_t)copy_sec_data_gss(gcd); 375 } else { 376 tsecdata->data = NULL; 377 } 378 379 return (tsecdata); 380 } 381 382 gss_clntdata_t * 383 copy_sec_data_gss(gss_clntdata_t *fdata) 384 { 385 gss_clntdata_t *tdata; 386 387 if (fdata == NULL) 388 return (NULL); 389 390 tdata = kmem_zalloc(sizeof (gss_clntdata_t), KM_SLEEP); 391 392 tdata->mechanism.length = fdata->mechanism.length; 393 tdata->mechanism.elements = kmem_zalloc(fdata->mechanism.length, 394 KM_SLEEP); 395 bcopy(fdata->mechanism.elements, tdata->mechanism.elements, 396 fdata->mechanism.length); 397 398 tdata->service = fdata->service; 399 400 (void) strcpy(tdata->uname, fdata->uname); 401 (void) strcpy(tdata->inst, fdata->inst); 402 (void) strcpy(tdata->realm, fdata->realm); 403 404 tdata->qop = fdata->qop; 405 406 return (tdata); 407 } 408 409 static int 410 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp) 411 { 412 servinfo4_t *si; 413 414 /* 415 * Iterate over the servinfo4 list to make sure 416 * we do not have a duplicate. 
Skip any servinfo4 417 * that has been marked "NOT IN USE" 418 */ 419 for (si = svp_head; si; si = si->sv_next) { 420 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0); 421 if (si->sv_flags & SV4_NOTINUSE) { 422 nfs_rw_exit(&si->sv_lock); 423 continue; 424 } 425 nfs_rw_exit(&si->sv_lock); 426 if (si == svp) 427 continue; 428 if (si->sv_addr.len == svp->sv_addr.len && 429 strcmp(si->sv_knconf->knc_protofmly, 430 svp->sv_knconf->knc_protofmly) == 0 && 431 bcmp(si->sv_addr.buf, svp->sv_addr.buf, 432 si->sv_addr.len) == 0) { 433 /* it's a duplicate */ 434 return (1); 435 } 436 } 437 /* it's not a duplicate */ 438 return (0); 439 } 440 441 void 442 nfs4_free_args(struct nfs_args *nargs) 443 { 444 if (nargs->knconf) { 445 if (nargs->knconf->knc_protofmly) 446 kmem_free(nargs->knconf->knc_protofmly, 447 KNC_STRSIZE); 448 if (nargs->knconf->knc_proto) 449 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 450 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 451 nargs->knconf = NULL; 452 } 453 454 if (nargs->fh) { 455 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 456 nargs->fh = NULL; 457 } 458 459 if (nargs->hostname) { 460 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 461 nargs->hostname = NULL; 462 } 463 464 if (nargs->addr) { 465 if (nargs->addr->buf) { 466 ASSERT(nargs->addr->len); 467 kmem_free(nargs->addr->buf, nargs->addr->len); 468 } 469 kmem_free(nargs->addr, sizeof (struct netbuf)); 470 nargs->addr = NULL; 471 } 472 473 if (nargs->syncaddr) { 474 ASSERT(nargs->syncaddr->len); 475 if (nargs->syncaddr->buf) { 476 ASSERT(nargs->syncaddr->len); 477 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 478 } 479 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 480 nargs->syncaddr = NULL; 481 } 482 483 if (nargs->netname) { 484 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 485 nargs->netname = NULL; 486 } 487 488 if (nargs->nfs_ext_u.nfs_extA.secdata) { 489 sec_clnt_freeinfo( 490 nargs->nfs_ext_u.nfs_extA.secdata); 491 
nargs->nfs_ext_u.nfs_extA.secdata = NULL; 492 } 493 } 494 495 496 int 497 nfs4_copyin(char *data, int datalen, struct nfs_args *nargs) 498 { 499 500 int error; 501 size_t hlen; /* length of hostname */ 502 size_t nlen; /* length of netname */ 503 char netname[MAXNETNAMELEN+1]; /* server's netname */ 504 struct netbuf addr; /* server's address */ 505 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 506 struct knetconfig *knconf; /* transport structure */ 507 struct sec_data *secdata = NULL; /* security data */ 508 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 509 STRUCT_DECL(knetconfig, knconf_tmp); 510 STRUCT_DECL(netbuf, addr_tmp); 511 int flags; 512 char *p, *pf; 513 struct pathname pn; 514 char *userbufptr; 515 516 517 bzero(nargs, sizeof (*nargs)); 518 519 STRUCT_INIT(args, get_udatamodel()); 520 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 521 if (copyin(data, STRUCT_BUF(args), MIN(datalen, 522 STRUCT_SIZE(args)))) 523 return (EFAULT); 524 525 nargs->wsize = STRUCT_FGET(args, wsize); 526 nargs->rsize = STRUCT_FGET(args, rsize); 527 nargs->timeo = STRUCT_FGET(args, timeo); 528 nargs->retrans = STRUCT_FGET(args, retrans); 529 nargs->acregmin = STRUCT_FGET(args, acregmin); 530 nargs->acregmax = STRUCT_FGET(args, acregmax); 531 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 532 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 533 534 flags = STRUCT_FGET(args, flags); 535 nargs->flags = flags; 536 537 addr.buf = NULL; 538 syncaddr.buf = NULL; 539 540 541 /* 542 * Allocate space for a knetconfig structure and 543 * its strings and copy in from user-land. 
544 */ 545 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 546 STRUCT_INIT(knconf_tmp, get_udatamodel()); 547 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 548 STRUCT_SIZE(knconf_tmp))) { 549 kmem_free(knconf, sizeof (*knconf)); 550 return (EFAULT); 551 } 552 553 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 554 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 555 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 556 if (get_udatamodel() != DATAMODEL_LP64) { 557 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 558 } else { 559 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 560 } 561 562 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 563 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 564 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 565 if (error) { 566 kmem_free(pf, KNC_STRSIZE); 567 kmem_free(p, KNC_STRSIZE); 568 kmem_free(knconf, sizeof (*knconf)); 569 return (error); 570 } 571 572 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 573 if (error) { 574 kmem_free(pf, KNC_STRSIZE); 575 kmem_free(p, KNC_STRSIZE); 576 kmem_free(knconf, sizeof (*knconf)); 577 return (error); 578 } 579 580 581 knconf->knc_protofmly = pf; 582 knconf->knc_proto = p; 583 584 nargs->knconf = knconf; 585 586 /* 587 * Get server address 588 */ 589 STRUCT_INIT(addr_tmp, get_udatamodel()); 590 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 591 STRUCT_SIZE(addr_tmp))) { 592 error = EFAULT; 593 goto errout; 594 } 595 596 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 597 userbufptr = STRUCT_FGETP(addr_tmp, buf); 598 addr.len = STRUCT_FGET(addr_tmp, len); 599 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 600 addr.maxlen = addr.len; 601 if (copyin(userbufptr, addr.buf, addr.len)) { 602 kmem_free(addr.buf, addr.len); 603 error = EFAULT; 604 goto errout; 605 } 606 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 607 608 /* 609 * Get the root fhandle 610 */ 611 error = 
pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn); 612 if (error) 613 goto errout; 614 615 /* Volatile fh: keep server paths, so use actual-size strings */ 616 nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP); 617 bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen); 618 nargs->fh[pn.pn_pathlen] = '\0'; 619 pn_free(&pn); 620 621 622 /* 623 * Get server's hostname 624 */ 625 if (flags & NFSMNT_HOSTNAME) { 626 error = copyinstr(STRUCT_FGETP(args, hostname), 627 netname, sizeof (netname), &hlen); 628 if (error) 629 goto errout; 630 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 631 (void) strcpy(nargs->hostname, netname); 632 633 } else { 634 nargs->hostname = NULL; 635 } 636 637 638 /* 639 * If there are syncaddr and netname data, load them in. This is 640 * to support data needed for NFSV4 when AUTH_DH is the negotiated 641 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 642 */ 643 netname[0] = '\0'; 644 if (flags & NFSMNT_SECURE) { 645 646 /* get syncaddr */ 647 STRUCT_INIT(addr_tmp, get_udatamodel()); 648 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 649 STRUCT_SIZE(addr_tmp))) { 650 error = EINVAL; 651 goto errout; 652 } 653 userbufptr = STRUCT_FGETP(addr_tmp, buf); 654 syncaddr.len = STRUCT_FGET(addr_tmp, len); 655 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 656 syncaddr.maxlen = syncaddr.len; 657 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 658 kmem_free(syncaddr.buf, syncaddr.len); 659 error = EFAULT; 660 goto errout; 661 } 662 663 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 664 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 665 666 /* get server's netname */ 667 if (copyinstr(STRUCT_FGETP(args, netname), netname, 668 sizeof (netname), &nlen)) { 669 error = EFAULT; 670 goto errout; 671 } 672 673 netname[nlen] = '\0'; 674 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 675 (void) strcpy(nargs->netname, netname); 676 } 677 678 /* 679 * Get the extention data which has the security data 
structure. 680 * This includes data for AUTH_SYS as well. 681 */ 682 if (flags & NFSMNT_NEWARGS) { 683 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 684 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 685 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 686 /* 687 * Indicating the application is using the new 688 * sec_data structure to pass in the security 689 * data. 690 */ 691 if (STRUCT_FGETP(args, 692 nfs_ext_u.nfs_extA.secdata) != NULL) { 693 error = sec_clnt_loadinfo( 694 (struct sec_data *)STRUCT_FGETP(args, 695 nfs_ext_u.nfs_extA.secdata), 696 &secdata, get_udatamodel()); 697 } 698 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 699 } 700 } 701 702 if (error) 703 goto errout; 704 705 /* 706 * Failover support: 707 * 708 * We may have a linked list of nfs_args structures, 709 * which means the user is looking for failover. If 710 * the mount is either not "read-only" or "soft", 711 * we want to bail out with EINVAL. 712 */ 713 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 714 nargs->nfs_ext_u.nfs_extB.next = 715 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 716 717 errout: 718 if (error) 719 nfs4_free_args(nargs); 720 721 return (error); 722 } 723 724 725 /* 726 * nfs mount vfsop 727 * Set up mount info record and attach it to vfs struct. 
728 */ 729 int 730 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 731 { 732 char *data = uap->dataptr; 733 int error; 734 vnode_t *rtvp; /* the server's root */ 735 mntinfo4_t *mi; /* mount info, pointed at by vfs */ 736 struct knetconfig *rdma_knconf; /* rdma transport structure */ 737 rnode4_t *rp; 738 struct servinfo4 *svp; /* nfs server info */ 739 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */ 740 struct servinfo4 *svp_head; /* first nfs server info */ 741 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */ 742 struct sec_data *secdata; /* security data */ 743 struct nfs_args *args = NULL; 744 int flags, addr_type, removed; 745 zone_t *zone = nfs_zone(); 746 zone_t *mntzone = NULL; 747 748 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 749 return (EPERM); 750 if (mvp->v_type != VDIR) 751 return (ENOTDIR); 752 753 /* 754 * get arguments 755 * 756 * nfs_args is now versioned and is extensible, so 757 * uap->datalen might be different from sizeof (args) 758 * in a compatible situation. 759 */ 760 more: 761 if (!(uap->flags & MS_SYSSPACE)) { 762 if (args == NULL) 763 args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP); 764 else 765 nfs4_free_args(args); 766 error = nfs4_copyin(data, uap->datalen, args); 767 if (error) { 768 if (args) { 769 kmem_free(args, sizeof (*args)); 770 } 771 return (error); 772 } 773 } else { 774 args = (struct nfs_args *)data; 775 } 776 777 flags = args->flags; 778 779 /* 780 * If the request changes the locking type, disallow the remount, 781 * because it's questionable whether we can transfer the 782 * locking state correctly. 783 */ 784 if (uap->flags & MS_REMOUNT) { 785 if (!(uap->flags & MS_SYSSPACE)) { 786 nfs4_free_args(args); 787 kmem_free(args, sizeof (*args)); 788 } 789 if ((mi = VFTOMI4(vfsp)) != NULL) { 790 uint_t new_mi_llock; 791 uint_t old_mi_llock; 792 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 793 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 
1 : 0; 794 if (old_mi_llock != new_mi_llock) 795 return (EBUSY); 796 } 797 return (0); 798 } 799 800 /* 801 * For ephemeral mount trigger stub vnodes, we have two problems 802 * to solve: racing threads will likely fail the v_count check, and 803 * we want only one to proceed with the mount. 804 * 805 * For stubs, if the mount has already occurred (via a racing thread), 806 * just return success. If not, skip the v_count check and proceed. 807 * Note that we are already serialised at this point. 808 */ 809 mutex_enter(&mvp->v_lock); 810 if (vn_matchops(mvp, nfs4_trigger_vnodeops)) { 811 /* mntpt is a v4 stub vnode */ 812 ASSERT(RP_ISSTUB(VTOR4(mvp))); 813 ASSERT(!(uap->flags & MS_OVERLAY)); 814 ASSERT(!(mvp->v_flag & VROOT)); 815 if (vn_mountedvfs(mvp) != NULL) { 816 /* ephemeral mount has already occurred */ 817 ASSERT(uap->flags & MS_SYSSPACE); 818 mutex_exit(&mvp->v_lock); 819 return (0); 820 } 821 } else { 822 /* mntpt is a non-v4 or v4 non-stub vnode */ 823 if (!(uap->flags & MS_OVERLAY) && 824 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 825 mutex_exit(&mvp->v_lock); 826 if (!(uap->flags & MS_SYSSPACE)) { 827 nfs4_free_args(args); 828 kmem_free(args, sizeof (*args)); 829 } 830 return (EBUSY); 831 } 832 } 833 mutex_exit(&mvp->v_lock); 834 835 /* make sure things are zeroed for errout: */ 836 rtvp = NULL; 837 mi = NULL; 838 secdata = NULL; 839 840 /* 841 * A valid knetconfig structure is required. 
842 */ 843 if (!(flags & NFSMNT_KNCONF) || 844 args->knconf == NULL || args->knconf->knc_protofmly == NULL || 845 args->knconf->knc_proto == NULL || 846 (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) { 847 if (!(uap->flags & MS_SYSSPACE)) { 848 nfs4_free_args(args); 849 kmem_free(args, sizeof (*args)); 850 } 851 return (EINVAL); 852 } 853 854 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 855 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 856 if (!(uap->flags & MS_SYSSPACE)) { 857 nfs4_free_args(args); 858 kmem_free(args, sizeof (*args)); 859 } 860 return (EINVAL); 861 } 862 863 /* 864 * Allocate a servinfo4 struct. 865 */ 866 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 867 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 868 if (svp_tail) { 869 svp_2ndlast = svp_tail; 870 svp_tail->sv_next = svp; 871 } else { 872 svp_head = svp; 873 svp_2ndlast = svp; 874 } 875 876 svp_tail = svp; 877 svp->sv_knconf = args->knconf; 878 args->knconf = NULL; 879 880 /* 881 * Get server address 882 */ 883 if (args->addr == NULL || args->addr->buf == NULL) { 884 error = EINVAL; 885 goto errout; 886 } 887 888 svp->sv_addr.maxlen = args->addr->maxlen; 889 svp->sv_addr.len = args->addr->len; 890 svp->sv_addr.buf = args->addr->buf; 891 args->addr->buf = NULL; 892 893 /* 894 * Get the root fhandle 895 */ 896 if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) { 897 error = EINVAL; 898 goto errout; 899 } 900 901 svp->sv_path = args->fh; 902 svp->sv_pathlen = strlen(args->fh) + 1; 903 args->fh = NULL; 904 905 /* 906 * Get server's hostname 907 */ 908 if (flags & NFSMNT_HOSTNAME) { 909 if (args->hostname == NULL || (strlen(args->hostname) > 910 MAXNETNAMELEN)) { 911 error = EINVAL; 912 goto errout; 913 } 914 svp->sv_hostnamelen = strlen(args->hostname) + 1; 915 svp->sv_hostname = args->hostname; 916 args->hostname = NULL; 917 } else { 918 char *p = "unknown-host"; 919 svp->sv_hostnamelen = strlen(p) + 1; 920 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, 
KM_SLEEP); 921 (void) strcpy(svp->sv_hostname, p); 922 } 923 924 /* 925 * RDMA MOUNT SUPPORT FOR NFS v4. 926 * Establish, is it possible to use RDMA, if so overload the 927 * knconf with rdma specific knconf and free the orignal knconf. 928 */ 929 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 930 /* 931 * Determine the addr type for RDMA, IPv4 or v6. 932 */ 933 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 934 addr_type = AF_INET; 935 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 936 addr_type = AF_INET6; 937 938 if (rdma_reachable(addr_type, &svp->sv_addr, 939 &rdma_knconf) == 0) { 940 /* 941 * If successful, hijack the orignal knconf and 942 * replace with the new one, depending on the flags. 943 */ 944 svp->sv_origknconf = svp->sv_knconf; 945 svp->sv_knconf = rdma_knconf; 946 } else { 947 if (flags & NFSMNT_TRYRDMA) { 948 #ifdef DEBUG 949 if (rdma_debug) 950 zcmn_err(getzoneid(), CE_WARN, 951 "no RDMA onboard, revert\n"); 952 #endif 953 } 954 955 if (flags & NFSMNT_DORDMA) { 956 /* 957 * If proto=rdma is specified and no RDMA 958 * path to this server is avialable then 959 * ditch this server. 960 * This is not included in the mountable 961 * server list or the replica list. 962 * Check if more servers are specified; 963 * Failover case, otherwise bail out of mount. 964 */ 965 if (args->nfs_args_ext == NFS_ARGS_EXTB && 966 args->nfs_ext_u.nfs_extB.next != NULL) { 967 data = (char *) 968 args->nfs_ext_u.nfs_extB.next; 969 if (uap->flags & MS_RDONLY && 970 !(flags & NFSMNT_SOFT)) { 971 if (svp_head->sv_next == NULL) { 972 svp_tail = NULL; 973 svp_2ndlast = NULL; 974 sv4_free(svp_head); 975 goto more; 976 } else { 977 svp_tail = svp_2ndlast; 978 svp_2ndlast->sv_next = 979 NULL; 980 sv4_free(svp); 981 goto more; 982 } 983 } 984 } else { 985 /* 986 * This is the last server specified 987 * in the nfs_args list passed down 988 * and its not rdma capable. 
989 */ 990 if (svp_head->sv_next == NULL) { 991 /* 992 * Is this the only one 993 */ 994 error = EINVAL; 995 #ifdef DEBUG 996 if (rdma_debug) 997 zcmn_err(getzoneid(), 998 CE_WARN, 999 "No RDMA srv"); 1000 #endif 1001 goto errout; 1002 } else { 1003 /* 1004 * There is list, since some 1005 * servers specified before 1006 * this passed all requirements 1007 */ 1008 svp_tail = svp_2ndlast; 1009 svp_2ndlast->sv_next = NULL; 1010 sv4_free(svp); 1011 goto proceed; 1012 } 1013 } 1014 } 1015 } 1016 } 1017 1018 /* 1019 * If there are syncaddr and netname data, load them in. This is 1020 * to support data needed for NFSV4 when AUTH_DH is the negotiated 1021 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 1022 */ 1023 if (args->flags & NFSMNT_SECURE) { 1024 svp->sv_dhsec = create_authdh_data(args->netname, 1025 strlen(args->netname), 1026 args->syncaddr, svp->sv_knconf); 1027 } 1028 1029 /* 1030 * Get the extention data which has the security data structure. 1031 * This includes data for AUTH_SYS as well. 1032 */ 1033 if (flags & NFSMNT_NEWARGS) { 1034 switch (args->nfs_args_ext) { 1035 case NFS_ARGS_EXTA: 1036 case NFS_ARGS_EXTB: 1037 /* 1038 * Indicating the application is using the new 1039 * sec_data structure to pass in the security 1040 * data. 1041 */ 1042 secdata = args->nfs_ext_u.nfs_extA.secdata; 1043 if (secdata == NULL) { 1044 error = EINVAL; 1045 } else if (uap->flags & MS_SYSSPACE) { 1046 /* 1047 * Need to validate the flavor here if 1048 * sysspace, userspace was already 1049 * validate from the nfs_copyin function. 
1050 */ 1051 switch (secdata->rpcflavor) { 1052 case AUTH_NONE: 1053 case AUTH_UNIX: 1054 case AUTH_LOOPBACK: 1055 case AUTH_DES: 1056 case RPCSEC_GSS: 1057 break; 1058 default: 1059 error = EINVAL; 1060 goto errout; 1061 } 1062 } 1063 args->nfs_ext_u.nfs_extA.secdata = NULL; 1064 break; 1065 1066 default: 1067 error = EINVAL; 1068 break; 1069 } 1070 1071 } else if (flags & NFSMNT_SECURE) { 1072 /* 1073 * NFSMNT_SECURE is deprecated but we keep it 1074 * to support the rogue user-generated application 1075 * that may use this undocumented interface to do 1076 * AUTH_DH security, e.g. our own rexd. 1077 * 1078 * Also note that NFSMNT_SECURE is used for passing 1079 * AUTH_DH info to be used in negotiation. 1080 */ 1081 secdata = create_authdh_data(args->netname, 1082 strlen(args->netname), args->syncaddr, svp->sv_knconf); 1083 1084 } else { 1085 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 1086 secdata->secmod = secdata->rpcflavor = AUTH_SYS; 1087 secdata->data = NULL; 1088 } 1089 1090 svp->sv_secdata = secdata; 1091 1092 /* 1093 * User does not explictly specify a flavor, and a user 1094 * defined default flavor is passed down. 1095 */ 1096 if (flags & NFSMNT_SECDEFAULT) { 1097 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1098 svp->sv_flags |= SV4_TRYSECDEFAULT; 1099 nfs_rw_exit(&svp->sv_lock); 1100 } 1101 1102 /* 1103 * Failover support: 1104 * 1105 * We may have a linked list of nfs_args structures, 1106 * which means the user is looking for failover. If 1107 * the mount is either not "read-only" or "soft", 1108 * we want to bail out with EINVAL. 1109 */ 1110 if (args->nfs_args_ext == NFS_ARGS_EXTB && 1111 args->nfs_ext_u.nfs_extB.next != NULL) { 1112 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 1113 data = (char *)args->nfs_ext_u.nfs_extB.next; 1114 goto more; 1115 } 1116 error = EINVAL; 1117 goto errout; 1118 } 1119 1120 /* 1121 * Determine the zone we're being mounted into. 
1122 */ 1123 zone_hold(mntzone = zone); /* start with this assumption */ 1124 if (getzoneid() == GLOBAL_ZONEID) { 1125 zone_rele(mntzone); 1126 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 1127 ASSERT(mntzone != NULL); 1128 if (mntzone != zone) { 1129 error = EBUSY; 1130 goto errout; 1131 } 1132 } 1133 1134 if (is_system_labeled()) { 1135 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 1136 svp->sv_knconf, cr); 1137 1138 if (error > 0) 1139 goto errout; 1140 1141 if (error == -1) { 1142 /* change mount to read-only to prevent write-down */ 1143 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 1144 } 1145 } 1146 1147 /* 1148 * Stop the mount from going any further if the zone is going away. 1149 */ 1150 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 1151 error = EBUSY; 1152 goto errout; 1153 } 1154 1155 /* 1156 * Get root vnode. 1157 */ 1158 proceed: 1159 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 1160 if (error) { 1161 /* if nfs4rootvp failed, it will free svp_head */ 1162 svp_head = NULL; 1163 goto errout; 1164 } 1165 1166 mi = VTOMI4(rtvp); 1167 1168 /* 1169 * Set option fields in the mount info record 1170 */ 1171 1172 if (svp_head->sv_next) { 1173 mutex_enter(&mi->mi_lock); 1174 mi->mi_flags |= MI4_LLOCK; 1175 mutex_exit(&mi->mi_lock); 1176 } 1177 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args); 1178 if (error) 1179 goto errout; 1180 1181 /* 1182 * Time to tie in the mirror mount info at last! 
1183 */ 1184 if (flags & NFSMNT_EPHEMERAL) 1185 error = nfs4_record_ephemeral_mount(mi, mvp); 1186 1187 errout: 1188 if (error) { 1189 if (rtvp != NULL) { 1190 rp = VTOR4(rtvp); 1191 if (rp->r_flags & R4HASHED) 1192 rp4_rmhash(rp); 1193 } 1194 if (mi != NULL) { 1195 nfs4_async_stop(vfsp); 1196 nfs4_async_manager_stop(vfsp); 1197 nfs4_remove_mi_from_server(mi, NULL); 1198 if (rtvp != NULL) 1199 VN_RELE(rtvp); 1200 if (mntzone != NULL) 1201 zone_rele(mntzone); 1202 /* need to remove it from the zone */ 1203 removed = nfs4_mi_zonelist_remove(mi); 1204 if (removed) 1205 zone_rele(mi->mi_zone); 1206 MI4_RELE(mi); 1207 if (!(uap->flags & MS_SYSSPACE) && args) { 1208 nfs4_free_args(args); 1209 kmem_free(args, sizeof (*args)); 1210 } 1211 return (error); 1212 } 1213 if (svp_head) 1214 sv4_free(svp_head); 1215 } 1216 1217 if (!(uap->flags & MS_SYSSPACE) && args) { 1218 nfs4_free_args(args); 1219 kmem_free(args, sizeof (*args)); 1220 } 1221 if (rtvp != NULL) 1222 VN_RELE(rtvp); 1223 1224 if (mntzone != NULL) 1225 zone_rele(mntzone); 1226 1227 return (error); 1228 } 1229 1230 #ifdef DEBUG 1231 #define VERS_MSG "NFS4 server " 1232 #else 1233 #define VERS_MSG "NFS server " 1234 #endif 1235 1236 #define READ_MSG \ 1237 VERS_MSG "%s returned 0 for read transfer size" 1238 #define WRITE_MSG \ 1239 VERS_MSG "%s returned 0 for write transfer size" 1240 #define SIZE_MSG \ 1241 VERS_MSG "%s returned 0 for maximum file size" 1242 1243 /* 1244 * Get the symbolic link text from the server for a given filehandle 1245 * of that symlink. 
1246 * 1247 * (get symlink text) PUTFH READLINK 1248 */ 1249 static int 1250 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr, 1251 int flags) 1252 { 1253 COMPOUND4args_clnt args; 1254 COMPOUND4res_clnt res; 1255 int doqueue; 1256 nfs_argop4 argop[2]; 1257 nfs_resop4 *resop; 1258 READLINK4res *lr_res; 1259 uint_t len; 1260 bool_t needrecov = FALSE; 1261 nfs4_recov_state_t recov_state; 1262 nfs4_sharedfh_t *sfh; 1263 nfs4_error_t e; 1264 int num_retry = nfs4_max_mount_retry; 1265 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1266 1267 sfh = sfh4_get(fh, mi); 1268 recov_state.rs_flags = 0; 1269 recov_state.rs_num_retry_despite_err = 0; 1270 1271 recov_retry: 1272 nfs4_error_zinit(&e); 1273 1274 args.array_len = 2; 1275 args.array = argop; 1276 args.ctag = TAG_GET_SYMLINK; 1277 1278 if (! recovery) { 1279 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 1280 if (e.error) { 1281 sfh4_rele(&sfh); 1282 return (e.error); 1283 } 1284 } 1285 1286 /* 0. putfh symlink fh */ 1287 argop[0].argop = OP_CPUTFH; 1288 argop[0].nfs_argop4_u.opcputfh.sfh = sfh; 1289 1290 /* 1. readlink */ 1291 argop[1].argop = OP_READLINK; 1292 1293 doqueue = 1; 1294 1295 rfs4call(mi, NULL, &args, &res, cr, &doqueue, 0, &e); 1296 1297 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 1298 1299 if (needrecov && !recovery && num_retry-- > 0) { 1300 1301 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1302 "getlinktext_otw: initiating recovery\n")); 1303 1304 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 1305 OP_READLINK, NULL) == FALSE) { 1306 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1307 if (!e.error) 1308 (void) xdr_free(xdr_COMPOUND4res_clnt, 1309 (caddr_t)&res); 1310 goto recov_retry; 1311 } 1312 } 1313 1314 /* 1315 * If non-NFS4 pcol error and/or we weren't able to recover. 1316 */ 1317 if (e.error != 0) { 1318 if (! 
recovery) 1319 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1320 sfh4_rele(&sfh); 1321 return (e.error); 1322 } 1323 1324 if (res.status) { 1325 e.error = geterrno4(res.status); 1326 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1327 if (! recovery) 1328 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1329 sfh4_rele(&sfh); 1330 return (e.error); 1331 } 1332 1333 /* res.status == NFS4_OK */ 1334 ASSERT(res.status == NFS4_OK); 1335 1336 resop = &res.array[1]; /* readlink res */ 1337 lr_res = &resop->nfs_resop4_u.opreadlink; 1338 1339 /* treat symlink name as data */ 1340 *linktextp = utf8_to_str(&lr_res->link, &len, NULL); 1341 1342 if (! recovery) 1343 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 1344 sfh4_rele(&sfh); 1345 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1346 return (0); 1347 } 1348 1349 /* 1350 * Skip over consecutive slashes and "/./" in a pathname. 1351 */ 1352 void 1353 pathname_skipslashdot(struct pathname *pnp) 1354 { 1355 char *c1, *c2; 1356 1357 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') { 1358 1359 c1 = pnp->pn_path + 1; 1360 c2 = pnp->pn_path + 2; 1361 1362 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) { 1363 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */ 1364 pnp->pn_pathlen = pnp->pn_pathlen - 2; 1365 } else { 1366 pnp->pn_path++; 1367 pnp->pn_pathlen--; 1368 } 1369 } 1370 } 1371 1372 /* 1373 * Resolve a symbolic link path. The symlink is in the nth component of 1374 * svp->sv_path and has an nfs4 file handle "fh". 1375 * Upon return, the sv_path will point to the new path that has the nth 1376 * component resolved to its symlink text. 1377 */ 1378 int 1379 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh, 1380 cred_t *cr, int flags) 1381 { 1382 char *oldpath; 1383 char *symlink, *newpath; 1384 struct pathname oldpn, newpn; 1385 char component[MAXNAMELEN]; 1386 int i, addlen, error = 0; 1387 int oldpathlen; 1388 1389 /* Get the symbolic link text over the wire. 
*/ 1390 error = getlinktext_otw(mi, fh, &symlink, cr, flags); 1391 1392 if (error || symlink == NULL || strlen(symlink) == 0) 1393 return (error); 1394 1395 /* 1396 * Compose the new pathname. 1397 * Note: 1398 * - only the nth component is resolved for the pathname. 1399 * - pathname.pn_pathlen does not count the ending null byte. 1400 */ 1401 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1402 oldpath = svp->sv_path; 1403 oldpathlen = svp->sv_pathlen; 1404 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) { 1405 nfs_rw_exit(&svp->sv_lock); 1406 kmem_free(symlink, strlen(symlink) + 1); 1407 return (error); 1408 } 1409 nfs_rw_exit(&svp->sv_lock); 1410 pn_alloc(&newpn); 1411 1412 /* 1413 * Skip over previous components from the oldpath so that the 1414 * oldpn.pn_path will point to the symlink component. Skip 1415 * leading slashes and "/./" (no OP_LOOKUP on ".") so that 1416 * pn_getcompnent can get the component. 1417 */ 1418 for (i = 1; i < nth; i++) { 1419 pathname_skipslashdot(&oldpn); 1420 error = pn_getcomponent(&oldpn, component); 1421 if (error) 1422 goto out; 1423 } 1424 1425 /* 1426 * Copy the old path upto the component right before the symlink 1427 * if the symlink is not an absolute path. 1428 */ 1429 if (symlink[0] != '/') { 1430 addlen = oldpn.pn_path - oldpn.pn_buf; 1431 bcopy(oldpn.pn_buf, newpn.pn_path, addlen); 1432 newpn.pn_pathlen += addlen; 1433 newpn.pn_path += addlen; 1434 newpn.pn_buf[newpn.pn_pathlen] = '/'; 1435 newpn.pn_pathlen++; 1436 newpn.pn_path++; 1437 } 1438 1439 /* copy the resolved symbolic link text */ 1440 addlen = strlen(symlink); 1441 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1442 error = ENAMETOOLONG; 1443 goto out; 1444 } 1445 bcopy(symlink, newpn.pn_path, addlen); 1446 newpn.pn_pathlen += addlen; 1447 newpn.pn_path += addlen; 1448 1449 /* 1450 * Check if there is any remaining path after the symlink component. 1451 * First, skip the symlink component. 
1452 */ 1453 pathname_skipslashdot(&oldpn); 1454 if (error = pn_getcomponent(&oldpn, component)) 1455 goto out; 1456 1457 addlen = pn_pathleft(&oldpn); /* includes counting the slash */ 1458 1459 /* 1460 * Copy the remaining path to the new pathname if there is any. 1461 */ 1462 if (addlen > 0) { 1463 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) { 1464 error = ENAMETOOLONG; 1465 goto out; 1466 } 1467 bcopy(oldpn.pn_path, newpn.pn_path, addlen); 1468 newpn.pn_pathlen += addlen; 1469 } 1470 newpn.pn_buf[newpn.pn_pathlen] = '\0'; 1471 1472 /* get the newpath and store it in the servinfo4_t */ 1473 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP); 1474 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen); 1475 newpath[newpn.pn_pathlen] = '\0'; 1476 1477 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1478 svp->sv_path = newpath; 1479 svp->sv_pathlen = strlen(newpath) + 1; 1480 nfs_rw_exit(&svp->sv_lock); 1481 1482 kmem_free(oldpath, oldpathlen); 1483 out: 1484 kmem_free(symlink, strlen(symlink) + 1); 1485 pn_free(&newpn); 1486 pn_free(&oldpn); 1487 1488 return (error); 1489 } 1490 1491 /* 1492 * Checks for minorversion mismatch and if we can retry. 1493 * returns 1 with mi_minorversion downgraded if true 1494 * or 0 otherwise 1495 */ 1496 1497 int 1498 nfs4check_minorvers_mismatch(mntinfo4_t *mi, nfs4_error_t *ep) 1499 { 1500 struct nfs_stats *nfsstatsp; 1501 1502 if (ep->stat == NFS4ERR_MINOR_VERS_MISMATCH || 1503 ep->rpc_status == RPC_CANTDECODEARGS) { 1504 mutex_enter(&mi->mi_lock); 1505 if (NFS4_MINORVERSION(mi) > nfs4_min_minor_version) { 1506 mi->mi_minorversion -= 1; 1507 mi->mi_attrvers = mi->mi_minorversion; 1508 nfsstatsp = zone_getspecific(nfsstat_zone_key, 1509 nfs_zone()); 1510 ASSERT(nfsstatsp != NULL); 1511 /* 1512 * Update the mi fields to that of the correct 1513 * minor version. 
Note that we are not adjusting 1514 * the kstat count for the previous MISMATCHED 1515 * compound since we want the mismatched compound 1516 * to be accounted against the mismatched 1517 * version. 1518 */ 1519 mi->mi_reqs = nfsstatsp-> 1520 nfs_stats_v4[mi->mi_minorversion].rfsreqcnt_ptr; 1521 mi->mi_rfsnames = rfsnames_v4[mi->mi_minorversion]; 1522 mutex_exit(&mi->mi_lock); 1523 return (1); 1524 } 1525 mutex_exit(&mi->mi_lock); 1526 } 1527 return (0); 1528 } 1529 1530 void 1531 nfs4_set_minorversion(mntinfo4_t *mi, int minorversion) 1532 { 1533 struct nfs_stats *nfsstatsp; 1534 1535 mutex_enter(&mi->mi_lock); 1536 mi->mi_minorversion = minorversion; 1537 mi->mi_attrvers = minorversion; 1538 1539 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1540 ASSERT(nfsstatsp != NULL); 1541 mi->mi_reqs = nfsstatsp-> 1542 nfs_stats_v4[mi->mi_minorversion].rfsreqcnt_ptr; 1543 mi->mi_rfsnames = rfsnames_v4[mi->mi_minorversion]; 1544 1545 mutex_exit(&mi->mi_lock); 1546 } 1547 1548 /* 1549 * Get the root filehandle for the given filesystem and server, and update 1550 * svp. 1551 * 1552 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop 1553 * to coordinate with recovery. Otherwise, the caller is assumed to be 1554 * the recovery thread or have already done a start_fop. 1555 * 1556 * Errors are returned by the nfs4_error_t parameter. 
1557 */ 1558 1559 static void 1560 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, 1561 int flags, cred_t *cr, nfs4_error_t *ep) 1562 { 1563 COMPOUND4args_clnt args; 1564 COMPOUND4res_clnt res; 1565 int doqueue = 1; 1566 nfs_argop4 *argop; 1567 nfs_resop4 *resop; 1568 nfs4_ga_res_t *garp; 1569 int num_argops; 1570 lookup4_param_t lookuparg; 1571 nfs_fh4 *tmpfhp; 1572 nfs_fh4 *resfhp; 1573 bool_t needrecov = FALSE; 1574 nfs4_recov_state_t recov_state; 1575 int llndx; 1576 int nthcomp; 1577 int recovery = !(flags & NFS4_GETFH_NEEDSOP); 1578 int versmismatch = 0; 1579 1580 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1581 ASSERT(svp->sv_path != NULL); 1582 if (svp->sv_path[0] == '\0') { 1583 nfs_rw_exit(&svp->sv_lock); 1584 nfs4_error_init(ep, EINVAL); 1585 return; 1586 } 1587 nfs_rw_exit(&svp->sv_lock); 1588 1589 do { 1590 nfs4_set_clientid(mi, NULL, cr, recovery, ep); 1591 1592 if (ep->error == 0) 1593 break; 1594 /* 1595 * Return if in recovery or if not a minorversion mismatch 1596 * error. Else retry. 1597 */ 1598 1599 if (recovery || 1600 !(versmismatch = nfs4check_minorvers_mismatch(mi, ep))) 1601 return; 1602 1603 } while (versmismatch); 1604 1605 recov_state.rs_flags = 0; 1606 recov_state.rs_num_retry_despite_err = 0; 1607 recov_retry: 1608 nfs4_error_zinit(ep); 1609 1610 if (!recovery) { 1611 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT, 1612 &recov_state, NULL); 1613 1614 /* 1615 * If recovery has been started and this request as 1616 * initiated by a mount, then we must wait for recovery 1617 * to finish before proceeding, otherwise, the error 1618 * cleanup would remove data structures needed by the 1619 * recovery thread. 
1620 */ 1621 if (ep->error) { 1622 mutex_enter(&mi->mi_lock); 1623 if (mi->mi_flags & MI4_MOUNTING) { 1624 mi->mi_flags |= MI4_RECOV_FAIL; 1625 mi->mi_error = EIO; 1626 1627 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1628 "nfs4getfh_otw: waiting 4 recovery\n")); 1629 1630 while (mi->mi_flags & MI4_RECOV_ACTIV) 1631 cv_wait(&mi->mi_failover_cv, 1632 &mi->mi_lock); 1633 } 1634 mutex_exit(&mi->mi_lock); 1635 return; 1636 } 1637 1638 /* 1639 * If the client does not specify a specific flavor to use 1640 * and has not gotten a secinfo list from the server yet, 1641 * retrieve the secinfo list from the server and use a 1642 * flavor from the list to mount. 1643 * 1644 * If fail to get the secinfo list from the server, then 1645 * try the default flavor. 1646 */ 1647 if ((svp->sv_flags & SV4_TRYSECDEFAULT) && 1648 svp->sv_secinfo == NULL) { 1649 (void) nfs4_secinfo_path(mi, cr, FALSE); 1650 } 1651 } 1652 1653 if (recovery) 1654 args.ctag = TAG_REMAP_MOUNT; 1655 else 1656 args.ctag = TAG_MOUNT; 1657 1658 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES; 1659 lookuparg.argsp = &args; 1660 lookuparg.resp = &res; 1661 lookuparg.header_len = 2; /* Putrootfh, getfh */ 1662 lookuparg.trailer_len = 0; 1663 lookuparg.ga_bits = MI4_FSINFO_ATTRMAP(mi); 1664 lookuparg.mi = mi; 1665 1666 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1667 ASSERT(svp->sv_path != NULL); 1668 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0); 1669 nfs_rw_exit(&svp->sv_lock); 1670 1671 argop = args.array; 1672 num_argops = args.array_len; 1673 1674 /* choose public or root filehandle */ 1675 if (flags & NFS4_GETFH_PUBLIC) 1676 argop[0].argop = OP_PUTPUBFH; 1677 else 1678 argop[0].argop = OP_PUTROOTFH; 1679 1680 /* get fh */ 1681 argop[1].argop = OP_GETFH; 1682 1683 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 1684 "nfs4getfh_otw: %s call, mi 0x%p", 1685 needrecov ? 
"recov" : "first", (void *)mi)); 1686 1687 rfs4call(mi, NULL, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep); 1688 1689 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp); 1690 1691 if (needrecov) { 1692 bool_t abort; 1693 1694 if (recovery) { 1695 nfs4args_lookup_free(argop, num_argops); 1696 kmem_free(argop, 1697 lookuparg.arglen * sizeof (nfs_argop4)); 1698 if (!ep->error) 1699 (void) xdr_free(xdr_COMPOUND4res_clnt, 1700 (caddr_t)&res); 1701 return; 1702 } 1703 1704 NFS4_DEBUG(nfs4_client_recov_debug, 1705 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); 1706 1707 abort = nfs4_start_recovery(ep, mi, NULL, 1708 NULL, NULL, NULL, OP_GETFH, NULL); 1709 if (!ep->error) { 1710 ep->error = geterrno4(res.status); 1711 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1712 } 1713 nfs4args_lookup_free(argop, num_argops); 1714 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1715 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1716 /* have another go? */ 1717 if (abort == FALSE) 1718 goto recov_retry; 1719 return; 1720 } 1721 1722 /* 1723 * No recovery, but check if error is set. 1724 */ 1725 if (ep->error) { 1726 nfs4args_lookup_free(argop, num_argops); 1727 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1728 if (!recovery) 1729 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1730 needrecov); 1731 return; 1732 } 1733 1734 is_link_err: 1735 1736 /* for non-recovery errors */ 1737 if (res.status && res.status != NFS4ERR_SYMLINK) { 1738 if (!recovery) { 1739 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1740 needrecov); 1741 } 1742 nfs4args_lookup_free(argop, num_argops); 1743 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1744 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1745 return; 1746 } 1747 1748 /* 1749 * If any intermediate component in the path is a symbolic link, 1750 * resolve the symlink, then try mount again using the new path. 
1751 */ 1752 if (res.status == NFS4ERR_SYMLINK) { 1753 int where; 1754 1755 /* 1756 * This must be from OP_LOOKUP failure. The (cfh) for this 1757 * OP_LOOKUP is a symlink node. Found out where the 1758 * OP_GETFH is for the (cfh) that is a symlink node. 1759 * 1760 * Example: 1761 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR, 1762 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR 1763 * 1764 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink. 1765 * In this case, where = 7, nthcomp = 2. 1766 */ 1767 where = res.array_len - 2; 1768 ASSERT(where > 0); 1769 1770 resop = &res.array[where - 1]; 1771 ASSERT(resop->resop == OP_GETFH); 1772 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1773 nthcomp = res.array_len/3 - 1; 1774 1775 /* 1776 * Need to call nfs4_end_op before resolve_sympath to avoid 1777 * potential nfs4_start_op deadlock. 1778 */ 1779 if (!recovery) 1780 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1781 needrecov); 1782 1783 ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, 1784 flags); 1785 1786 nfs4args_lookup_free(argop, num_argops); 1787 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1788 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1789 1790 if (ep->error) 1791 return; 1792 1793 goto recov_retry; 1794 } 1795 1796 /* getfh */ 1797 resop = &res.array[res.array_len - 2]; 1798 ASSERT(resop->resop == OP_GETFH); 1799 resfhp = &resop->nfs_resop4_u.opgetfh.object; 1800 1801 /* getattr fsinfo res */ 1802 resop++; 1803 garp = &resop->nfs_resop4_u.opgetattr.ga_res; 1804 1805 /* 1806 * verify attrs successfully decoded before 1807 * referencing anything in n4g_ext_res. 
1808 */ 1809 if (garp->n4g_attrerr != NFS4_GETATTR_OP_OK) { 1810 if (!recovery) 1811 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1812 needrecov); 1813 ep->error = garp->n4g_attrerr; 1814 nfs4args_lookup_free(argop, num_argops); 1815 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1816 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1817 return; 1818 } 1819 1820 *vtp = garp->n4g_va.va_type; 1821 1822 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet; 1823 1824 mutex_enter(&mi->mi_lock); 1825 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support) 1826 mi->mi_flags |= MI4_LINK; 1827 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support) 1828 mi->mi_flags |= MI4_SYMLINK; 1829 1830 /* 1831 * XXX Currently does not handle change in server personas 1832 */ 1833 if (ATTR_ISSET(garp->n4g_ext_res->n4g_suppattrs, LAYOUT_TYPE) && 1834 !(mi->mi_flags & MI4_PNFS)) { 1835 DTRACE_PROBE4(nfsc__i_getfhotw, char *, 1836 "non pNFS server:", char *, svp->sv_hostname, 1837 char *, "supports FATTR4_FS_LAYOUTTYPE_MASK for ", 1838 char *, svp->sv_path); 1839 } 1840 1841 /* XXX conditionalize lines above */ 1842 if (ATTR_ISSET(garp->n4g_ext_res->n4g_suppattrs, ACL)) 1843 mi->mi_flags |= MI4_ACL; 1844 mutex_exit(&mi->mi_lock); 1845 1846 if (garp->n4g_ext_res->n4g_maxread == 0) 1847 mi->mi_tsize = 1848 MIN(MAXBSIZE, mi->mi_tsize); 1849 else 1850 mi->mi_tsize = 1851 MIN(garp->n4g_ext_res->n4g_maxread, 1852 mi->mi_tsize); 1853 1854 if (garp->n4g_ext_res->n4g_maxwrite == 0) 1855 mi->mi_stsize = 1856 MIN(MAXBSIZE, mi->mi_stsize); 1857 else 1858 mi->mi_stsize = 1859 MIN(garp->n4g_ext_res->n4g_maxwrite, 1860 mi->mi_stsize); 1861 1862 if (garp->n4g_ext_res->n4g_maxfilesize != 0) 1863 mi->mi_maxfilesize = 1864 MIN(garp->n4g_ext_res->n4g_maxfilesize, 1865 mi->mi_maxfilesize); 1866 1867 /* 1868 * If the final component is a a symbolic link, resolve the symlink, 1869 * then try mount again using the new path. 1870 * 1871 * Assume no symbolic link for root filesysm "/". 
1872 */ 1873 if (*vtp == VLNK) { 1874 /* 1875 * nthcomp is the total result length minus 1876 * the 1st 2 OPs (PUTROOTFH, GETFH), 1877 * then divided by 3 (LOOKUP,GETFH,GETATTR) 1878 * 1879 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR 1880 * LOOKUP 2nd-comp GETFH GETATTR 1881 * 1882 * (8 - 2)/3 = 2 1883 */ 1884 nthcomp = (res.array_len - 2)/3; 1885 1886 /* 1887 * Need to call nfs4_end_op before resolve_sympath to avoid 1888 * potential nfs4_start_op deadlock. See RFE 4777612. 1889 */ 1890 if (!recovery) 1891 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, 1892 needrecov); 1893 1894 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr, 1895 flags); 1896 1897 nfs4args_lookup_free(argop, num_argops); 1898 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1899 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1900 1901 if (ep->error) 1902 return; 1903 1904 goto recov_retry; 1905 } 1906 1907 /* 1908 * We need to figure out where in the compound the getfh 1909 * for the parent directory is. If the object to be mounted is 1910 * the root, then there is no lookup at all: 1911 * PUTROOTFH, GETFH. 1912 * If the object to be mounted is in the root, then the compound is: 1913 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR. 1914 * In either of these cases, the index of the GETFH is 1. 1915 * If it is not at the root, then it's something like: 1916 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR, 1917 * LOOKUP, GETFH, GETATTR 1918 * In this case, the index is llndx (last lookup index) - 2. 
1919 */ 1920 if (llndx == -1 || llndx == 2) 1921 resop = &res.array[1]; 1922 else { 1923 ASSERT(llndx > 2); 1924 resop = &res.array[llndx-2]; 1925 } 1926 1927 ASSERT(resop->resop == OP_GETFH); 1928 tmpfhp = &resop->nfs_resop4_u.opgetfh.object; 1929 1930 /* save the filehandles for the replica */ 1931 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 1932 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE); 1933 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len; 1934 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf, 1935 tmpfhp->nfs_fh4_len); 1936 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE); 1937 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len; 1938 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len); 1939 1940 /* initialize fsid and supp_attrs for server fs */ 1941 svp->sv_fsid = garp->n4g_fsid; 1942 svp->sv_supp_attrs = garp->n4g_ext_res->n4g_suppattrs; 1943 ATTRMAP_SET(svp->sv_supp_attrs, MI4_MAND_ATTRMAP(mi)); 1944 svp->sv_supp_exclcreat = garp->n4g_ext_res->n4g_supp_exclcreat; 1945 1946 nfs_rw_exit(&svp->sv_lock); 1947 1948 nfs4args_lookup_free(argop, num_argops); 1949 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); 1950 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1951 if (!recovery) 1952 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); 1953 } 1954 1955 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ 1956 static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ 1957 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ 1958 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; 1959 1960 /* 1961 * Remap the root filehandle for the given filesystem. 1962 * 1963 * results returned via the nfs4_error_t parameter. 
1964 */ 1965 void 1966 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) 1967 { 1968 struct servinfo4 *svp; 1969 vtype_t vtype; 1970 nfs_fh4 rootfh; 1971 int getfh_flags; 1972 char *orig_sv_path; 1973 int orig_sv_pathlen, num_retry; 1974 1975 mutex_enter(&mi->mi_lock); 1976 1977 remap_retry: 1978 svp = mi->mi_curr_serv; 1979 getfh_flags = 1980 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0; 1981 getfh_flags |= 1982 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0; 1983 mutex_exit(&mi->mi_lock); 1984 1985 /* 1986 * Just in case server path being mounted contains 1987 * symlinks and fails w/STALE, save the initial sv_path 1988 * so we can redrive the initial mount compound with the 1989 * initial sv_path -- not a symlink-expanded version. 1990 * 1991 * This could only happen if a symlink was expanded 1992 * and the expanded mount compound failed stale. Because 1993 * it could be the case that the symlink was removed at 1994 * the server (and replaced with another symlink/dir, 1995 * we need to use the initial sv_path when attempting 1996 * to re-lookup everything and recover. 1997 */ 1998 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1999 orig_sv_pathlen = svp->sv_pathlen; 2000 orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 2001 bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); 2002 nfs_rw_exit(&svp->sv_lock); 2003 2004 num_retry = nfs4_max_mount_retry; 2005 2006 do { 2007 /* 2008 * Get the root fh from the server. Retry nfs4_max_mount_retry 2009 * (2) times if it fails with STALE since the recovery 2010 * infrastructure doesn't do STALE recovery for components 2011 * of the server path to the object being mounted. 2012 */ 2013 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep); 2014 2015 if (ep->error == 0 && ep->stat == NFS4_OK) 2016 break; 2017 2018 /* 2019 * For some reason, the mount compound failed. 
Before 2020 * retrying, we need to restore the original sv_path 2021 * because it might have contained symlinks that were 2022 * expanded by nfsgetfh_otw before the failure occurred. 2023 * replace current sv_path with orig sv_path -- just in case 2024 * it changed due to embedded symlinks. 2025 */ 2026 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2027 if (orig_sv_pathlen != svp->sv_pathlen) { 2028 kmem_free(svp->sv_path, svp->sv_pathlen); 2029 svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); 2030 svp->sv_pathlen = orig_sv_pathlen; 2031 } 2032 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 2033 nfs_rw_exit(&svp->sv_lock); 2034 2035 } while (num_retry-- > 0); 2036 2037 kmem_free(orig_sv_path, orig_sv_pathlen); 2038 2039 if (ep->error != 0 || ep->stat != 0) { 2040 return; 2041 } 2042 2043 if (vtype != VNON && vtype != mi->mi_type) { 2044 /* shouldn't happen */ 2045 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2046 "nfs4_remap_root: server root vnode type (%d) doesn't " 2047 "match mount info (%d)", vtype, mi->mi_type); 2048 } 2049 2050 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2051 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 2052 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len; 2053 nfs_rw_exit(&svp->sv_lock); 2054 sfh4_update(mi->mi_rootfh, &rootfh); 2055 2056 /* 2057 * It's possible that recovery took place on the filesystem 2058 * and the server has been updated between the time we did 2059 * the nfs4getfh_otw and now. Re-drive the otw operation 2060 * to make sure we have a good fh. 
2061 */ 2062 mutex_enter(&mi->mi_lock); 2063 if (mi->mi_curr_serv != svp) 2064 goto remap_retry; 2065 2066 mutex_exit(&mi->mi_lock); 2067 } 2068 2069 static int 2070 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, 2071 int flags, cred_t *cr, zone_t *zone) 2072 { 2073 vnode_t *rtvp = NULL; 2074 mntinfo4_t *mi; 2075 dev_t nfs_dev; 2076 int error = 0; 2077 rnode4_t *rp; 2078 int i; 2079 struct vattr va; 2080 vtype_t vtype = VNON; 2081 vtype_t tmp_vtype = VNON; 2082 struct servinfo4 *firstsvp = NULL, *svp = svp_head; 2083 nfs4_server_t *np; 2084 nfs4_oo_hash_bucket_t *bucketp; 2085 nfs_fh4 fh; 2086 char *droptext = ""; 2087 nfs4_fname_t *mfname; 2088 nfs4_error_t e; 2089 char *orig_sv_path; 2090 int orig_sv_pathlen, num_retry, removed; 2091 cred_t *lcr = NULL, *tcr = cr; 2092 struct nfs_stats *nfsstatsp; 2093 2094 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 2095 ASSERT(nfsstatsp != NULL); 2096 2097 ASSERT(nfs_zone() == zone); 2098 ASSERT(crgetref(cr)); 2099 2100 /* 2101 * Create a mount record and link it to the vfs struct. 
2102 */ 2103 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 2104 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 2105 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL); 2106 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL); 2107 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL); 2108 2109 if (!(flags & NFSMNT_SOFT)) 2110 mi->mi_flags |= MI4_HARD; 2111 if ((flags & NFSMNT_NOPRINT)) 2112 mi->mi_flags |= MI4_NOPRINT; 2113 if (flags & NFSMNT_INT) 2114 mi->mi_flags |= MI4_INT; 2115 if (flags & NFSMNT_PUBLIC) 2116 mi->mi_flags |= MI4_PUBLIC; 2117 if (flags & NFSMNT_MIRRORMOUNT) 2118 mi->mi_flags |= MI4_MIRRORMOUNT; 2119 mi->mi_retrans = NFS_RETRIES; 2120 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 2121 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 2122 mi->mi_timeo = nfs4_cots_timeo; 2123 else 2124 mi->mi_timeo = NFS_TIMEO; 2125 mi->mi_prog = NFS_PROGRAM; 2126 mi->mi_vers = NFS_V4; 2127 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 2128 mi->mi_servers = svp; 2129 mi->mi_curr_serv = svp; 2130 mi->mi_acregmin = SEC2HR(ACREGMIN); 2131 mi->mi_acregmax = SEC2HR(ACREGMAX); 2132 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 2133 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 2134 mi->mi_fh_expire_type = FH4_PERSISTENT; 2135 mi->mi_clientid_next = NULL; 2136 mi->mi_clientid_prev = NULL; 2137 mi->mi_grace_wait = 0; 2138 mi->mi_error = 0; 2139 mi->mi_srvsettime = 0; 2140 2141 mi->mi_count = 1; 2142 2143 mi->mi_tsize = nfs4_tsize(svp->sv_knconf); 2144 mi->mi_stsize = mi->mi_tsize; 2145 2146 if (flags & NFSMNT_DIRECTIO) 2147 mi->mi_flags |= MI4_DIRECTIO; 2148 2149 mi->mi_flags |= MI4_MOUNTING; 2150 2151 /* 2152 * Until a time when the user can set minorversion, do auto 2153 * negotiation. 2154 */ 2155 nfs4_set_minorversion(mi, nfs4_max_minor_version); 2156 2157 /* 2158 * Make a vfs struct for nfs. We do this here instead of below 2159 * because rtvp needs a vfs before we can do a getattr on it. 
2160 * 2161 * Assign a unique device id to the mount 2162 */ 2163 mutex_enter(&nfs_minor_lock); 2164 do { 2165 nfs_minor = (nfs_minor + 1) & MAXMIN32; 2166 nfs_dev = makedevice(nfs_major, nfs_minor); 2167 } while (vfs_devismounted(nfs_dev)); 2168 mutex_exit(&nfs_minor_lock); 2169 2170 vfsp->vfs_dev = nfs_dev; 2171 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp); 2172 vfsp->vfs_data = (caddr_t)mi; 2173 vfsp->vfs_fstype = nfsfstyp; 2174 vfsp->vfs_bsize = nfs4_bsize; 2175 2176 /* 2177 * Initialize fields used to support async putpage operations. 2178 */ 2179 for (i = 0; i < NFS4_ASYNC_TYPES; i++) 2180 mi->mi_async_clusters[i] = nfs4_async_clusters; 2181 mi->mi_async_init_clusters = nfs4_async_clusters; 2182 mi->mi_async_curr = &mi->mi_async_reqs[0]; 2183 mi->mi_max_threads = nfs4_max_threads; 2184 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 2185 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 2186 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 2187 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 2188 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); 2189 2190 mi->mi_vfsp = vfsp; 2191 zone_hold(mi->mi_zone = zone); 2192 nfs4_mi_zonelist_add(mi); 2193 2194 /* 2195 * Initialize the <open owner/cred> hash table. 2196 */ 2197 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 2198 bucketp = &(mi->mi_oo_list[i]); 2199 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL); 2200 list_create(&bucketp->b_oo_hash_list, 2201 sizeof (nfs4_open_owner_t), 2202 offsetof(nfs4_open_owner_t, oo_hash_node)); 2203 } 2204 2205 /* 2206 * Initialize the freed open owner list. 
2207 */ 2208 mi->mi_foo_num = 0; 2209 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS; 2210 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t), 2211 offsetof(nfs4_open_owner_t, oo_foo_node)); 2212 2213 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t), 2214 offsetof(nfs4_lost_rqst_t, lr_node)); 2215 2216 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t), 2217 offsetof(nfs4_bseqid_entry_t, bs_node)); 2218 2219 /* 2220 * Initialize the msg buffer. 2221 */ 2222 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t), 2223 offsetof(nfs4_debug_msg_t, msg_node)); 2224 mi->mi_msg_count = 0; 2225 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL); 2226 2227 /* 2228 * Initialize kstats 2229 */ 2230 nfs4_mnt_kstat_init(vfsp); 2231 2232 /* 2233 * Initialize the shared filehandle pool. 2234 */ 2235 sfh4_createtab(&mi->mi_filehandles); 2236 2237 /* 2238 * Save server path we're attempting to mount. 2239 */ 2240 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2241 orig_sv_pathlen = svp_head->sv_pathlen; 2242 orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); 2243 bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); 2244 nfs_rw_exit(&svp->sv_lock); 2245 2246 /* 2247 * Make the GETFH call to get root fh for each replica. 2248 */ 2249 if (svp_head->sv_next) 2250 droptext = ", dropping replica"; 2251 2252 /* 2253 * If the uid is set then set the creds for secure mounts 2254 * by proxy processes such as automountd. 
2255 */ 2256 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2257 if (svp->sv_secdata->uid != 0 && 2258 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 2259 lcr = crdup(cr); 2260 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 2261 tcr = lcr; 2262 } 2263 nfs_rw_exit(&svp->sv_lock); 2264 for (svp = svp_head; svp; svp = svp->sv_next) { 2265 if (nfs4_chkdup_servinfo4(svp_head, svp)) { 2266 nfs_cmn_err(error, CE_WARN, 2267 VERS_MSG "Host %s is a duplicate%s", 2268 svp->sv_hostname, droptext); 2269 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2270 svp->sv_flags |= SV4_NOTINUSE; 2271 nfs_rw_exit(&svp->sv_lock); 2272 continue; 2273 } 2274 mi->mi_curr_serv = svp; 2275 2276 /* 2277 * Just in case server path being mounted contains 2278 * symlinks and fails w/STALE, save the initial sv_path 2279 * so we can redrive the initial mount compound with the 2280 * initial sv_path -- not a symlink-expanded version. 2281 * 2282 * This could only happen if a symlink was expanded 2283 * and the expanded mount compound failed stale. Because 2284 * it could be the case that the symlink was removed at 2285 * the server (and replaced with another symlink/dir, 2286 * we need to use the initial sv_path when attempting 2287 * to re-lookup everything and recover. 2288 * 2289 * Other mount errors should evenutally be handled here also 2290 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount 2291 * failures will result in mount being redriven a few times. 2292 */ 2293 num_retry = nfs4_max_mount_retry; 2294 do { 2295 nfs4getfh_otw(mi, svp, &tmp_vtype, 2296 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) | 2297 NFS4_GETFH_NEEDSOP, tcr, &e); 2298 2299 if (e.error == 0 && e.stat == NFS4_OK) 2300 break; 2301 2302 /* 2303 * replace current sv_path with orig sv_path -- just in 2304 * case it changed due to embedded symlinks. 
2305 */ 2306 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2307 if (orig_sv_pathlen != svp->sv_pathlen) { 2308 kmem_free(svp->sv_path, svp->sv_pathlen); 2309 svp->sv_path = kmem_alloc(orig_sv_pathlen, 2310 KM_SLEEP); 2311 svp->sv_pathlen = orig_sv_pathlen; 2312 } 2313 bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); 2314 nfs_rw_exit(&svp->sv_lock); 2315 2316 } while (num_retry-- > 0); 2317 2318 error = e.error ? e.error : geterrno4(e.stat); 2319 if (error) { 2320 nfs_cmn_err(error, CE_WARN, 2321 VERS_MSG "initial call to %s failed%s: %m", 2322 svp->sv_hostname, droptext); 2323 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2324 svp->sv_flags |= SV4_NOTINUSE; 2325 nfs_rw_exit(&svp->sv_lock); 2326 mi->mi_flags &= ~MI4_RECOV_FAIL; 2327 mi->mi_error = 0; 2328 nfs4_remove_mi_from_server(mi, NULL); 2329 continue; 2330 } 2331 2332 if (tmp_vtype == VBAD) { 2333 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2334 VERS_MSG "%s returned a bad file type for " 2335 "root%s", svp->sv_hostname, droptext); 2336 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2337 svp->sv_flags |= SV4_NOTINUSE; 2338 nfs_rw_exit(&svp->sv_lock); 2339 continue; 2340 } 2341 2342 if (vtype == VNON) { 2343 vtype = tmp_vtype; 2344 } else if (vtype != tmp_vtype) { 2345 zcmn_err(mi->mi_zone->zone_id, CE_WARN, 2346 VERS_MSG "%s returned a different file type " 2347 "for root%s", svp->sv_hostname, droptext); 2348 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2349 svp->sv_flags |= SV4_NOTINUSE; 2350 nfs_rw_exit(&svp->sv_lock); 2351 continue; 2352 } 2353 if (firstsvp == NULL) 2354 firstsvp = svp; 2355 } 2356 2357 kmem_free(orig_sv_path, orig_sv_pathlen); 2358 2359 if (firstsvp == NULL) { 2360 if (error == 0) 2361 error = ENOENT; 2362 goto bad; 2363 } 2364 2365 mi->mi_curr_serv = svp = firstsvp; 2366 2367 /* 2368 * Revert back the clientid to mi_curr_serv 2369 */ 2370 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 2371 mutex_enter(&nfs4_server_lst_lock); 2372 np = 
servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 2373 mutex_exit(&nfs4_server_lst_lock); 2374 mi->mi_clientid = np->clientid; 2375 mutex_exit(&np->s_lock); 2376 nfs4_server_rele(np); 2377 nfs_rw_exit(&mi->mi_recovlock); 2378 2379 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2380 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0); 2381 fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 2382 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 2383 mi->mi_rootfh = sfh4_get(&fh, mi); 2384 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 2385 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 2386 mi->mi_srvparentfh = sfh4_get(&fh, mi); 2387 nfs_rw_exit(&svp->sv_lock); 2388 2389 /* 2390 * Get the fname for filesystem root. 2391 */ 2392 mi->mi_fname = fn_get(NULL, ".", mi->mi_rootfh); 2393 mfname = mi->mi_fname; 2394 fn_hold(mfname); 2395 2396 /* 2397 * Make the root vnode without attributes. 2398 */ 2399 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL, 2400 &mfname, NULL, mi, cr, gethrtime()); 2401 rtvp->v_type = vtype; 2402 2403 mi->mi_curread = mi->mi_tsize; 2404 mi->mi_curwrite = mi->mi_stsize; 2405 2406 /* 2407 * Start the manager thread responsible for handling async worker 2408 * threads. 2409 */ 2410 MI4_HOLD(mi); 2411 VFS_HOLD(vfsp); /* add reference for thread */ 2412 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager, 2413 vfsp, 0, minclsyspri); 2414 ASSERT(mi->mi_manager_thread != NULL); 2415 2416 /* 2417 * Create the thread that handles over-the-wire calls for 2418 * VOP_INACTIVE. 2419 * This needs to happen after the manager thread is created. 
2420 */ 2421 MI4_HOLD(mi); 2422 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread, 2423 mi, 0, minclsyspri); 2424 ASSERT(mi->mi_inactive_thread != NULL); 2425 2426 /* If we didn't get a type, get one now */ 2427 if (rtvp->v_type == VNON) { 2428 va.va_mask = AT_TYPE; 2429 error = nfs4getattr(rtvp, &va, tcr); 2430 if (error) 2431 goto bad; 2432 rtvp->v_type = va.va_type; 2433 } 2434 2435 mi->mi_type = rtvp->v_type; 2436 2437 mutex_enter(&mi->mi_lock); 2438 mi->mi_flags &= ~MI4_MOUNTING; 2439 mutex_exit(&mi->mi_lock); 2440 2441 *rtvpp = rtvp; 2442 if (lcr != NULL) 2443 crfree(lcr); 2444 2445 return (0); 2446 bad: 2447 /* 2448 * An error occurred somewhere, need to clean up... 2449 */ 2450 if (lcr != NULL) 2451 crfree(lcr); 2452 2453 if (rtvp != NULL) { 2454 /* 2455 * We need to release our reference to the root vnode and 2456 * destroy the mntinfo4 struct that we just created. 2457 */ 2458 rp = VTOR4(rtvp); 2459 if (rp->r_flags & R4HASHED) 2460 rp4_rmhash(rp); 2461 VN_RELE(rtvp); 2462 } 2463 nfs4_async_stop(vfsp); 2464 nfs4_async_manager_stop(vfsp); 2465 removed = nfs4_mi_zonelist_remove(mi); 2466 if (removed) 2467 zone_rele(mi->mi_zone); 2468 2469 /* 2470 * This releases the initial "hold" of the mi since it will never 2471 * be referenced by the vfsp. Also, when mount returns to vfs.c 2472 * with an error, the vfsp will be destroyed, not rele'd. 
2473 */ 2474 MI4_RELE(mi); 2475 2476 *rtvpp = NULL; 2477 return (error); 2478 } 2479 2480 /* 2481 * vfs operations 2482 */ 2483 static int 2484 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) 2485 { 2486 mntinfo4_t *mi; 2487 ushort_t omax; 2488 int removed; 2489 2490 bool_t must_unlock; 2491 bool_t must_rele; 2492 2493 nfs4_ephemeral_tree_t *eph_tree; 2494 2495 if (secpolicy_fs_unmount(cr, vfsp) != 0) 2496 return (EPERM); 2497 2498 mi = VFTOMI4(vfsp); 2499 2500 if (flag & MS_FORCE) { 2501 vfsp->vfs_flag |= VFS_UNMOUNTED; 2502 if (nfs_zone() != mi->mi_zone) { 2503 /* 2504 * If the request is coming from the wrong zone, 2505 * we don't want to create any new threads, and 2506 * performance is not a concern. Do everything 2507 * inline. 2508 */ 2509 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 2510 "nfs4_unmount x-zone forced unmount of vfs %p\n", 2511 (void *)vfsp)); 2512 nfs4_free_mount(vfsp, flag, cr); 2513 } else { 2514 /* 2515 * Free data structures asynchronously, to avoid 2516 * blocking the current thread (for performance 2517 * reasons only). 2518 */ 2519 async_free_mount(vfsp, flag, cr); 2520 } 2521 2522 return (0); 2523 } 2524 2525 /* 2526 * return all layouts before nfs4_async_stop_sig() is called 2527 */ 2528 layoutreturn_all(vfsp, cr); 2529 /* 2530 * Wait until all asynchronous putpage operations on 2531 * this file system are complete before flushing rnodes 2532 * from the cache. 2533 */ 2534 omax = mi->mi_max_threads; 2535 if (nfs4_async_stop_sig(vfsp)) 2536 return (EINTR); 2537 2538 r4flush(vfsp, cr); 2539 2540 /* 2541 * About the only reason that this would fail would be 2542 * that the harvester is already busy tearing down this 2543 * node. So we fail back to the caller and let them try 2544 * again when needed. 
2545 */ 2546 if (nfs4_ephemeral_umount(mi, flag, cr, 2547 &must_unlock, &must_rele, &eph_tree)) { 2548 ASSERT(must_unlock == FALSE); 2549 mutex_enter(&mi->mi_async_lock); 2550 mi->mi_max_threads = omax; 2551 mutex_exit(&mi->mi_async_lock); 2552 2553 return (EBUSY); 2554 } 2555 2556 /* 2557 * If there are any active vnodes on this file system, 2558 * then the file system is busy and can't be unmounted. 2559 */ 2560 if (check_rtable4(vfsp)) { 2561 nfs4_ephemeral_umount_unlock(&must_unlock, &must_rele, 2562 &eph_tree); 2563 2564 mutex_enter(&mi->mi_async_lock); 2565 mi->mi_max_threads = omax; 2566 mutex_exit(&mi->mi_async_lock); 2567 2568 return (EBUSY); 2569 } 2570 2571 /* 2572 * The unmount can't fail from now on, so record any 2573 * ephemeral changes. 2574 */ 2575 nfs4_ephemeral_umount_activate(mi, &must_unlock, 2576 &must_rele, &eph_tree); 2577 2578 /* 2579 * There are no active files that could require over-the-wire 2580 * calls to the server, so stop the async manager and the 2581 * inactive thread. 2582 */ 2583 nfs4_async_manager_stop(vfsp); 2584 2585 /* 2586 * Destroy all rnodes belonging to this file system from the 2587 * rnode hash queues and purge any resources allocated to 2588 * them. 
2589 */ 2590 destroy_rtable4(vfsp, cr); 2591 vfsp->vfs_flag |= VFS_UNMOUNTED; 2592 2593 nfs4_remove_mi_from_server(mi, NULL); 2594 removed = nfs4_mi_zonelist_remove(mi); 2595 if (removed) 2596 zone_rele(mi->mi_zone); 2597 2598 return (0); 2599 } 2600 2601 /* 2602 * find root of nfs 2603 */ 2604 static int 2605 nfs4_root(vfs_t *vfsp, vnode_t **vpp) 2606 { 2607 mntinfo4_t *mi; 2608 vnode_t *vp; 2609 nfs4_fname_t *mfname; 2610 servinfo4_t *svp; 2611 2612 mi = VFTOMI4(vfsp); 2613 2614 if (nfs_zone() != mi->mi_zone) 2615 return (EPERM); 2616 2617 svp = mi->mi_curr_serv; 2618 if (svp) { 2619 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2620 if (svp->sv_flags & SV4_ROOT_STALE) { 2621 nfs_rw_exit(&svp->sv_lock); 2622 2623 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2624 if (svp->sv_flags & SV4_ROOT_STALE) { 2625 svp->sv_flags &= ~SV4_ROOT_STALE; 2626 nfs_rw_exit(&svp->sv_lock); 2627 return (ENOENT); 2628 } 2629 nfs_rw_exit(&svp->sv_lock); 2630 } else 2631 nfs_rw_exit(&svp->sv_lock); 2632 } 2633 2634 mfname = mi->mi_fname; 2635 fn_hold(mfname); 2636 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL, 2637 VFTOMI4(vfsp), CRED(), gethrtime()); 2638 2639 if (VTOR4(vp)->r_flags & R4STALE) { 2640 VN_RELE(vp); 2641 return (ENOENT); 2642 } 2643 2644 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 2645 2646 vp->v_type = mi->mi_type; 2647 2648 *vpp = vp; 2649 2650 return (0); 2651 } 2652 2653 static int 2654 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr) 2655 { 2656 int error; 2657 nfs4_ga_res_t gar; 2658 nfs4_ga_ext_res_t ger; 2659 2660 gar.n4g_ext_res = &ger; 2661 2662 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar, 2663 &MI4_STATFS_ATTRMAP(VTOMI4(vp)), cr)) 2664 return (error); 2665 2666 *sbp = gar.n4g_ext_res->n4g_sb; 2667 2668 return (0); 2669 } 2670 2671 /* 2672 * Get file system statistics. 
2673 */ 2674 static int 2675 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 2676 { 2677 int error; 2678 vnode_t *vp; 2679 cred_t *cr; 2680 2681 error = nfs4_root(vfsp, &vp); 2682 if (error) 2683 return (error); 2684 2685 cr = CRED(); 2686 2687 error = nfs4_statfs_otw(vp, sbp, cr); 2688 if (!error) { 2689 (void) strncpy(sbp->f_basetype, 2690 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 2691 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 2692 } else { 2693 nfs4_purge_stale_fh(error, vp, cr); 2694 } 2695 2696 VN_RELE(vp); 2697 2698 return (error); 2699 } 2700 2701 static kmutex_t nfs4_syncbusy; 2702 2703 /* 2704 * Flush dirty nfs files for file system vfsp. 2705 * If vfsp == NULL, all nfs files are flushed. 2706 * 2707 * SYNC_CLOSE in flag is passed to us to 2708 * indicate that we are shutting down and or 2709 * rebooting. 2710 */ 2711 static int 2712 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr) 2713 { 2714 /* 2715 * Cross-zone calls are OK here, since this translates to a 2716 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 2717 */ 2718 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) { 2719 r4flush(vfsp, cr); 2720 mutex_exit(&nfs4_syncbusy); 2721 } 2722 2723 /* 2724 * if SYNC_CLOSE is set then we know that 2725 * the system is rebooting, mark the mntinfo 2726 * for later examination. 2727 */ 2728 if (vfsp && (flag & SYNC_CLOSE)) { 2729 mntinfo4_t *mi; 2730 2731 mi = VFTOMI4(vfsp); 2732 if (!(mi->mi_flags & MI4_SHUTDOWN)) { 2733 mutex_enter(&mi->mi_lock); 2734 mi->mi_flags |= MI4_SHUTDOWN; 2735 mutex_exit(&mi->mi_lock); 2736 } 2737 } 2738 return (0); 2739 } 2740 2741 /* 2742 * vget is difficult, if not impossible, to support in v4 because we don't 2743 * know the parent directory or name, which makes it impossible to create a 2744 * useful shadow vnode. And we need the shadow vnode for things like 2745 * OPEN. 2746 */ 2747 2748 /* ARGSUSED */ 2749 /* 2750 * XXX Check nfs4_vget_pseudo() for dependency. 
2751 */ 2752 static int 2753 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 2754 { 2755 return (EREMOTE); 2756 } 2757 2758 /* 2759 * nfs4_mountroot get called in the case where we are diskless booting. All 2760 * we need from here is the ability to get the server info and from there we 2761 * can simply call nfs4_rootvp. 2762 */ 2763 /* ARGSUSED */ 2764 static int 2765 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why) 2766 { 2767 vnode_t *rtvp; 2768 char root_hostname[SYS_NMLN+1]; 2769 struct servinfo4 *svp; 2770 int error; 2771 int vfsflags; 2772 size_t size; 2773 char *root_path; 2774 struct pathname pn; 2775 char *name; 2776 cred_t *cr; 2777 struct nfs_args args; /* nfs mount arguments */ 2778 static char token[10]; 2779 2780 bzero(&args, sizeof (args)); 2781 2782 /* do this BEFORE getfile which causes xid stamps to be initialized */ 2783 clkset(-1L); /* hack for now - until we get time svc? */ 2784 2785 if (why == ROOT_REMOUNT) { 2786 /* 2787 * Shouldn't happen. 2788 */ 2789 panic("nfs4_mountroot: why == ROOT_REMOUNT"); 2790 } 2791 2792 if (why == ROOT_UNMOUNT) { 2793 /* 2794 * Nothing to do for NFS. 
2795 */ 2796 return (0); 2797 } 2798 2799 /* 2800 * why == ROOT_INIT 2801 */ 2802 2803 name = token; 2804 *name = 0; 2805 (void) getfsname("root", name, sizeof (token)); 2806 2807 pn_alloc(&pn); 2808 root_path = pn.pn_path; 2809 2810 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 2811 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); 2812 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 2813 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2814 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 2815 2816 /* 2817 * Get server address 2818 * Get the root path 2819 * Get server's transport 2820 * Get server's hostname 2821 * Get options 2822 */ 2823 args.addr = &svp->sv_addr; 2824 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 2825 args.fh = (char *)&svp->sv_fhandle; 2826 args.knconf = svp->sv_knconf; 2827 args.hostname = root_hostname; 2828 vfsflags = 0; 2829 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 2830 &args, &vfsflags)) { 2831 if (error == EPROTONOSUPPORT) 2832 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: " 2833 "mount_root failed: server doesn't support NFS V4"); 2834 else 2835 nfs_cmn_err(error, CE_WARN, 2836 "nfs4_mountroot: mount_root failed: %m"); 2837 nfs_rw_exit(&svp->sv_lock); 2838 sv4_free(svp); 2839 pn_free(&pn); 2840 return (error); 2841 } 2842 nfs_rw_exit(&svp->sv_lock); 2843 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 2844 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 2845 (void) strcpy(svp->sv_hostname, root_hostname); 2846 2847 svp->sv_pathlen = (int)(strlen(root_path) + 1); 2848 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); 2849 (void) strcpy(svp->sv_path, root_path); 2850 2851 /* 2852 * Force root partition to always be mounted with AUTH_UNIX for now 2853 */ 2854 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 2855 svp->sv_secdata->secmod = AUTH_UNIX; 2856 svp->sv_secdata->rpcflavor = AUTH_UNIX; 2857 
svp->sv_secdata->data = NULL; 2858 2859 cr = crgetcred(); 2860 rtvp = NULL; 2861 2862 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 2863 2864 if (error) { 2865 crfree(cr); 2866 pn_free(&pn); 2867 sv4_free(svp); 2868 return (error); 2869 } 2870 2871 crfree(cr); 2872 2873 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args); 2874 if (error) { 2875 nfs_cmn_err(error, CE_WARN, 2876 "nfs4_mountroot: invalid root mount options"); 2877 pn_free(&pn); 2878 goto errout; 2879 } 2880 2881 (void) vfs_lock_wait(vfsp); 2882 vfs_add(NULL, vfsp, vfsflags); 2883 vfs_unlock(vfsp); 2884 2885 size = strlen(svp->sv_hostname); 2886 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 2887 rootfs.bo_name[size] = ':'; 2888 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 2889 2890 pn_free(&pn); 2891 2892 errout: 2893 if (error) { 2894 sv4_free(svp); 2895 nfs4_async_stop(vfsp); 2896 nfs4_async_manager_stop(vfsp); 2897 } 2898 2899 if (rtvp != NULL) 2900 VN_RELE(rtvp); 2901 2902 return (error); 2903 } 2904 2905 /* 2906 * Initialization routine for VFS routines. 
Should only be called once 2907 */ 2908 int 2909 nfs4_vfsinit(void) 2910 { 2911 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL); 2912 nfs4setclientid_init(); 2913 nfs4_ephemeral_init(); 2914 nfs4session_init(); 2915 return (0); 2916 } 2917 2918 void 2919 nfs4_vfsfini(void) 2920 { 2921 nfs4_ephemeral_fini(); 2922 nfs4setclientid_fini(); 2923 mutex_destroy(&nfs4_syncbusy); 2924 } 2925 2926 void 2927 nfs4_freevfs(vfs_t *vfsp) 2928 { 2929 mntinfo4_t *mi; 2930 2931 /* need to release the initial hold */ 2932 mi = VFTOMI4(vfsp); 2933 MI4_RELE(mi); 2934 } 2935 2936 /* 2937 * Client side SETCLIENTID and SETCLIENTID_CONFIRM 2938 */ 2939 struct nfs4_server nfs4_server_lst = 2940 { &nfs4_server_lst, &nfs4_server_lst }; 2941 2942 kmutex_t nfs4_server_lst_lock; 2943 2944 static void 2945 nfs4setclientid_init(void) 2946 { 2947 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL); 2948 } 2949 2950 static void 2951 nfs4setclientid_fini(void) 2952 { 2953 mutex_destroy(&nfs4_server_lst_lock); 2954 } 2955 2956 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY; 2957 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES; 2958 2959 2960 /* 2961 * np->s_lock held before entry and return 2962 */ 2963 2964 int 2965 nfs4bind_conn_to_session(nfs4_server_t *np, servinfo4_t *svp, mntinfo4_t *mi, 2966 cred_t *cr, channel_dir_from_client4 dir) 2967 { 2968 COMPOUND4args_clnt args; 2969 COMPOUND4res_clnt res; 2970 nfs_argop4 argop[1]; 2971 BIND_CONN_TO_SESSION4args *argp; 2972 nfs4_error_t e; 2973 int doqueue = 1; 2974 int setcb; 2975 int needrecov = 0; 2976 2977 res.argsp = &args; 2978 2979 args.ctag = TAG_BIND_CONN_TO_SESSION; 2980 args.array = argop; 2981 args.array_len = 1; 2982 2983 args.minor_vers = mi->mi_minorversion; 2984 2985 argop[0].argop = OP_BIND_CONN_TO_SESSION; 2986 argp = &argop[0].nfs_argop4_u.opbind_conn_to_session; 2987 bcopy(&np->ssx.sessionid, &argp->bctsa_sessid, 2988 sizeof (np->ssx.sessionid)); 2989 2990 mutex_exit(&np->s_lock); 2991 2992 argp->bctsa_dir = 
dir; 2993 argp->bctsa_use_conn_in_rdma_mode = FALSE; 2994 2995 /* 2996 * Avoid callback server setup, if this is a non 2997 * bi-directional rpc connection that is for fore channel only. 2998 */ 2999 3000 if (dir == CDFC4_FORE) 3001 setcb = 0; 3002 else 3003 setcb = RFS4CALL_SETCB; 3004 3005 3006 rfs4call(mi, svp, &args, &res, cr, &doqueue, setcb, &e); 3007 3008 /* 3009 * The errors we need to worry about involve a bad/dead 3010 * session. That is handled by the recovery action. 3011 */ 3012 3013 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 3014 3015 if (e.error && !needrecov) { 3016 mutex_enter(&np->s_lock); 3017 return (e.error); 3018 } 3019 3020 if (!e.error) 3021 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 3022 3023 if (needrecov) { 3024 (void) nfs4_start_recovery(&e, mi, NULL, 3025 NULL, NULL, NULL, OP_BIND_CONN_TO_SESSION, NULL); 3026 } 3027 mutex_enter(&np->s_lock); 3028 return (e.error); 3029 } 3030 3031 static void 3032 nfs4_setup_pnfs_mi(nfs4_server_t *np, mntinfo4_t *mi, servinfo4_t *svp) 3033 { 3034 3035 if (np->s_flags & N4S_USE_PNFS_MDS) { 3036 if ((mi->mi_flags & MI4_PNFS) == 0) { 3037 3038 mi->mi_flags |= MI4_PNFS; 3039 nfs4_pnfs_init_mi(mi); 3040 3041 /* XXX for now cmn_err is handy, will go away later */ 3042 cmn_err(CE_NOTE, "enabling pNFS on %s", 3043 svp->sv_hostname); 3044 3045 DTRACE_PROBE2(nfsc__i_exchangeid, char *, 3046 "enabling pNFS on ", char *, svp->sv_hostname); 3047 } 3048 } 3049 /* 3050 * In the future, we'll need to consider the server turning off 3051 * the MDS bit. This could happen after a server restart with 3052 * PNFS disabled (after having been previously enabled). The client 3053 * might interpret this to be like LAYOUTRECALL_ALL. 3054 */ 3055 3056 } 3057 3058 3059 /* 3060 * Generic routine to set the clientid across 3061 * minor versions. 
3062 */ 3063 void 3064 nfs4_set_clientid(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, 3065 bool_t recovery, nfs4_error_t *n4ep) 3066 { 3067 struct nfs4_server *np; 3068 nfs4_recov_state_t recov_state; 3069 int num_retries = 0; 3070 bool_t retry; 3071 bool_t is_dataserver; 3072 cred_t *lcr = NULL; 3073 int retry_inuse = 1; /* only retry once on */ 3074 /* NFS4ERR_CLID_INUSE */ 3075 time_t lease_time = 0; 3076 3077 /* 3078 * If svp is non-NULL, then we're setting the clientID on a pNFS 3079 * data server. Otherwise, it's an MDS or non-pNFS server. 3080 */ 3081 if (svp == NULL) { 3082 svp = mi->mi_curr_serv; 3083 is_dataserver = FALSE; 3084 } else { 3085 is_dataserver = TRUE; 3086 } 3087 3088 recov_state.rs_flags = 0; 3089 recov_state.rs_num_retry_despite_err = 0; 3090 ASSERT(n4ep != NULL); 3091 3092 recov_retry: 3093 retry = FALSE; 3094 nfs4_error_zinit(n4ep); 3095 if (!recovery) 3096 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3097 3098 mutex_enter(&nfs4_server_lst_lock); 3099 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */ 3100 mutex_exit(&nfs4_server_lst_lock); 3101 3102 /* XXXrsb - Would we ever have np == NULL for the DS case? */ 3103 if (!np) { 3104 struct nfs4_server *tnp; 3105 np = new_nfs4_server(svp, cr); 3106 mutex_enter(&np->s_lock); 3107 3108 mutex_enter(&nfs4_server_lst_lock); 3109 tnp = servinfo4_to_nfs4_server(svp); 3110 if (tnp) { 3111 /* 3112 * another thread snuck in and put server on list. 3113 * since we aren't adding it to the nfs4_server_list 3114 * we need to set the ref count to 0 and destroy it. 
3115 */ 3116 np->s_refcnt = 0; 3117 destroy_nfs4_server(np); 3118 np = tnp; 3119 } else { 3120 /* 3121 * do not give list a reference until everything 3122 * succeeds 3123 */ 3124 insque(np, &nfs4_server_lst); 3125 } 3126 mutex_exit(&nfs4_server_lst_lock); 3127 } 3128 ASSERT(MUTEX_HELD(&np->s_lock)); 3129 /* 3130 * If we find the server already has N4S_CLIENTID_SET, then 3131 * just return, we've already done SETCLIENTID to that server 3132 */ 3133 if (np->s_flags & N4S_CLIENTID_SET && 3134 !(np->seqhb_flags & NFS4_SEQHB_EXIT)) { 3135 /* 3136 * XXXrsb - We need to be careful of the MDS/DS combo in 3137 * this block. That is, if a server is both an MDS and 3138 * DS, we need to do the right thing. (We should probably 3139 * check the "use bits" on the nfs4_server_t, once we can 3140 * trust them.) 3141 */ 3142 if (is_dataserver == FALSE) { 3143 /* add mi to np's mntinfo4_list */ 3144 nfs4_add_mi_to_server(np, mi); 3145 } 3146 if (!recovery) { 3147 nfs4_set_minorversion(mi, np->s_minorversion); 3148 /* See XXXrsb above */ 3149 if (is_dataserver == FALSE) 3150 nfs4_setup_pnfs_mi(np, mi, svp); 3151 3152 nfs_rw_exit(&mi->mi_recovlock); 3153 } 3154 mutex_exit(&np->s_lock); 3155 nfs4_server_rele(np); 3156 return; 3157 } 3158 mutex_exit(&np->s_lock); 3159 3160 /* 3161 * Drop the mi_recovlock since nfs4_start_op will 3162 * acquire it again for us. 3163 * 3164 * XXXrsb - This gets called from the recovery framework (via 3165 * recov_clientid()) and from nfs4getfh_otw(). In the latter 3166 * case, this is done from an MDS/non-pNFS server and *not* 3167 * a data server. Given that, we can use the "classic" start_op 3168 * and end_op interfaces. 
3169 */ 3170 if (!recovery) { 3171 nfs_rw_exit(&mi->mi_recovlock); 3172 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state); 3173 if (n4ep->error) { 3174 nfs4_server_rele(np); 3175 return; 3176 } 3177 } 3178 3179 mutex_enter(&np->s_lock); 3180 while ((np->s_flags & N4S_CLIENTID_PEND) || 3181 (np->seqhb_flags & NFS4_SEQHB_EXIT)) { 3182 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) { 3183 mutex_exit(&np->s_lock); 3184 nfs4_server_rele(np); 3185 /* XXXrsb - See comment above about start_op/end_op */ 3186 if (!recovery) 3187 nfs4_end_op(mi, NULL, NULL, &recov_state, 3188 recovery); 3189 n4ep->error = EINTR; 3190 return; 3191 } 3192 } 3193 3194 if (np->s_flags & N4S_CLIENTID_SET && 3195 !(np->seqhb_flags & NFS4_SEQHB_EXIT)) { 3196 /* XXX copied/pasted from above */ 3197 /* add mi to np's mntinfo4_list */ 3198 if (is_dataserver == FALSE) 3199 nfs4_add_mi_to_server(np, mi); 3200 if (!recovery) { 3201 nfs4_set_minorversion(mi, np->s_minorversion); 3202 if (is_dataserver == FALSE) 3203 nfs4_setup_pnfs_mi(np, mi, svp); 3204 } 3205 mutex_exit(&np->s_lock); 3206 nfs4_server_rele(np); 3207 if (!recovery) 3208 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 3209 return; 3210 } 3211 3212 /* 3213 * Reset the N4S_CB_PINGED flag. This is used to 3214 * indicate if we have received a CB_NULL from the 3215 * server. Also we reset the waiter flag. 3216 */ 3217 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER); 3218 /* any failure must now clear this flag */ 3219 np->s_flags |= N4S_CLIENTID_PEND; 3220 mutex_exit(&np->s_lock); 3221 3222 NFS4_SET_CLIENTID(mi, svp, cr, np, n4ep, &retry_inuse); 3223 3224 if (n4ep->error == EACCES) { 3225 /* 3226 * If the uid is set then set the creds for secure mounts 3227 * by proxy processes such as automountd. 
3228 */ 3229 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 3230 if (svp->sv_secdata->uid != 0) { 3231 lcr = crdup(cr); 3232 (void) crsetugid(lcr, svp->sv_secdata->uid, 3233 crgetgid(cr)); 3234 } 3235 nfs_rw_exit(&svp->sv_lock); 3236 3237 if (lcr != NULL) { 3238 mutex_enter(&np->s_lock); 3239 crfree(np->s_cred); 3240 np->s_cred = lcr; 3241 mutex_exit(&np->s_lock); 3242 NFS4_SET_CLIENTID(mi, svp, lcr, np, n4ep, &retry_inuse); 3243 } 3244 } 3245 mutex_enter(&np->s_lock); 3246 lease_time = np->s_lease_time; 3247 np->s_flags &= ~N4S_CLIENTID_PEND; 3248 mutex_exit(&np->s_lock); 3249 3250 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) { 3251 /* 3252 * Start recovery if failover is a possibility. If 3253 * invoked by the recovery thread itself, then just 3254 * return and let it handle the failover first. NB: 3255 * RECOVERY IS NOT ALLOWED IF THE MOUNT IS IN PRogress 3256 * since the infrastructure is not sufficiently setup 3257 * to allow it. Just return the error (after suitable 3258 * retries). 3259 */ 3260 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { 3261 (void) nfs4_start_recovery(n4ep, mi, NULL, 3262 NULL, NULL, NULL, OP_SETCLIENTID, NULL); 3263 /* 3264 * Don't retry here, just return and let 3265 * recovery take over. 3266 */ 3267 if (recovery) 3268 retry = FALSE; 3269 } else if (nfs4_rpc_retry_error(n4ep->error) || 3270 n4ep->stat == NFS4ERR_RESOURCE || 3271 n4ep->stat == NFS4ERR_STALE_CLIENTID) { 3272 3273 retry = TRUE; 3274 /* 3275 * Always retry if in recovery or once had 3276 * contact with the server (but now it's 3277 * overloaded). 
3278 */ 3279 if (recovery == TRUE || 3280 n4ep->error == ETIMEDOUT || 3281 n4ep->error == ECONNRESET) 3282 num_retries = 0; 3283 } else if (retry_inuse && n4ep->error == 0 && 3284 n4ep->stat == NFS4ERR_CLID_INUSE) { 3285 retry = TRUE; 3286 num_retries = 0; 3287 } 3288 } else { 3289 /* 3290 * Since everything succeeded give the list a reference count if 3291 * it hasn't been given one by add_new_nfs4_server() or if this 3292 * is not a recovery situation in which case it is already on 3293 * the list. 3294 */ 3295 mutex_enter(&np->s_lock); 3296 if ((np->s_flags & N4S_INSERTED) == 0) { 3297 np->s_refcnt++; 3298 np->s_flags |= N4S_INSERTED; 3299 } 3300 3301 if (is_dataserver == FALSE && !recovery) 3302 nfs4_setup_pnfs_mi(np, mi, svp); 3303 3304 /* 3305 * In recovery or not, a new nfs4_server needs 3306 * to have the minorversion set. 3307 */ 3308 np->s_minorversion = mi->mi_minorversion; 3309 mutex_exit(&np->s_lock); 3310 } 3311 3312 if (!recovery) 3313 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery); 3314 3315 3316 if (retry && num_retries++ < nfs4_num_sclid_retries) { 3317 if (retry_inuse) { 3318 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay)); 3319 retry_inuse = 0; 3320 } else 3321 delay(SEC_TO_TICK(nfs4_retry_sclid_delay)); 3322 3323 nfs4_server_rele(np); 3324 goto recov_retry; 3325 } 3326 3327 3328 if (n4ep->error == 0) 3329 n4ep->error = geterrno4(n4ep->stat); 3330 3331 /* broadcast before release in case no other threads are waiting */ 3332 cv_broadcast(&np->s_clientid_pend); 3333 nfs4_server_rele(np); 3334 } 3335 3336 /* 3337 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes 3338 * mi's clientid the same as sp's. 3339 * Assumes sp is locked down. 
3340 */ 3341 void 3342 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi) 3343 { 3344 mntinfo4_t *tmi; 3345 int in_list = 0; 3346 3347 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3348 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3349 ASSERT(sp != &nfs4_server_lst); 3350 ASSERT(MUTEX_HELD(&sp->s_lock)); 3351 3352 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3353 "nfs4_add_mi_to_server: add mi %p to sp %p", 3354 (void*)mi, (void*)sp)); 3355 3356 for (tmi = sp->mntinfo4_list; 3357 tmi != NULL; 3358 tmi = tmi->mi_clientid_next) { 3359 if (tmi == mi) { 3360 NFS4_DEBUG(nfs4_client_lease_debug, 3361 (CE_NOTE, 3362 "nfs4_add_mi_to_server: mi in list")); 3363 in_list = 1; 3364 } 3365 } 3366 3367 /* 3368 * First put a hold on the mntinfo4's vfsp so that references via 3369 * mntinfo4_list will be valid. 3370 */ 3371 if (!in_list) 3372 VFS_HOLD(mi->mi_vfsp); 3373 3374 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: " 3375 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi)); 3376 3377 if (!in_list) { 3378 if (sp->mntinfo4_list) 3379 sp->mntinfo4_list->mi_clientid_prev = mi; 3380 mi->mi_clientid_next = sp->mntinfo4_list; 3381 sp->mntinfo4_list = mi; 3382 mi->mi_srvsettime = gethrestime_sec(); 3383 } 3384 3385 /* set mi's clientid to that of sp's for later matching */ 3386 mi->mi_clientid = sp->clientid; 3387 3388 /* 3389 * Update the clientid for any other mi's belonging to sp. This 3390 * must be done here while we hold sp->s_lock, so that 3391 * find_nfs4_server() continues to work. 3392 */ 3393 3394 for (tmi = sp->mntinfo4_list; 3395 tmi != NULL; 3396 tmi = tmi->mi_clientid_next) { 3397 if (tmi != mi) { 3398 tmi->mi_clientid = sp->clientid; 3399 } 3400 } 3401 } 3402 3403 /* 3404 * Remove the mi from sp's mntinfo4_list and release its reference. 3405 * Exception: if mi still has open files, flag it for later removal (when 3406 * all the files are closed). 
3407 * 3408 * If this is the last mntinfo4 in sp's list then tell the lease renewal 3409 * thread to exit. 3410 */ 3411 static void 3412 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp) 3413 { 3414 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3415 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p", 3416 (void*)mi, (void*)sp)); 3417 3418 ASSERT(sp != NULL); 3419 ASSERT(MUTEX_HELD(&sp->s_lock)); 3420 ASSERT(mi->mi_open_files >= 0); 3421 3422 /* 3423 * First make sure this mntinfo4 can be taken off of the list, 3424 * ie: it doesn't have any open files remaining. 3425 */ 3426 if (mi->mi_open_files > 0) { 3427 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3428 "nfs4_remove_mi_from_server_nolock: don't " 3429 "remove mi since it still has files open")); 3430 3431 mutex_enter(&mi->mi_lock); 3432 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE; 3433 mutex_exit(&mi->mi_lock); 3434 return; 3435 } 3436 3437 VFS_HOLD(mi->mi_vfsp); 3438 remove_mi(sp, mi); 3439 VFS_RELE(mi->mi_vfsp); 3440 3441 if (sp->mntinfo4_list == NULL) { 3442 /* last fs unmounted, kill the thread */ 3443 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 3444 "remove_mi_from_nfs4_server_nolock: kill the thread")); 3445 nfs4_mark_srv_dead(sp, 0); 3446 } 3447 } 3448 3449 /* 3450 * Remove mi from sp's mntinfo4_list and release the vfs reference. 3451 */ 3452 static void 3453 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi) 3454 { 3455 ASSERT(MUTEX_HELD(&sp->s_lock)); 3456 3457 /* 3458 * We release a reference, and the caller must still have a 3459 * reference. 3460 */ 3461 ASSERT(mi->mi_vfsp->vfs_count >= 2); 3462 3463 if (mi->mi_clientid_prev) { 3464 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next; 3465 } else { 3466 /* This is the first mi in sp's mntinfo4_list */ 3467 /* 3468 * Make sure the first mntinfo4 in the list is the actual 3469 * mntinfo4 passed in. 
3470 */ 3471 ASSERT(sp->mntinfo4_list == mi); 3472 3473 sp->mntinfo4_list = mi->mi_clientid_next; 3474 } 3475 if (mi->mi_clientid_next) 3476 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev; 3477 3478 /* Now mark the mntinfo4's links as being removed */ 3479 mi->mi_clientid_prev = mi->mi_clientid_next = NULL; 3480 3481 VFS_RELE(mi->mi_vfsp); 3482 } 3483 3484 /* 3485 * Free all the entries in sp's mntinfo4_list. 3486 */ 3487 static void 3488 remove_all_mi(nfs4_server_t *sp) 3489 { 3490 mntinfo4_t *mi; 3491 3492 ASSERT(MUTEX_HELD(&sp->s_lock)); 3493 3494 while (sp->mntinfo4_list != NULL) { 3495 mi = sp->mntinfo4_list; 3496 /* 3497 * Grab a reference in case there is only one left (which 3498 * remove_mi() frees). 3499 */ 3500 VFS_HOLD(mi->mi_vfsp); 3501 remove_mi(sp, mi); 3502 VFS_RELE(mi->mi_vfsp); 3503 } 3504 } 3505 3506 /* 3507 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs. 3508 * 3509 * This version can be called with a null nfs4_server_t arg, 3510 * and will either find the right one and handle locking, or 3511 * do nothing because the mi wasn't added to an sp's mntinfo4_list. 3512 */ 3513 void 3514 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp) 3515 { 3516 nfs4_server_t *sp; 3517 3518 if (esp == NULL) { 3519 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3520 sp = find_nfs4_server_all(mi, 1); 3521 } else 3522 sp = esp; 3523 3524 if (sp != NULL) 3525 nfs4_remove_mi_from_server_nolock(mi, sp); 3526 3527 /* 3528 * If we had a valid esp as input, the calling function will be 3529 * responsible for unlocking the esp nfs4_server. 3530 */ 3531 if (esp == NULL) { 3532 if (sp != NULL) 3533 mutex_exit(&sp->s_lock); 3534 nfs_rw_exit(&mi->mi_recovlock); 3535 if (sp != NULL) 3536 nfs4_server_rele(sp); 3537 } 3538 } 3539 3540 /* 3541 * Return TRUE if the given server has any non-unmounted filesystems. 
3542 */ 3543 3544 bool_t 3545 nfs4_fs_active(nfs4_server_t *sp) 3546 { 3547 mntinfo4_t *mi; 3548 3549 ASSERT(MUTEX_HELD(&sp->s_lock)); 3550 3551 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) { 3552 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 3553 return (TRUE); 3554 } 3555 3556 return (FALSE); 3557 } 3558 3559 /* 3560 * Mark sp as finished and notify any waiters. 3561 */ 3562 3563 void 3564 nfs4_mark_srv_dead(nfs4_server_t *sp, uint_t zone_shutdown) 3565 { 3566 ASSERT(MUTEX_HELD(&sp->s_lock)); 3567 3568 if (zone_shutdown) 3569 sp->seqhb_flags |= NFS4_SEQHB_EXIT; 3570 else 3571 sp->seqhb_flags |= NFS4_SEQHB_EXITING; 3572 sp->s_thread_exit = NFS4_THREAD_EXIT; 3573 cv_broadcast(&sp->cv_thread_exit); 3574 } 3575 3576 /* 3577 * Layout rnode by fsid avl tree compare function 3578 */ 3579 static int 3580 fsidcmp(const void *p1, const void *p2) 3581 { 3582 const nfs4_fsidlt_t *lt1 = p1; 3583 const nfs4_fsidlt_t *lt2 = p2; 3584 int m; 3585 3586 m = memcmp(<1->lt_fsid, <2->lt_fsid, sizeof (<1->lt_fsid)); 3587 return (m == 0 ? 0 : m < 0 ? -1 : 1); 3588 } 3589 3590 /* 3591 * Layout rnode avl tree compare function 3592 */ 3593 int 3594 layoutcmp(const void *p1, const void *p2) 3595 { 3596 const rnode4_t *r1 = p1; 3597 const rnode4_t *r2 = p2; 3598 3599 return (nfs4cmpfh(&r1->r_fh->sfh_fh, &r2->r_fh->sfh_fh)); 3600 } 3601 3602 /* 3603 * Create a new nfs4_server_t structure. 3604 * Returns new node unlocked and not in list, but with a reference count of 3605 * 1. 
3606 */ 3607 struct nfs4_server * 3608 new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3609 { 3610 struct nfs4_server *np; 3611 timespec_t tt; 3612 union { 3613 struct { 3614 uint32_t sec; 3615 uint32_t subsec; 3616 } un_curtime; 3617 verifier4 un_verifier; 3618 } nfs4clientid_verifier; 3619 char id_val[] = "Solaris: %s, NFSv4 kernel client"; 3620 char tag[] = "INITSESS%p"; 3621 int len; 3622 3623 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); 3624 np->saddr.len = svp->sv_addr.len; 3625 np->saddr.maxlen = svp->sv_addr.maxlen; 3626 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP); 3627 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len); 3628 3629 /* 3630 * Initialize rnode avl tree. 3631 */ 3632 mutex_init(&np->s_lt_lock, NULL, MUTEX_DEFAULT, NULL); 3633 avl_create(&np->s_fsidlt, fsidcmp, sizeof (nfs4_fsidlt_t), 3634 offsetof(nfs4_fsidlt_t, lt_node)); 3635 nfs4_pnfs_init_n4s(np); 3636 np->s_refcnt = 1; 3637 3638 /* 3639 * Build the nfs_client_id4 for this server mount. Ensure 3640 * the verifier is useful and that the identification is 3641 * somehow based on the server's address for the case of 3642 * multi-homed servers. 3643 */ 3644 nfs4clientid_verifier.un_verifier = 0; 3645 gethrestime(&tt); 3646 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec; 3647 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec; 3648 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier; 3649 3650 /* 3651 * calculate the length of the opaque identifier. Subtract 2 3652 * for the "%s" and add the traditional +1 for null 3653 * termination. 
3654 */ 3655 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1; 3656 np->clidtosend.id_len = len + np->saddr.maxlen; 3657 3658 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP); 3659 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename()); 3660 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len); 3661 3662 np->s_flags = 0; 3663 np->mntinfo4_list = NULL; 3664 /* save cred for issuing rfs4calls inside the renew thread */ 3665 crhold(cr); 3666 np->s_cred = cr; 3667 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL); 3668 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL); 3669 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL); 3670 list_create(&np->s_deleg_list, sizeof (rnode4_t), 3671 offsetof(rnode4_t, r_deleg_link)); 3672 np->s_thread_exit = 0; 3673 np->state_ref_count = 0; 3674 np->lease_valid = NFS4_LEASE_NOT_STARTED; 3675 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL); 3676 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL); 3677 np->s_otw_call_count = 0; 3678 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL); 3679 np->zoneid = getzoneid(); 3680 np->zone_globals = nfs4_get_callback_globals(); 3681 ASSERT(np->zone_globals != NULL); 3682 3683 /* 3684 * Dummy session id untill CREATE_SESSION is completed 3685 */ 3686 (void) snprintf(np->ssx.sessionid, sizeof (sessionid4), tag, curthread); 3687 3688 /* 3689 * By default, we begin with bi-dir rpc 3690 */ 3691 if (nfs41_birpc) { 3692 np->ssx.bi_rpc = 1; 3693 } 3694 3695 /* 3696 * Initialize Slot management fields 3697 */ 3698 cv_init(&np->ssx.slot_wait, NULL, CV_DEFAULT, NULL); 3699 nfs_rw_init(&np->ssx.slot_table_rwlock, NULL, RW_DEFAULT, NULL); 3700 mutex_init(&np->ssx.slot_lock, NULL, MUTEX_DEFAULT, NULL); 3701 return (np); 3702 } 3703 3704 /* 3705 * Create a new nfs4_server_t structure and add it to the list. 3706 * Returns new node locked; reference must eventually be freed. 
3707 */ 3708 struct nfs4_server * 3709 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr) 3710 { 3711 nfs4_server_t *sp; 3712 3713 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3714 sp = new_nfs4_server(svp, cr); 3715 mutex_enter(&sp->s_lock); 3716 insque(sp, &nfs4_server_lst); 3717 sp->s_refcnt++; /* list gets a reference */ 3718 sp->s_flags |= N4S_INSERTED; 3719 sp->clientid = 0; 3720 return (sp); 3721 } 3722 3723 int nfs4_server_t_debug = 0; 3724 3725 #ifdef lint 3726 extern void 3727 dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *); 3728 #endif 3729 3730 #ifndef lint 3731 #ifdef DEBUG 3732 void 3733 dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p) 3734 { 3735 int hash16(void *p, int len); 3736 nfs4_server_t *np; 3737 3738 NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE, 3739 "dumping nfs4_server_t list in %s", txt)); 3740 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3741 "mi 0x%p, want clientid %llx, addr %d/%04X", 3742 mi, (longlong_t)clientid, srv_p->sv_addr.len, 3743 hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len))); 3744 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; 3745 np = np->forw) { 3746 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3747 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d", 3748 np, (longlong_t)np->clientid, np->saddr.len, 3749 hash16((void *)np->saddr.buf, np->saddr.len), 3750 np->state_ref_count)); 3751 if (np->saddr.len == srv_p->sv_addr.len && 3752 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3753 np->saddr.len) == 0) 3754 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3755 " - address matches")); 3756 if (np->clientid == clientid || np->clientid == 0) 3757 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3758 " - clientid matches")); 3759 if (np->s_thread_exit != NFS4_THREAD_EXIT) 3760 NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT, 3761 " - thread not exiting")); 3762 } 3763 delay(hz); 3764 } 3765 #endif 3766 #endif 3767 3768 3769 /* 3770 * Move a mntinfo4_t from one server list to another. 
3771 * Locking of the two nfs4_server_t nodes will be done in list order. 3772 * 3773 * Returns NULL if the current nfs4_server_t for the filesystem could not 3774 * be found (e.g., due to forced unmount). Otherwise returns a reference 3775 * to the new nfs4_server_t, which must eventually be freed. 3776 */ 3777 nfs4_server_t * 3778 nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new) 3779 { 3780 nfs4_server_t *p, *op = NULL, *np = NULL; 3781 int num_open; 3782 zoneid_t zoneid = nfs_zoneid(); 3783 3784 ASSERT(nfs_zone() == mi->mi_zone); 3785 3786 mutex_enter(&nfs4_server_lst_lock); 3787 #ifdef DEBUG 3788 if (nfs4_server_t_debug) 3789 dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new); 3790 #endif 3791 for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) { 3792 if (p->zoneid != zoneid) 3793 continue; 3794 if (p->saddr.len == old->sv_addr.len && 3795 bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 && 3796 p->s_thread_exit != NFS4_THREAD_EXIT) { 3797 op = p; 3798 mutex_enter(&op->s_lock); 3799 op->s_refcnt++; 3800 } 3801 if (p->saddr.len == new->sv_addr.len && 3802 bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 && 3803 p->s_thread_exit != NFS4_THREAD_EXIT) { 3804 np = p; 3805 mutex_enter(&np->s_lock); 3806 } 3807 if (op != NULL && np != NULL) 3808 break; 3809 } 3810 if (op == NULL) { 3811 /* 3812 * Filesystem has been forcibly unmounted. Bail out. 
3813 */ 3814 if (np != NULL) 3815 mutex_exit(&np->s_lock); 3816 mutex_exit(&nfs4_server_lst_lock); 3817 return (NULL); 3818 } 3819 if (np != NULL) { 3820 np->s_refcnt++; 3821 } else { 3822 #ifdef DEBUG 3823 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3824 "nfs4_move_mi: no target nfs4_server, will create.")); 3825 #endif 3826 np = add_new_nfs4_server(new, kcred); 3827 } 3828 mutex_exit(&nfs4_server_lst_lock); 3829 3830 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3831 "nfs4_move_mi: for mi 0x%p, " 3832 "old servinfo4 0x%p, new servinfo4 0x%p, " 3833 "old nfs4_server 0x%p, new nfs4_server 0x%p, ", 3834 (void*)mi, (void*)old, (void*)new, 3835 (void*)op, (void*)np)); 3836 ASSERT(op != NULL && np != NULL); 3837 3838 /* discard any delegations */ 3839 nfs4_deleg_discard(mi, op); 3840 3841 num_open = mi->mi_open_files; 3842 mi->mi_open_files = 0; 3843 op->state_ref_count -= num_open; 3844 ASSERT(op->state_ref_count >= 0); 3845 np->state_ref_count += num_open; 3846 nfs4_remove_mi_from_server_nolock(mi, op); 3847 mi->mi_open_files = num_open; 3848 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 3849 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d", 3850 mi->mi_open_files, op->state_ref_count, np->state_ref_count)); 3851 3852 nfs4_add_mi_to_server(np, mi); 3853 3854 mutex_exit(&op->s_lock); 3855 nfs4_server_rele(op); 3856 mutex_exit(&np->s_lock); 3857 3858 return (np); 3859 } 3860 3861 /* 3862 * Need to have the nfs4_server_lst_lock. 3863 * Search the nfs4_server list to find a match on this servinfo4 3864 * based on its address. 3865 * 3866 * Returns NULL if no match is found. Otherwise returns a reference (which 3867 * must eventually be freed) to a locked nfs4_server. 
3868 */ 3869 nfs4_server_t * 3870 servinfo4_to_nfs4_server(servinfo4_t *srv_p) 3871 { 3872 nfs4_server_t *np; 3873 zoneid_t zoneid = nfs_zoneid(); 3874 3875 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 3876 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3877 if (np->zoneid == zoneid && 3878 np->saddr.len == srv_p->sv_addr.len && 3879 bcmp(np->saddr.buf, srv_p->sv_addr.buf, 3880 np->saddr.len) == 0) { 3881 mutex_enter(&np->s_lock); 3882 /* 3883 * If there is an already created session 3884 * reuse this nfs4_server_t, even if 3885 * NFS4_THREAD_EXIT is set (which just means 3886 * no mounts exist to the server). 3887 */ 3888 if (np->s_thread_exit == NFS4_THREAD_EXIT && 3889 (!(np->s_flags & N4S_SESSION_CREATED))) { 3890 mutex_exit(&np->s_lock); 3891 continue; 3892 } 3893 np->s_thread_exit = 0; 3894 np->s_refcnt++; 3895 return (np); 3896 } 3897 } 3898 return (NULL); 3899 } 3900 3901 /* 3902 * Search the nfs4_server_lst to find a match based on clientid and 3903 * addr. 3904 * Locks the nfs4_server down if it is found and returns a reference that 3905 * must eventually be freed. 3906 * 3907 * Returns NULL it no match is found. This means one of two things: either 3908 * mi is in the process of being mounted, or mi has been unmounted. 3909 * 3910 * The caller should be holding mi->mi_recovlock, and it should continue to 3911 * hold the lock until done with the returned nfs4_server_t. Once 3912 * mi->mi_recovlock is released, there is no guarantee that the returned 3913 * mi->nfs4_server_t will continue to correspond to mi. 3914 */ 3915 nfs4_server_t * 3916 find_nfs4_server(mntinfo4_t *mi) 3917 { 3918 return (find_nfs4_server_all(mi, 0)); 3919 } 3920 3921 /* 3922 * This is a special version of find_nfs4_server, which takes 3923 * the mi_recovlock, activates the current nfs4_server_t for 3924 * that mi, and drops the lock. 
This function must be used 3925 * with care, since after dropping mi_recovlock, the mi will 3926 * may no longer refer to this structure. Callers of this 3927 * service must be aware of this and can never assume that 3928 * the value returned remains the current target of the mi. 3929 */ 3930 nfs4_server_t * 3931 find_nfs4_server_nolock(mntinfo4_t *mi) 3932 { 3933 nfs4_server_t *np; 3934 3935 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 3936 np = find_nfs4_server(mi); 3937 nfs_rw_exit(&mi->mi_recovlock); 3938 /* either np is NULL OR n4sp->s_lock is held */ 3939 return (np); 3940 } 3941 3942 /* 3943 * Same as above, but takes an "all" parameter which can be 3944 * set to 1 if the caller wishes to find nfs4_server_t's which 3945 * have been marked for termination by the exit of the renew 3946 * thread. This should only be used by operations which are 3947 * cleaning up and will not cause an OTW op. 3948 */ 3949 nfs4_server_t * 3950 find_nfs4_server_all(mntinfo4_t *mi, int all) 3951 { 3952 nfs4_server_t *np; 3953 servinfo4_t *svp; 3954 zoneid_t zoneid = mi->mi_zone->zone_id; 3955 3956 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3957 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3958 /* 3959 * This can be called from nfs4_unmount() which can be called from the 3960 * global zone, hence it's legal for the global zone to muck with 3961 * another zone's server list, as long as it doesn't try to contact 3962 * them. 3963 */ 3964 ASSERT(zoneid == getzoneid() || getzoneid() == GLOBAL_ZONEID || 3965 nfs_global_client_only != 0); 3966 3967 /* 3968 * The nfs4_server_lst_lock global lock is held when we get a new 3969 * clientid (via SETCLIENTID OTW). Holding this global lock and 3970 * mi_recovlock (READER is fine) ensures that the nfs4_server 3971 * and this mntinfo4 can't get out of sync, so the following search is 3972 * always valid. 
3973 */ 3974 mutex_enter(&nfs4_server_lst_lock); 3975 #ifdef DEBUG 3976 if (nfs4_server_t_debug) { 3977 /* mi->mi_clientid is unprotected, ok for debug output */ 3978 dumpnfs4slist("find_nfs4_server", mi, mi->mi_clientid, 3979 mi->mi_curr_serv); 3980 } 3981 #endif 3982 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 3983 mutex_enter(&np->s_lock); 3984 svp = mi->mi_curr_serv; 3985 3986 if (np->zoneid == zoneid && 3987 np->clientid == mi->mi_clientid && 3988 np->saddr.len == svp->sv_addr.len && 3989 bcmp(np->saddr.buf, svp->sv_addr.buf, np->saddr.len) == 0 && 3990 (np->s_thread_exit != NFS4_THREAD_EXIT || all != 0)) { 3991 mutex_exit(&nfs4_server_lst_lock); 3992 np->s_refcnt++; 3993 return (np); 3994 } 3995 mutex_exit(&np->s_lock); 3996 } 3997 mutex_exit(&nfs4_server_lst_lock); 3998 3999 return (NULL); 4000 } 4001 4002 /* ARGSUSED */ 4003 nfs4_server_t * 4004 find_nfs4_server_by_addr(struct netbuf *nb, struct knetconfig *knc) 4005 { 4006 nfs4_server_t *np; 4007 4008 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock)); 4009 4010 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 4011 mutex_enter(&np->s_lock); 4012 4013 if (np->saddr.len == nb->len && 4014 bcmp(np->saddr.buf, nb->buf, np->saddr.len) == 0 && 4015 (np->s_thread_exit != NFS4_THREAD_EXIT)) { 4016 mutex_exit(&nfs4_server_lst_lock); 4017 np->s_refcnt++; 4018 return (np); 4019 } 4020 mutex_exit(&np->s_lock); 4021 } 4022 /* 4023 * NB - return holding lst_lock so caller can insert using 4024 * add_new_nfs4_server without a race. 4025 */ 4026 return (NULL); 4027 } 4028 4029 /* 4030 * Take a new reference to the nfs4_server. Note that several 4031 * routines need to do this inline in order to keep the lock. 4032 */ 4033 void 4034 nfs4_server_hold(nfs4_server_t *sp) 4035 { 4036 mutex_enter(&sp->s_lock); 4037 sp->s_refcnt++; 4038 mutex_exit(&sp->s_lock); 4039 } 4040 4041 /* 4042 * Release the reference to sp and destroy it if that's the last one. 
4043 */ 4044 void 4045 nfs4_server_rele(nfs4_server_t *sp) 4046 { 4047 mutex_enter(&sp->s_lock); 4048 nfs4_server_rele_lockt(sp); 4049 /* s_lock has been released */ 4050 } 4051 4052 void 4053 nfs4_server_rele_lockt(nfs4_server_t *sp) 4054 { 4055 ASSERT(MUTEX_HELD(&sp->s_lock)); 4056 ASSERT(sp->s_refcnt > 0); 4057 sp->s_refcnt--; 4058 if (sp->s_refcnt > 0) { 4059 mutex_exit(&sp->s_lock); 4060 return; 4061 } 4062 mutex_exit(&sp->s_lock); 4063 4064 mutex_enter(&nfs4_server_lst_lock); 4065 mutex_enter(&sp->s_lock); 4066 if (sp->s_refcnt > 0) { 4067 mutex_exit(&sp->s_lock); 4068 mutex_exit(&nfs4_server_lst_lock); 4069 return; 4070 } 4071 remque(sp); 4072 sp->forw = sp->back = NULL; 4073 mutex_exit(&nfs4_server_lst_lock); 4074 destroy_nfs4_server(sp); 4075 } 4076 4077 /* 4078 * Initiate and wait for destroy of a session. 4079 */ 4080 4081 void 4082 nfs4_cleanup_oldsession(nfs4_server_t *np) 4083 { 4084 mutex_enter(&np->s_lock); 4085 if (np->seqhb_flags & NFS4_SEQHB_STARTED) { 4086 4087 /* 4088 * If not already signalled in start_recovery() 4089 * signal sequence_heartbeat_thread() to exit. 4090 */ 4091 4092 if (!(np->seqhb_flags & NFS4_SEQHB_EXIT)) { 4093 np->seqhb_flags |= NFS4_SEQHB_EXIT; 4094 np->s_refcnt++; 4095 cv_broadcast(&np->cv_thread_exit); 4096 } 4097 4098 /* 4099 * Wait for the sequence heartbeat thread to exit 4100 * On it's way out, this will destroy the session. 4101 */ 4102 4103 while (np->seqhb_flags & NFS4_SEQHB_EXIT) { 4104 cv_wait(&np->ssx_wait, &np->s_lock); 4105 } 4106 4107 mutex_exit(&np->s_lock); 4108 4109 } else if (np->seqhb_flags & NFS4_SEQHB_DESTROY) { 4110 /* 4111 * If (seqhb_flags & NFS4_SEQHB_DESTROY == TRUE) then the 4112 * sequence heart beat thread raced us and has already 4113 * destroyed the session. Nothing more to do. 4114 */ 4115 mutex_exit(&np->s_lock); 4116 } else if (np->s_flags & N4S_SESSION_CREATED) { 4117 /* 4118 * No sequence heartbeat thread means this 4119 * session is to a data server. Just destroy the 4120 * the session. 
4121 */ 4122 np->seqhb_flags = 0; 4123 mutex_exit(&np->s_lock); 4124 nfs4destroy_session(np, NULL); 4125 } else { 4126 np->seqhb_flags = 0; 4127 mutex_exit(&np->s_lock); 4128 } 4129 } 4130 4131 void 4132 nfs4destroy_session_otw(nfs4_session_t *sessp, CLIENT *clientp) 4133 { 4134 COMPOUND4args_clnt args; 4135 COMPOUND4res_clnt res; 4136 nfs_argop4 argop[2]; 4137 nfs4_slot_t *slotp; 4138 struct timeval wait; 4139 enum clnt_stat status; 4140 nfs4_error_t e; 4141 uint32_t zilch = 0; 4142 4143 res.argsp = &args; 4144 res.array = NULL; 4145 res.status = 0; 4146 res.array_len = 0; 4147 res.decode_len = 0; 4148 4149 args.ctag = TAG_DESTROY_SESSION; 4150 4151 args.array = argop; 4152 args.array_len = 2; 4153 args.minor_vers = nfs4_max_minor_version; 4154 4155 argop[0].argop = OP_SEQUENCE; 4156 4157 argop[1].argop = OP_DESTROY_SESSION; 4158 bcopy(sessp->sessionid, 4159 argop[1].nfs_argop4_u.opdestroy_session.dsa_sessionid, 4160 sizeof (sessp->sessionid)); 4161 4162 TICK_TO_TIMEVAL(30 * hz / 10, &wait); 4163 4164 if (!(CLNT_CONTROL(clientp, CLSET_XID, (char *)&zilch))) { 4165 zcmn_err(getzoneid(), CE_WARN, 4166 "Failed to zero xid to destroy session"); 4167 goto destroy; 4168 } 4169 4170 nfs4sequence_setup(sessp, &args, &slotp); 4171 status = CLNT_CALL(clientp, NFSPROC4_COMPOUND, 4172 xdr_COMPOUND4args_clnt, (caddr_t)&args, 4173 xdr_COMPOUND4res_clnt, (caddr_t)&res, 4174 wait); 4175 4176 nfs4_error_set(&e, status, res.status); 4177 nfs4sequence_fin(sessp, &res, slotp, &e); 4178 4179 if (status != RPC_SUCCESS || res.status || 4180 res.array[1].nfs_resop4_u.opdestroy_session.dsr_status) { 4181 DTRACE_PROBE1(nfsc__i_destroysession, char *, 4182 "Destroy_session request failed, destroying anyways"); 4183 goto destroy; 4184 } 4185 4186 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 4187 4188 destroy: 4189 kmem_free(sessp->slot_table, 4190 sessp->slot_table_size * sizeof (void *)); 4191 kmem_free(sessp->cb_slot_table, 4192 sessp->cb_slot_table_size * sizeof (void *)); 4193 
kmem_free(sessp->saddr.buf, sessp->saddr.len); 4194 } 4195 4196 void 4197 nfs4destroy_session(nfs4_server_t *np, CLIENT *seqhandle) 4198 { 4199 struct nfs41_cb_info *cbi; 4200 struct nfs4_callback_globals *ncg = np->zone_globals; 4201 4202 /* XXX currently no otw destroy for null client handles */ 4203 if (seqhandle != NULL) 4204 nfs4destroy_session_otw(&np->ssx, seqhandle); 4205 4206 4207 mutex_enter(&np->s_lock); 4208 cbi = ncg->nfs4prog2cbinfo[np->s_program - NFS4_CALLBACK]; 4209 mutex_exit(&np->s_lock); 4210 4211 /* 4212 * Tell callback connection thread to exit. 4213 */ 4214 mutex_enter(&cbi->cb_cbconn_lock); 4215 cbi->cb_cbconn_exit = TRUE; 4216 cv_broadcast(&cbi->cb_cbconn_wait); 4217 mutex_exit(&cbi->cb_cbconn_lock); 4218 4219 /* 4220 * Tell callback handling thread to exit. 4221 * Wait till it exits and then free the cbinfo. 4222 */ 4223 4224 mutex_enter(&cbi->cb_rpc->r_lock); 4225 cbi->cb_flags |= NFS41_CB_THREAD_EXIT; 4226 cv_broadcast(&cbi->cb_rpc->r_cbwait); 4227 mutex_exit(&cbi->cb_rpc->r_lock); 4228 4229 mutex_enter(&cbi->cb_reflock); 4230 while (cbi->cb_refcnt != 1) { 4231 cv_wait(&cbi->cb_destroy_wait, &cbi->cb_reflock); 4232 } 4233 mutex_exit(&cbi->cb_reflock); 4234 4235 mutex_enter(&np->s_lock); 4236 nfs4callback_destroy(np); 4237 np->s_flags &= ~(N4S_SESSION_CREATED); 4238 mutex_exit(&np->s_lock); 4239 } 4240 4241 static void 4242 destroy_nfs4_server(nfs4_server_t *sp) 4243 { 4244 nfs4_fsidlt_t *ltp = NULL; 4245 void *cookie = NULL; 4246 4247 ASSERT(MUTEX_HELD(&sp->s_lock)); 4248 ASSERT(sp->s_refcnt == 0); 4249 ASSERT(sp->s_otw_call_count == 0); 4250 4251 remove_all_mi(sp); 4252 4253 crfree(sp->s_cred); 4254 kmem_free(sp->saddr.buf, sp->saddr.maxlen); 4255 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len); 4256 mutex_exit(&sp->s_lock); 4257 4258 while ((ltp = avl_destroy_nodes(&sp->s_fsidlt, &cookie)) != NULL) { 4259 avl_destroy(<p->lt_rlayout_tree); 4260 kmem_free(ltp, sizeof (*ltp)); 4261 } 4262 avl_destroy(&sp->s_fsidlt); 4263 
pnfs_trash_devtree(sp); 4264 4265 /* destroy the nfs4_server */ 4266 nfs4callback_destroy(sp); 4267 list_destroy(&sp->s_deleg_list); 4268 mutex_destroy(&sp->s_lock); 4269 cv_destroy(&sp->cv_thread_exit); 4270 cv_destroy(&sp->s_cv_otw_count); 4271 cv_destroy(&sp->s_clientid_pend); 4272 cv_destroy(&sp->wait_cb_null); 4273 nfs_rw_destroy(&sp->s_recovlock); 4274 kmem_free(sp, sizeof (*sp)); 4275 } 4276 4277 /* 4278 * Lock sp, but only if it's still active (in the list and hasn't been 4279 * flagged as exiting) or 'all' is non-zero. 4280 * Returns TRUE if sp got locked and adds a reference to sp. 4281 */ 4282 bool_t 4283 nfs4_server_vlock(nfs4_server_t *sp, int all) 4284 { 4285 nfs4_server_t *np; 4286 4287 mutex_enter(&nfs4_server_lst_lock); 4288 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 4289 if (sp == np && (np->s_thread_exit != NFS4_THREAD_EXIT || 4290 all != 0)) { 4291 mutex_enter(&np->s_lock); 4292 np->s_refcnt++; 4293 mutex_exit(&nfs4_server_lst_lock); 4294 return (TRUE); 4295 } 4296 } 4297 mutex_exit(&nfs4_server_lst_lock); 4298 return (FALSE); 4299 } 4300 4301 /* 4302 * Fork off a thread to free the data structures for a mount. 
4303 */ 4304 4305 static void 4306 async_free_mount(vfs_t *vfsp, int flag, cred_t *cr) 4307 { 4308 freemountargs_t *args; 4309 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP); 4310 args->fm_vfsp = vfsp; 4311 VFS_HOLD(vfsp); 4312 MI4_HOLD(VFTOMI4(vfsp)); 4313 args->fm_flag = flag; 4314 args->fm_cr = cr; 4315 crhold(cr); 4316 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0, 4317 minclsyspri); 4318 } 4319 4320 static void 4321 nfs4_free_mount_thread(freemountargs_t *args) 4322 { 4323 mntinfo4_t *mi; 4324 nfs4_free_mount(args->fm_vfsp, args->fm_flag, args->fm_cr); 4325 mi = VFTOMI4(args->fm_vfsp); 4326 crfree(args->fm_cr); 4327 VFS_RELE(args->fm_vfsp); 4328 MI4_RELE(mi); 4329 kmem_free(args, sizeof (freemountargs_t)); 4330 zthread_exit(); 4331 /* NOTREACHED */ 4332 } 4333 4334 /* 4335 * Thread to free the data structures for a given filesystem. 4336 */ 4337 static void 4338 nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr) 4339 { 4340 mntinfo4_t *mi = VFTOMI4(vfsp); 4341 nfs4_server_t *sp; 4342 callb_cpr_t cpr_info; 4343 kmutex_t cpr_lock; 4344 boolean_t async_thread; 4345 int removed; 4346 4347 bool_t must_unlock; 4348 bool_t must_rele; 4349 nfs4_ephemeral_tree_t *eph_tree; 4350 4351 /* 4352 * We need to participate in the CPR framework if this is a kernel 4353 * thread. 4354 */ 4355 async_thread = (curproc == nfs_zone()->zone_zsched); 4356 if (async_thread) { 4357 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 4358 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 4359 "nfsv4AsyncUnmount"); 4360 } 4361 4362 /* 4363 * We need to wait for all outstanding OTW calls 4364 * and recovery to finish before we remove the mi 4365 * from the nfs4_server_t, as current pending 4366 * calls might still need this linkage (in order 4367 * to find a nfs4_server_t from a mntinfo4_t). 
4368 */ 4369 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 4370 sp = find_nfs4_server(mi); 4371 nfs_rw_exit(&mi->mi_recovlock); 4372 4373 if (sp) { 4374 while (sp->s_otw_call_count != 0) { 4375 if (async_thread) { 4376 mutex_enter(&cpr_lock); 4377 CALLB_CPR_SAFE_BEGIN(&cpr_info); 4378 mutex_exit(&cpr_lock); 4379 } 4380 cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 4381 if (async_thread) { 4382 mutex_enter(&cpr_lock); 4383 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 4384 mutex_exit(&cpr_lock); 4385 } 4386 } 4387 mutex_exit(&sp->s_lock); 4388 nfs4_server_rele(sp); 4389 sp = NULL; 4390 } 4391 4392 mutex_enter(&mi->mi_lock); 4393 while (mi->mi_in_recovery != 0) { 4394 if (async_thread) { 4395 mutex_enter(&cpr_lock); 4396 CALLB_CPR_SAFE_BEGIN(&cpr_info); 4397 mutex_exit(&cpr_lock); 4398 } 4399 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 4400 if (async_thread) { 4401 mutex_enter(&cpr_lock); 4402 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 4403 mutex_exit(&cpr_lock); 4404 } 4405 } 4406 mutex_exit(&mi->mi_lock); 4407 4408 /* 4409 * If we got an error, then do not nuke the 4410 * tree. Either the harvester is busy reclaiming 4411 * this node or we ran into some busy condition. 4412 * 4413 * The harvester will eventually come along and cleanup. 4414 * The only problem would be the root mount point. 4415 * 4416 * Since the busy node can occur for a variety 4417 * of reasons and can result in an entry staying 4418 * in df output but no longer accessible from the 4419 * directory tree, we are okay. 4420 */ 4421 if (!nfs4_ephemeral_umount(mi, flag, cr, 4422 &must_unlock, &must_rele, &eph_tree)) 4423 nfs4_ephemeral_umount_activate(mi, &must_unlock, 4424 &must_rele, &eph_tree); 4425 4426 /* 4427 * The original purge of the dnlc via 'dounmount' 4428 * doesn't guarantee that another dnlc entry was not 4429 * added while we waitied for all outstanding OTW 4430 * and recovery calls to finish. So re-purge the 4431 * dnlc now. 
4432 */ 4433 (void) dnlc_purge_vfsp(vfsp, 0); 4434 4435 /* 4436 * We need to explicitly stop the manager thread; the asyc worker 4437 * threads can timeout and exit on their own. 4438 */ 4439 mutex_enter(&mi->mi_async_lock); 4440 mi->mi_max_threads = 0; 4441 cv_broadcast(&mi->mi_async_work_cv); 4442 mutex_exit(&mi->mi_async_lock); 4443 if (mi->mi_manager_thread) 4444 nfs4_async_manager_stop(vfsp); 4445 4446 destroy_rtable4(vfsp, cr); 4447 4448 nfs4_remove_mi_from_server(mi, NULL); 4449 4450 if (async_thread) { 4451 mutex_enter(&cpr_lock); 4452 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 4453 mutex_destroy(&cpr_lock); 4454 } 4455 4456 removed = nfs4_mi_zonelist_remove(mi); 4457 if (removed) 4458 zone_rele(mi->mi_zone); 4459 } --- EOF ---