Move callback server thread creation, initial processing, and destruction to RPC. Clean up some RPC code. Remove extraneous fields from nfs41_cb_info and clean up the code. Change KM_SLEEP in mir_nfs41_callback_thread to KM_NOSLEEP. Fix lint warnings.
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 #include <sys/param.h> 30 #include <sys/types.h> 31 #include <sys/systm.h> 32 #include <sys/cred.h> 33 #include <sys/vfs.h> 34 #include <sys/vnode.h> 35 #include <sys/pathname.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/kstat.h> 39 #include <sys/mkdev.h> 40 #include <sys/mount.h> 41 #include <sys/statvfs.h> 42 #include <sys/errno.h> 43 #include <sys/debug.h> 44 #include <sys/cmn_err.h> 45 #include <sys/utsname.h> 46 #include <sys/bootconf.h> 47 #include <sys/modctl.h> 48 #include <sys/acl.h> 49 #include <sys/flock.h> 50 #include <sys/kstr.h> 51 #include <sys/stropts.h> 52 #include <sys/strsubr.h> 53 #include <sys/atomic.h> 54 #include <sys/disp.h> 55 #include <sys/policy.h> 56 #include <sys/list.h> 57 #include <sys/zone.h> 58 #include <sys/sdt.h> 59 60 #include <rpc/types.h> 61 #include <rpc/auth.h> 62 #include <rpc/rpcsec_gss.h> 63 #include <rpc/clnt.h> 64 #include <rpc/xdr.h> 65 
66 #include <nfs/nfs.h> 67 #include <nfs/nfs_clnt.h> 68 #include <nfs/mount.h> 69 #include <nfs/nfs_acl.h> 70 71 #include <fs/fs_subr.h> 72 73 #include <nfs/nfs4.h> 74 #include <nfs/rnode4.h> 75 #include <nfs/nfs4_clnt.h> 76 #include <nfs/nfssys.h> 77 #include <nfs/nfs4_pnfs.h> 78 79 #ifdef DEBUG 80 /* 81 * These are "special" state IDs and file handles that 82 * match any delegation state ID or file handled. This 83 * is for testing purposes only. 84 */ 85 86 87 stateid4 nfs4_deleg_any = { 0x7FFFFFF0 }; 88 char nfs4_deleg_fh[] = "\0377\0376\0375\0374"; 89 nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh }; 90 nfsstat4 cb4_getattr_fail = NFS4_OK; 91 nfsstat4 cb4_recall_fail = NFS4_OK; 92 93 int nfs4_callback_debug; 94 int nfs4_recall_debug; 95 int nfs4_drat_debug; 96 97 #endif 98 99 int nfs41_birpc = 1; /* Use bidirectional rpc */ 100 101 #define CB_NOTE(x) NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x)) 102 #define CB_WARN(x) NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x)) 103 #define CB_WARN1(x, y) NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y)) 104 105 enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE; 106 107 static zone_key_t nfs4_callback_zone_key; 108 109 /* 110 * NFS4_MAPSIZE is the number of bytes we are willing to consume 111 * for the block allocation map when the server grants a NFS_LIMIT_BLOCK 112 * style delegation. 
113 */ 114 115 #define NFS4_MAPSIZE 8192 116 #define NFS4_MAPWORDS NFS4_MAPSIZE/sizeof (uint_t) 117 #define NbPW (NBBY*sizeof (uint_t)) 118 119 static int nfs4_num_prognums = 1024; 120 static SVC_CALLOUT_TABLE nfs4_cb_sct; 121 122 struct nfs4_dnode { 123 list_node_t linkage; 124 rnode4_t *rnodep; 125 int flags; /* Flags for nfs4delegreturn_impl() */ 126 }; 127 128 static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = { 129 { "delegations", KSTAT_DATA_UINT64 }, 130 { "cb_getattr", KSTAT_DATA_UINT64 }, 131 { "cb_recall", KSTAT_DATA_UINT64 }, 132 { "cb_null", KSTAT_DATA_UINT64 }, 133 { "cb_dispatch", KSTAT_DATA_UINT64 }, 134 { "delegaccept_r", KSTAT_DATA_UINT64 }, 135 { "delegaccept_rw", KSTAT_DATA_UINT64 }, 136 { "delegreturn", KSTAT_DATA_UINT64 }, 137 { "callbacks", KSTAT_DATA_UINT64 }, 138 { "claim_cur", KSTAT_DATA_UINT64 }, 139 { "claim_cur_ok", KSTAT_DATA_UINT64 }, 140 { "recall_trunc", KSTAT_DATA_UINT64 }, 141 { "recall_failed", KSTAT_DATA_UINT64 }, 142 { "return_limit_write", KSTAT_DATA_UINT64 }, 143 { "return_limit_addmap", KSTAT_DATA_UINT64 }, 144 { "deleg_recover", KSTAT_DATA_UINT64 }, 145 { "cb_illegal", KSTAT_DATA_UINT64 }, 146 { "cb_sequence", KSTAT_DATA_UINT64 } 147 }; 148 149 struct nfs4_cb_port { 150 list_node_t linkage; /* linkage into per-zone port list */ 151 char netid[KNC_STRSIZE]; 152 char uaddr[KNC_STRSIZE]; 153 char protofmly[KNC_STRSIZE]; 154 char proto[KNC_STRSIZE]; 155 }; 156 157 static int cb_getattr_bytes; 158 159 struct cb_recall_pass { 160 rnode4_t *rp; 161 int flags; /* Flags for nfs4delegreturn_impl() */ 162 bool_t truncate; 163 }; 164 165 static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int); 166 static void nfs4delegreturn_thread(struct cb_recall_pass *); 167 static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *, 168 int); 169 static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int); 170 static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int); 171 static 
int nfs4delegreturn_impl(rnode4_t *, int, 172 struct nfs4_callback_globals *); 173 static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *, 174 struct nfs4_callback_globals *); 175 176 177 /* 178 * Only used for non-bidirectional RPC --Performs a BC2S and 179 * starts the cbconn_thread. 180 * (expects np->s_lock to be held) 181 */ 182 183 void 184 nfs41set_callback(nfs4_server_t *np, servinfo4_t *svp, mntinfo4_t *mi, 185 cred_t *cr) 186 { 187 struct nfs41_cb_info *cbi; 188 CLIENT *client; 189 struct nfs4_clnt *nfscl; 190 int error; 191 192 ASSERT(MUTEX_HELD(&np->s_lock)); 193 194 if (nfs4bind_conn_to_session(np, svp, mi, cr, CDFC4_BACK)) { 195 zcmn_err(getzoneid(), CE_WARN, 196 "Callback Channel Binding Failed"); 197 return; 198 } 199 200 /* 201 * The following below is to create a client handle 202 * used only by the cbconn_thread to send out NFSPROC4_NULL 203 * and should not be used for anything else. 204 */ 205 cbi = np->zone_globals->nfs4prog2cbinfo[np->s_program-NFS4_CALLBACK]; 206 ASSERT(cbi != NULL); 207 client = cbi->cb_client; 208 209 /* 210 * If client from a previous session, destroy it first 211 */ 212 if (client) { 213 AUTH_DESTROY(client->cl_auth); 214 CLNT_DESTROY(client); 215 } 216 217 nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone()); 218 ASSERT(nfscl != NULL); 219 220 /* Get a CLIENT handle */ 221 error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, 222 NFS4_PROGRAM, NFS_V4, 0, 0, np->s_cred, &client); 223 224 if (error != 0) { 225 zcmn_err(getzoneid(), CE_WARN, 226 "Failed to get handle for callback"); 227 cbi->cb_client = NULL; 228 return; 229 } 230 231 /* Define this handle as a back channel handle */ 232 if (!(CLNT_CONTROL(client, CLSET_BACKCHANNEL, NULL))) { 233 zcmn_err(getzoneid(), CE_WARN, 234 "Failed to set client handle as callback"); 235 CLNT_DESTROY(client); 236 cbi->cb_client = NULL; 237 return; 238 } 239 240 /* Associate it with the session */ 241 if (!CLNT_CONTROL(client, CLSET_TAG, (char 
*)(np->ssx.sessionid))) { 242 zcmn_err(getzoneid(), CE_WARN, 243 "Failed to set tag on client handle"); 244 CLNT_DESTROY(client); 245 cbi->cb_client = NULL; 246 return; 247 } 248 249 cbi->cb_nfscl = nfscl; 250 cbi->cb_client = client; 251 252 /* 253 * Now start the cbconn_thread 254 */ 255 256 np->s_refcnt++; 257 mutex_enter(&cbi->cb_reflock); 258 cbi->cb_refcnt++; 259 mutex_exit(&cbi->cb_reflock); 260 (void) zthread_create(NULL, 0, nfs4_cbconn_thread, np, 0, 261 minclsyspri); 262 } 263 264 /* 265 * nfs4_cbconn_thread is used to send a null op to the server over the 266 * backchannel connection, to keep the back channel connection up. 267 * This is not needed for bidirectional rpc as the op_sequence 268 * heartbeat thread is doing the same thing. 269 */ 270 void 271 nfs4_cbconn_thread(nfs4_server_t *np) 272 { 273 clock_t tick_delay; 274 callb_cpr_t cpr_info; 275 kmutex_t cpr_lock; 276 struct nfs41_cb_info *cbi; 277 uint32_t zilch = 0; 278 int timeo; 279 struct timeval wait; 280 enum clnt_stat rpc_stat; 281 282 cbi = np->zone_globals->nfs4prog2cbinfo[np->s_program-NFS4_CALLBACK]; 283 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 284 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4cbconn"); 285 286 timeo = (NFS_TIMEO * hz) / 10; 287 timeo = (MIN(NFS_TIMEO, (NFS_COTS_TIMEO / 10)) * hz) / 10; 288 TICK_TO_TIMEVAL(timeo, &wait); 289 tick_delay = MSEC_TO_TICK((4 * (60 * 1000L))); 290 291 while (!(cbi->cb_cbconn_exit)) { 292 if (!(CLNT_CONTROL(cbi->cb_client, CLSET_XID, 293 (char *)&zilch))) { 294 zcmn_err(getzoneid(), CE_WARN, 295 "Failed to zero xid, cbconn thread exiting"); 296 break; 297 } 298 /* Execute remote NULL procedure to establish the connection */ 299 rpc_stat = CLNT_CALL(cbi->cb_client, NFSPROC4_NULL, 300 xdr_void, NULL, xdr_void, NULL, wait); 301 if (rpc_stat != RPC_SUCCESS) { 302 zcmn_err(getzoneid(), CE_WARN, 303 "OP_NULL failed to transmit " 304 " on callback connection " 305 "status: 0x%x, cbconn thread exiting", rpc_stat); 306 break; 
307 } 308 mutex_enter(&cpr_lock); 309 CALLB_CPR_SAFE_BEGIN(&cpr_info); 310 mutex_exit(&cpr_lock); 311 312 mutex_enter(&cbi->cb_cbconn_lock); 313 (void) cv_timedwait(&cbi->cb_cbconn_wait, 314 &cbi->cb_cbconn_lock, tick_delay + lbolt); 315 mutex_exit(&cbi->cb_cbconn_lock); 316 317 mutex_enter(&cpr_lock); 318 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 319 mutex_exit(&cpr_lock); 320 } 321 322 nfs4_server_rele(np); 323 nfs41_cbinfo_rele(cbi); 324 mutex_enter(&cpr_lock); 325 CALLB_CPR_EXIT(&cpr_info); 326 cv_signal(&cbi->cb_destroy_wait); 327 zthread_exit(); 328 } 329 330 static void 331 cb_sequence(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req, 332 struct compound_state *cs, struct nfs4_callback_globals *ncg) 333 { 334 nfs4_server_t *np; 335 nfs41_cb_slot_t *cslot; 336 337 CB_SEQUENCE4args *args = &argop->nfs_cb_argop4_u.opcbsequence; 338 CB_SEQUENCE4res *resp = &resop->nfs_cb_resop4_u.opcbsequence; 339 340 ncg->nfs4_callback_stats.cb_getattr.value.ui64++; 341 342 mutex_enter(&ncg->nfs4_cb_lock); 343 np = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK]; 344 mutex_exit(&ncg->nfs4_cb_lock); 345 if (nfs4_server_vlock(np, 0) == FALSE) { 346 CB_WARN("cb_sequence: cannot find server\n"); 347 *cs->statusp = resp->csr_status = NFS4ERR_BADHANDLE; 348 return; 349 } 350 351 bcopy(&args->csa_sessionid, 352 &resp->CB_SEQUENCE4res_u.csr_resok4.csr_sessionid, 353 sizeof (args->csa_sessionid)); 354 resp->CB_SEQUENCE4res_u.csr_resok4.csr_slotid = args->csa_slotid; 355 resp->CB_SEQUENCE4res_u.csr_resok4.csr_sequenceid = 356 args->csa_sequenceid; 357 resp->CB_SEQUENCE4res_u.csr_resok4.csr_highest_slotid = 358 args->csa_highest_slotid; 359 resp->CB_SEQUENCE4res_u.csr_resok4.csr_target_highest_slotid = 360 args->csa_highest_slotid; 361 362 if (bcmp(&args->csa_sessionid, &np->ssx.sessionid, 363 sizeof (np->ssx.sessionid)) != 0) { 364 CB_WARN("cb_sequence: Bad Sequence Id\n"); 365 *cs->statusp = resp->csr_status = NFS4ERR_BADSESSION; 366 mutex_exit(&np->s_lock); 367 
nfs4_server_rele(np); 368 return; 369 } 370 371 if (args->csa_slotid >= np->ssx.cb_slot_table_size) { 372 CB_WARN("cb_sequence: Bad Slotid\n"); 373 *cs->statusp = resp->csr_status = NFS4ERR_BADSLOT; 374 mutex_exit(&np->s_lock); 375 nfs4_server_rele(np); 376 return; 377 } 378 379 cslot = np->ssx.cb_slot_table[args->csa_slotid]; 380 381 if (args->csa_sequenceid != cslot->cb_seq + 1 || (cslot->cb_inuse)) { 382 CB_WARN("cb_sequence: Bad Sequence\n"); 383 *cs->statusp = resp->csr_status = NFS4ERR_SEQ_MISORDERED; 384 mutex_exit(&np->s_lock); 385 nfs4_server_rele(np); 386 return; 387 } 388 389 cslot->cb_seq = args->csa_sequenceid; 390 /* 391 * todo: need to set inuse and deal with server having 392 * multiple callbacks in-flight. 393 */ 394 395 *cs->statusp = resp->csr_status = NFS4_OK; 396 mutex_exit(&np->s_lock); 397 nfs4_server_rele(np); 398 } 399 400 static void 401 cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req, 402 struct compound_state *cs, struct nfs4_callback_globals *ncg) 403 { 404 CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr; 405 CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr; 406 rnode4_t *rp; 407 vnode_t *vp; 408 bool_t found = FALSE; 409 struct nfs4_server *sp; 410 struct fattr4 *fap; 411 rpc_inline_t *fdata; 412 long mapcnt; 413 fattr4_change change; 414 fattr4_size size; 415 uint_t rflag; 416 417 ncg->nfs4_callback_stats.cb_getattr.value.ui64++; 418 419 #ifdef DEBUG 420 /* 421 * error injection hook: set cb_getattr_fail global to 422 * NFS4 pcol error to be returned 423 */ 424 if (cb4_getattr_fail != NFS4_OK) { 425 *cs->statusp = resp->status = cb4_getattr_fail; 426 return; 427 } 428 #endif 429 430 resp->obj_attributes.attrmask = 431 NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 432 433 mutex_enter(&ncg->nfs4_cb_lock); 434 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK]; 435 mutex_exit(&ncg->nfs4_cb_lock); 436 437 if (nfs4_server_vlock(sp, 0) == FALSE) { 438 439 CB_WARN("cb_getattr: cannot find 
server\n"); 440 441 *cs->statusp = resp->status = NFS4ERR_BADHANDLE; 442 return; 443 } 444 445 /* 446 * In cb_compound, callback_ident was validated against rq_prog, 447 * but we couldn't verify that it was set to the value we provided 448 * at setclientid time (because we didn't have server struct yet). 449 * Now we have the server struct, but don't have callback_ident 450 * handy. So, validate server struct program number against req 451 * RPC's prog number. At this point, we know the RPC prog num 452 * is valid (else we wouldn't be here); however, we don't know 453 * that it was the prog number we supplied to this server at 454 * setclientid time. If the prog numbers aren't equivalent, then 455 * log the problem and fail the request because either cbserv 456 * and/or cbclient are confused. This will probably never happen. 457 */ 458 if (sp->s_program != req->rq_prog) { 459 #ifdef DEBUG 460 zcmn_err(getzoneid(), CE_WARN, 461 "cb_getattr: wrong server program number srv=%d req=%d\n", 462 sp->s_program, req->rq_prog); 463 #else 464 zcmn_err(getzoneid(), CE_WARN, 465 "cb_getattr: wrong server program number\n"); 466 #endif 467 mutex_exit(&sp->s_lock); 468 nfs4_server_rele(sp); 469 *cs->statusp = resp->status = NFS4ERR_BADHANDLE; 470 return; 471 } 472 473 /* 474 * Search the delegation list for a matching file handle; 475 * mutex on sp prevents the list from changing. 
476 */ 477 478 rp = list_head(&sp->s_deleg_list); 479 for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) { 480 nfs4_fhandle_t fhandle; 481 482 sfh4_copyval(rp->r_fh, &fhandle); 483 484 if ((fhandle.fh_len == args->fh.nfs_fh4_len && 485 bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val, 486 fhandle.fh_len) == 0)) { 487 488 found = TRUE; 489 break; 490 } 491 #ifdef DEBUG 492 if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len && 493 bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val, 494 args->fh.nfs_fh4_len) == 0) { 495 496 found = TRUE; 497 break; 498 } 499 #endif 500 } 501 502 /* 503 * VN_HOLD the vnode before releasing s_lock to guarantee 504 * we have a valid vnode reference. 505 */ 506 if (found == TRUE) { 507 vp = RTOV4(rp); 508 VN_HOLD(vp); 509 } 510 511 mutex_exit(&sp->s_lock); 512 nfs4_server_rele(sp); 513 514 if (found == FALSE) { 515 516 CB_WARN("cb_getattr: bad fhandle\n"); 517 518 *cs->statusp = resp->status = NFS4ERR_BADHANDLE; 519 return; 520 } 521 522 /* 523 * Figure out which attributes the server wants. We only 524 * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest. 525 */ 526 fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP); 527 528 /* 529 * Don't actually need to create XDR to encode these 530 * simple data structures. 531 * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE); 532 */ 533 fap = &resp->obj_attributes; 534 535 fap->attrmask = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 536 /* attrlist4_len starts at 0 and increases as attrs are processed */ 537 fap->attrlist4 = (char *)fdata; 538 fap->attrlist4_len = 0; 539 540 if (ATTR_ISSET(args->attr_request, CHANGE)) { 541 /* 542 * If the file is mmapped, then increment the change 543 * attribute and return it. This will guarantee that 544 * the server will perceive that the file has changed 545 * if there is any chance that the client application 546 * has changed it. Otherwise, just return the change 547 * attribute as it has been updated by nfs4write_deleg. 
548 */ 549 550 mutex_enter(&rp->r_statelock); 551 mapcnt = rp->r_mapcnt; 552 rflag = rp->r_flags; 553 mutex_exit(&rp->r_statelock); 554 555 mutex_enter(&rp->r_statev4_lock); 556 /* 557 * If object mapped, then always return new change. 558 * Otherwise, return change if object has dirty 559 * pages. If object doesn't have any dirty pages, 560 * then all changes have been pushed to server, so 561 * reset change to grant change. 562 */ 563 if (mapcnt) 564 rp->r_deleg_change++; 565 else if (! (rflag & R4DIRTY)) 566 rp->r_deleg_change = rp->r_deleg_change_grant; 567 change = rp->r_deleg_change; 568 mutex_exit(&rp->r_statev4_lock); 569 570 /* 571 * Use inline XDR code directly, we know that we 572 * going to a memory buffer and it has enough 573 * space so it cannot fail. 574 */ 575 IXDR_PUT_U_HYPER(fdata, change); 576 fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT; 577 ATTR_SET(fap->attrmask, CHANGE); 578 } 579 580 if (ATTR_ISSET(args->attr_request, SIZE)) { 581 /* 582 * Use an atomic add of 0 to fetch a consistent view 583 * of r_size; this avoids having to take rw_lock 584 * which could cause a deadlock. 585 */ 586 size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0); 587 588 /* 589 * Use inline XDR code directly, we know that we 590 * going to a memory buffer and it has enough 591 * space so it cannot fail. 592 */ 593 IXDR_PUT_U_HYPER(fdata, size); 594 fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT; 595 ATTR_SET(fap->attrmask, SIZE); 596 } 597 598 VN_RELE(vp); 599 600 *cs->statusp = resp->status = NFS4_OK; 601 } 602 603 static void 604 cb_getattr_free(nfs_cb_resop4 *resop) 605 { 606 if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4) 607 kmem_free(resop->nfs_cb_resop4_u.opcbgetattr. 
608 obj_attributes.attrlist4, cb_getattr_bytes); 609 } 610 611 static nfsstat4 612 layoutrecall_all(nfs4_server_t *np) 613 { 614 vnode_t *vp; 615 rnode4_t *rp; 616 mntinfo4_t *mi = NULL; 617 nfs4_fsidlt_t *ltp; 618 nfsstat4 nstatus = NFS4ERR_NOMATCHING_LAYOUT; 619 620 /* 621 * Walk thru all of the layout trees, and discard all 622 * all the layouts, effectively discarding all the layouts 623 * from this particular server, then do LAYOUTRETURN4_ALL. 624 */ 625 mutex_enter(&np->s_lt_lock); 626 for (ltp = avl_first(&np->s_fsidlt); ltp; 627 ltp = AVL_NEXT(&np->s_fsidlt, ltp)) { 628 mutex_enter(<p->lt_rlt_lock); 629 for (rp = avl_first(<p->lt_rlayout_tree); rp; 630 rp = AVL_NEXT(<p->lt_rlayout_tree, rp)) { 631 632 vp = RTOV4(rp); 633 VN_HOLD(vp); 634 pnfs_layout_discard(rp, ltp, np); 635 /* 636 * Hold the mi to prevent it from disappearing 637 * after we drop the reference on the vnode. This 638 * will remain held until we send the request down 639 * the taskq. 640 */ 641 if (mi == NULL) { 642 mi = VTOMI4(vp); 643 MI4_HOLD(mi); 644 } 645 VN_RELE(vp); 646 nstatus = NFS4_OK; 647 } 648 mutex_exit(<p->lt_rlt_lock); 649 } 650 mutex_exit(&np->s_lt_lock); 651 if (nstatus == NFS4_OK) { 652 pnfs_layoutreturn_bulk(mi, kcred, LAYOUTRETURN4_ALL); 653 MI4_RELE(mi); 654 } 655 return (nstatus); 656 } 657 658 659 static nfsstat4 660 layoutrecall_fsid(fsid4 *recallfsid, nfs4_server_t *np) 661 { 662 vnode_t *vp; 663 rnode4_t *rp; 664 mntinfo4_t *mi = NULL; 665 nfs4_fsidlt_t *ltp, lt; 666 nfsstat4 nstatus = NFS4ERR_NOMATCHING_LAYOUT; 667 668 lt.lt_fsid.major = recallfsid->major; 669 lt.lt_fsid.minor = recallfsid->minor; 670 671 mutex_enter(&np->s_lt_lock); 672 ltp = avl_find(&np->s_fsidlt, <, NULL); 673 674 /* 675 * If no matching fsid layout tree is found, then no layouts exist 676 * for this fsid. 677 */ 678 if (ltp == NULL) { 679 mutex_exit(&np->s_lt_lock); 680 return (nstatus); 681 } 682 683 /* 684 * Found a matching fsid tree, return and free all 685 * layouts on this tree. 
686 */ 687 mutex_enter(<p->lt_rlt_lock); 688 mutex_exit(&np->s_lt_lock); 689 690 for (rp = avl_first(<p->lt_rlayout_tree); rp; 691 rp = AVL_NEXT(<p->lt_rlayout_tree, rp)) { 692 /* 693 * For each rnode on this fsid's layout tree, 694 * discard the layout. We do not return each 695 * layout individually, instead we return in 696 * bulk, at the end. 697 */ 698 vp = RTOV4(rp); 699 VN_HOLD(vp); 700 pnfs_layout_discard(rp, ltp, np); 701 if (mi == NULL) { 702 mi = VTOMI4(vp); 703 MI4_HOLD(mi); 704 } 705 VN_RELE(vp); 706 nstatus = NFS4_OK; 707 } 708 mutex_exit(<p->lt_rlt_lock); 709 if (nstatus == NFS4_OK) { 710 pnfs_layoutreturn_bulk(mi, kcred, LAYOUTRETURN4_FSID); 711 MI4_RELE(mi); 712 } 713 return (nstatus); 714 } 715 716 static nfsstat4 717 layoutrecall_file(layoutrecall_file4 *lrf, nfs4_server_t *np) 718 { 719 nfs_fh4 *rawfh = &lrf->lor_fh; 720 nfs4_sharedfh_t sfh; 721 vnode_t *vp; 722 rnode4_t lrp, *rp; 723 nfs4_fsidlt_t *ltp; 724 nfsstat4 nstatus = NFS4ERR_NOMATCHING_LAYOUT; 725 726 bcopy(rawfh, &sfh, sizeof (*rawfh)); 727 lrp.r_fh = &sfh; 728 729 mutex_enter(&np->s_lt_lock); 730 /* 731 * Look thru the fsid layout trees until we find a matching 732 * rnode on an fsid layout tree's rnode layout tree. 733 */ 734 for (ltp = avl_first(&np->s_fsidlt); ltp; 735 ltp = AVL_NEXT(&np->s_fsidlt, ltp)) { 736 /* 737 * Look at this fsid layout tree's rnode layout tree 738 * and see if it has the rnode we want based on the 739 * file handle. 740 */ 741 mutex_enter(<p->lt_rlt_lock); 742 rp = avl_find(<p->lt_rlayout_tree, &lrp, NULL); 743 if (rp != NULL) { 744 vp = RTOV4(rp); 745 VN_HOLD(vp); 746 mutex_enter(&rp->r_statelock); 747 /* 748 * Since this client will only hold one layout 749 * for an rnode at a time, if we get a 750 * layoutrecall, the stateid it has should match 751 * ours!. 752 */ 753 if (lrf->lor_stateid.seqid != 754 rp->r_lostateid.seqid + 1) { 755 cmn_err(CE_WARN, "our layout stateids are" 756 "out of sync! 
rnode: %p", (void *)rp); 757 } 758 pnfs_layout_return(vp, kcred, lrf->lor_stateid, 759 LR_ASYNC); 760 mutex_exit(&rp->r_statelock); 761 mutex_exit(<p->lt_rlt_lock); 762 VN_RELE(vp); 763 nstatus = NFS4_OK; 764 break; 765 } 766 mutex_exit(<p->lt_rlt_lock); 767 } 768 mutex_exit(&np->s_lt_lock); 769 return (nstatus); 770 } 771 772 static void 773 cb_layoutrecall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req, 774 struct compound_state *cs, struct nfs4_callback_globals *ncg) 775 { 776 CB_LAYOUTRECALL4args *args = &argop->nfs_cb_argop4_u.opcblayoutrecall; 777 CB_LAYOUTRECALL4res *resp = &resop->nfs_cb_resop4_u.opcblayoutrecall; 778 struct nfs4_server *sp; 779 780 if (args->clora_type != LAYOUT4_NFSV4_1_FILES) { 781 DTRACE_PROBE1(nfsc__i__badlayoutype, int32_t, 782 args->clora_type); 783 *cs->statusp = resp->clorr_status = NFS4ERR_INVAL; 784 return; 785 } 786 787 mutex_enter(&ncg->nfs4_cb_lock); 788 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK]; 789 mutex_exit(&ncg->nfs4_cb_lock); 790 791 if (nfs4_server_vlock(sp, 0) == FALSE) { 792 DTRACE_PROBE1(nfsc__i__bad_prog, int, req->rq_prog); 793 *cs->statusp = resp->clorr_status = NFS4ERR_NOMATCHING_LAYOUT; 794 return; 795 } 796 mutex_exit(&sp->s_lock); 797 798 switch (args->clora_recall.lor_recalltype) { 799 case LAYOUTRECALL4_FILE: 800 *cs->statusp = resp->clorr_status = 801 layoutrecall_file(&args->clora_recall. 802 layoutrecall4_u.lor_layout, sp); 803 break; 804 case LAYOUTRECALL4_FSID: 805 *cs->statusp = resp->clorr_status = 806 layoutrecall_fsid(&args->clora_recall. 
807 layoutrecall4_u.lor_fsid, sp); 808 break; 809 case LAYOUTRECALL4_ALL: 810 *cs->statusp = resp->clorr_status = layoutrecall_all(sp); 811 break; 812 default: 813 *cs->statusp = resp->clorr_status = NFS4ERR_INVAL; 814 } 815 nfs4_server_rele(sp); 816 817 if (resp->clorr_status != NFS4_OK) 818 DTRACE_PROBE2(nfsc__i__cblayouterr, 819 nfs4_server_t *, sp, nfsstat, resp->clorr_status); 820 } 821 822 static nfsstat4 823 cb_notify_device(nfs4_server_t *sp, notify4 *no) 824 { 825 nfsstat4 stat = NFS4_OK; 826 XDR x; 827 notify_deviceid_change4 ndc; 828 notify_deviceid_delete4 ndd; 829 830 /* check for missing or extra bits */ 831 if ((no->notify_mask & 832 ~(NOTIFY_DEVICEID4_CHANGE_MASK|NOTIFY_DEVICEID4_DELETE_MASK)) || 833 (no->notify_mask == 0)) 834 DTRACE_PROBE1(nfsc__i__bad_mask, bitmap4 *, no->notify_mask); 835 836 xdrmem_create(&x, no->notify_vals.notifylist4_val, 837 no->notify_vals.notifylist4_len, XDR_DECODE); 838 /* 839 * The order of checking is significant. Oddly, both bits 840 * could be set. 
841 */ 842 if (no->notify_mask & NOTIFY_DEVICEID4_CHANGE_MASK) { 843 844 if (!xdr_notify_deviceid_change4(&x, &ndc)) 845 stat = NFS4ERR_BADXDR; 846 else { 847 stat = pnfs_change_device(sp, &ndc); 848 xdr_free(xdr_notify_deviceid_change4, (caddr_t)&ndc); 849 } 850 } 851 if (stat == NFS4_OK && 852 (no->notify_mask & NOTIFY_DEVICEID4_DELETE_MASK)) { 853 854 if (!xdr_notify_deviceid_delete4(&x, &ndd)) 855 stat = NFS4ERR_BADXDR; 856 else { 857 stat = pnfs_delete_device(sp, &ndd); 858 xdr_free(xdr_notify_deviceid_change4, (caddr_t)&ndd); 859 } 860 } 861 862 return (stat); 863 } 864 865 static void 866 cb_notify_deviceid(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, 867 struct svc_req *req, struct compound_state *cs, 868 struct nfs4_callback_globals *ncg) 869 { 870 CB_NOTIFY_DEVICEID4args *args = 871 &argop->nfs_cb_argop4_u.opcbnotify_deviceid; 872 CB_NOTIFY_DEVICEID4res *resp = 873 &resop->nfs_cb_resop4_u.opcbnotify_deviceid; 874 struct nfs4_server *sp; 875 int i; 876 nfsstat4 stat; 877 878 mutex_enter(&ncg->nfs4_cb_lock); 879 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK]; 880 mutex_exit(&ncg->nfs4_cb_lock); 881 882 if (nfs4_server_vlock(sp, 0) == FALSE) { 883 DTRACE_PROBE1(nfsc__i__bad_prog, int, req->rq_prog); 884 *cs->statusp = resp->cndr_status = NFS4ERR_INVAL; 885 return; 886 } 887 mutex_exit(&sp->s_lock); 888 889 stat = NFS4_OK; 890 for (i = 0; i < args->cnda_changes.cnda_changes_len; i++) 891 if ((stat = cb_notify_device(sp, 892 &args->cnda_changes.cnda_changes_val[i])) != NFS4_OK) 893 break; 894 895 *cs->statusp = resp->cndr_status = stat; 896 nfs4_server_rele(sp); 897 } 898 899 900 static void 901 cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req, 902 struct compound_state *cs, struct nfs4_callback_globals *ncg) 903 { 904 CB_RECALL4args * args = &argop->nfs_cb_argop4_u.opcbrecall; 905 CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall; 906 rnode4_t *rp; 907 vnode_t *vp; 908 struct nfs4_server *sp; 909 bool_t found = FALSE; 
910 911 ncg->nfs4_callback_stats.cb_recall.value.ui64++; 912 913 ASSERT(req->rq_prog >= NFS4_CALLBACK); 914 ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums); 915 916 #ifdef DEBUG 917 /* 918 * error injection hook: set cb_recall_fail global to 919 * NFS4 pcol error to be returned 920 */ 921 if (cb4_recall_fail != NFS4_OK) { 922 *cs->statusp = resp->status = cb4_recall_fail; 923 return; 924 } 925 #endif 926 927 mutex_enter(&ncg->nfs4_cb_lock); 928 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK]; 929 mutex_exit(&ncg->nfs4_cb_lock); 930 931 if (nfs4_server_vlock(sp, 0) == FALSE) { 932 933 CB_WARN("cb_recall: cannot find server\n"); 934 935 *cs->statusp = resp->status = NFS4ERR_BADHANDLE; 936 return; 937 } 938 939 /* 940 * Search the delegation list for a matching file handle 941 * AND stateid; mutex on sp prevents the list from changing. 942 */ 943 944 rp = list_head(&sp->s_deleg_list); 945 for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) { 946 mutex_enter(&rp->r_statev4_lock); 947 948 /* check both state id and file handle! */ 949 950 if ((bcmp(&rp->r_deleg_stateid, &args->stateid, 951 sizeof (stateid4)) == 0)) { 952 nfs4_fhandle_t fhandle; 953 954 sfh4_copyval(rp->r_fh, &fhandle); 955 if ((fhandle.fh_len == args->fh.nfs_fh4_len && 956 bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val, 957 fhandle.fh_len) == 0)) { 958 959 found = TRUE; 960 break; 961 } else { 962 #ifdef DEBUG 963 CB_WARN("cb_recall: stateid OK, bad fh"); 964 #endif 965 } 966 } 967 #ifdef DEBUG 968 if (bcmp(&args->stateid, &nfs4_deleg_any, 969 sizeof (stateid4)) == 0) { 970 971 found = TRUE; 972 break; 973 } 974 #endif 975 mutex_exit(&rp->r_statev4_lock); 976 } 977 978 /* 979 * VN_HOLD the vnode before releasing s_lock to guarantee 980 * we have a valid vnode reference. The async thread will 981 * release the hold when it's done. 
982 */ 983 if (found == TRUE) { 984 mutex_exit(&rp->r_statev4_lock); 985 vp = RTOV4(rp); 986 VN_HOLD(vp); 987 } 988 mutex_exit(&sp->s_lock); 989 nfs4_server_rele(sp); 990 991 if (found == FALSE) { 992 993 CB_WARN("cb_recall: bad stateid\n"); 994 995 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 996 return; 997 } 998 999 /* Fire up a thread to do the delegreturn */ 1000 nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN, 1001 args->truncate); 1002 1003 *cs->statusp = resp->status = 0; 1004 } 1005 1006 /* ARGSUSED */ 1007 static void 1008 cb_recall_free(nfs_cb_resop4 *resop) 1009 { 1010 /* nothing to do here, cb_recall doesn't kmem_alloc */ 1011 } 1012 1013 /* 1014 * This function handles the CB_NULL proc call from an NFSv4 Server. 1015 * 1016 * We take note that the server has sent a CB_NULL for later processing 1017 * in the recovery logic. It is noted so we may pause slightly after the 1018 * setclientid and before reopening files. The pause is to allow the 1019 * NFSv4 Server time to receive the CB_NULL reply and adjust any of 1020 * its internal structures such that it has the opportunity to grant 1021 * delegations to reopened files. 
1022 * 1023 */ 1024 1025 /* ARGSUSED */ 1026 static void 1027 cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req, 1028 struct nfs4_callback_globals *ncg) 1029 { 1030 struct nfs4_server *sp; 1031 1032 ncg->nfs4_callback_stats.cb_null.value.ui64++; 1033 1034 ASSERT(req->rq_prog >= NFS4_CALLBACK); 1035 ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums); 1036 1037 mutex_enter(&ncg->nfs4_cb_lock); 1038 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK]; 1039 mutex_exit(&ncg->nfs4_cb_lock); 1040 1041 if (nfs4_server_vlock(sp, 0) != FALSE) { 1042 sp->s_flags |= N4S_CB_PINGED; 1043 cv_broadcast(&sp->wait_cb_null); 1044 mutex_exit(&sp->s_lock); 1045 nfs4_server_rele(sp); 1046 } 1047 } 1048 1049 /* 1050 * cb_illegal args: void 1051 * res : status (NFS4ERR_OP_CB_ILLEGAL) 1052 */ 1053 /* ARGSUSED */ 1054 static void 1055 cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req, 1056 struct compound_state *cs, struct nfs4_callback_globals *ncg) 1057 { 1058 CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal; 1059 1060 ncg->nfs4_callback_stats.cb_illegal.value.ui64++; 1061 resop->resop = OP_CB_ILLEGAL; 1062 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL; 1063 } 1064 1065 static void 1066 cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req, 1067 struct nfs4_callback_globals *ncg) 1068 { 1069 uint_t i; 1070 struct compound_state cs; 1071 nfs_cb_argop4 *argop; 1072 nfs_cb_resop4 *resop, *new_res; 1073 uint_t op, mvers_0; 1074 boolean_t sequenced = FALSE; 1075 1076 bzero(&cs, sizeof (cs)); 1077 cs.statusp = &resp->status; 1078 cs.cont = TRUE; 1079 1080 /* 1081 * Form a reply tag by copying over the reqeuest tag. 
1082 */ 1083 resp->tag.utf8string_len = args->tag.utf8string_len; 1084 resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len, 1085 KM_SLEEP); 1086 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val, 1087 args->tag.utf8string_len); 1088 1089 /* 1090 * minorversion should be zero or one 1091 */ 1092 if (args->minorversion != CB4_MINOR_v0 && 1093 args->minorversion != CB4_MINOR_v1) { 1094 resp->array_len = 0; 1095 resp->array = NULL; 1096 resp->status = NFS4ERR_MINOR_VERS_MISMATCH; 1097 return; 1098 } 1099 1100 /* 1101 * The XDR code for CB_COMPOUND decodes all cb ops regardless 1102 * of the minorversion of the compound containing the ops. 1103 * 1104 * mvers_0 is used to validate ops according to minor version: 1105 * - only mvers 0 cb ops are allowed in mv 0 cb compounds 1106 * - "is sequenced" checks only apply to mv 1 cb compunds 1107 */ 1108 mvers_0 = (args->minorversion == CB4_MINOR_v0); 1109 1110 #ifdef DEBUG 1111 /* 1112 * Verify callback_ident. It doesn't really matter if it's wrong 1113 * because we don't really use callback_ident -- we use prog number 1114 * of the RPC request instead. In this case, just print a DEBUG 1115 * console message to reveal brokenness of cbclient (at bkoff/cthon). 
1116 */ 1117 if (args->callback_ident != req->rq_prog) 1118 zcmn_err(getzoneid(), CE_WARN, 1119 "cb_compound: cb_client using wrong " 1120 "callback_ident(%d), should be %d", 1121 args->callback_ident, req->rq_prog); 1122 #endif 1123 1124 resp->array_len = args->array_len; 1125 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4), 1126 KM_SLEEP); 1127 1128 for (i = 0; i < args->array_len && cs.cont; i++) { 1129 1130 argop = &args->array[i]; 1131 resop = &resp->array[i]; 1132 resop->resop = argop->argop; 1133 op = (uint_t)resop->resop; 1134 1135 switch (op) { 1136 1137 case OP_CB_SEQUENCE: 1138 1139 if (mvers_0) { 1140 op = OP_CB_ILLEGAL; 1141 cb_illegal(argop, resop, req, &cs, ncg); 1142 break; 1143 } 1144 cb_sequence(argop, resop, req, &cs, ncg); 1145 if (*cs.statusp == NFS4_OK) 1146 sequenced = TRUE; 1147 break; 1148 1149 case OP_CB_GETATTR: 1150 1151 if (!sequenced && !mvers_0) { 1152 *cs.statusp = resp->status = 1153 NFS4ERR_SEQUENCE_POS; 1154 break; 1155 } 1156 cb_getattr(argop, resop, req, &cs, ncg); 1157 break; 1158 1159 case OP_CB_RECALL: 1160 if (!sequenced && !mvers_0) { 1161 *cs.statusp = resp->status = 1162 NFS4ERR_SEQUENCE_POS; 1163 break; 1164 } 1165 cb_recall(argop, resop, req, &cs, ncg); 1166 break; 1167 1168 case OP_CB_LAYOUTRECALL: 1169 if (mvers_0) { 1170 op = OP_CB_ILLEGAL; 1171 cb_illegal(argop, resop, req, &cs, ncg); 1172 break; 1173 } 1174 if (!sequenced) { 1175 *cs.statusp = resp->status = 1176 NFS4ERR_SEQUENCE_POS; 1177 break; 1178 } 1179 cb_layoutrecall(argop, resop, req, &cs, ncg); 1180 break; 1181 1182 case OP_CB_NOTIFY_DEVICEID: 1183 if (mvers_0) { 1184 op = OP_CB_ILLEGAL; 1185 cb_illegal(argop, resop, req, &cs, ncg); 1186 break; 1187 } 1188 if (!sequenced) { 1189 *cs.statusp = resp->status = 1190 NFS4ERR_SEQUENCE_POS; 1191 break; 1192 } 1193 cb_notify_deviceid(argop, resop, req, &cs, ncg); 1194 break; 1195 1196 case OP_CB_ILLEGAL: 1197 if (!sequenced && !mvers_0) { 1198 *cs.statusp = resp->status = 1199 
NFS4ERR_SEQUENCE_POS; 1200 break; 1201 } 1202 /* fall through */ 1203 1204 default: 1205 /* 1206 * Handle OP_CB_ILLEGAL and any undefined opcode. 1207 * Currently, the XDR code will return BADXDR 1208 * if cb op doesn't decode to legal value, so 1209 * it really only handles OP_CB_ILLEGAL. 1210 */ 1211 op = OP_CB_ILLEGAL; 1212 cb_illegal(argop, resop, req, &cs, ncg); 1213 } 1214 1215 if (*cs.statusp != NFS4_OK) 1216 cs.cont = FALSE; 1217 1218 /* 1219 * If not at last op, and if we are to stop, then 1220 * compact the results array. 1221 */ 1222 if ((i + 1) < args->array_len && !cs.cont) { 1223 1224 new_res = kmem_alloc( 1225 (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP); 1226 bcopy(resp->array, 1227 new_res, (i+1) * sizeof (nfs_cb_resop4)); 1228 kmem_free(resp->array, 1229 args->array_len * sizeof (nfs_cb_resop4)); 1230 1231 resp->array_len = i + 1; 1232 resp->array = new_res; 1233 } 1234 } 1235 1236 } 1237 1238 static void 1239 cb_compound_free(CB_COMPOUND4res *resp) 1240 { 1241 uint_t i, op; 1242 nfs_cb_resop4 *resop; 1243 1244 if (resp->tag.utf8string_val) { 1245 UTF8STRING_FREE(resp->tag) 1246 } 1247 1248 for (i = 0; i < resp->array_len; i++) { 1249 1250 resop = &resp->array[i]; 1251 op = (uint_t)resop->resop; 1252 1253 switch (op) { 1254 1255 case OP_CB_GETATTR: 1256 1257 cb_getattr_free(resop); 1258 break; 1259 1260 case OP_CB_RECALL: 1261 1262 cb_recall_free(resop); 1263 break; 1264 1265 default: 1266 break; 1267 } 1268 } 1269 1270 if (resp->array != NULL) { 1271 kmem_free(resp->array, 1272 resp->array_len * sizeof (nfs_cb_resop4)); 1273 } 1274 } 1275 1276 static void 1277 cb_dispatch(struct svc_req *req, SVCXPRT *xprt) 1278 { 1279 CB_COMPOUND4args args; 1280 CB_COMPOUND4res res; 1281 struct nfs4_callback_globals *ncg; 1282 1283 bool_t (*xdr_args)(), (*xdr_res)(); 1284 void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *, 1285 struct nfs4_callback_globals *); 1286 void (*freeproc)(CB_COMPOUND4res *); 1287 1288 ncg = 
zone_getspecific(nfs4_callback_zone_key, nfs_zone()); 1289 ASSERT(ncg != NULL); 1290 1291 ncg->nfs4_callback_stats.cb_dispatch.value.ui64++; 1292 1293 switch (req->rq_proc) { 1294 case CB_NULL: 1295 xdr_args = xdr_void; 1296 xdr_res = xdr_void; 1297 proc = cb_null; 1298 freeproc = NULL; 1299 break; 1300 1301 case CB_COMPOUND: 1302 xdr_args = xdr_CB_COMPOUND4args_clnt; 1303 xdr_res = xdr_CB_COMPOUND4res; 1304 proc = cb_compound; 1305 freeproc = cb_compound_free; 1306 break; 1307 1308 default: 1309 CB_WARN("cb_dispatch: no proc\n"); 1310 svcerr_noproc(xprt); 1311 return; 1312 } 1313 1314 args.tag.utf8string_val = NULL; 1315 args.array = NULL; 1316 1317 if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) { 1318 1319 CB_WARN("cb_dispatch: cannot getargs\n"); 1320 svcerr_decode(xprt); 1321 return; 1322 } 1323 1324 (*proc)(&args, &res, req, ncg); 1325 1326 if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) { 1327 1328 CB_WARN("cb_dispatch: bad sendreply\n"); 1329 svcerr_systemerr(xprt); 1330 } 1331 1332 if (freeproc) 1333 (*freeproc)(&res); 1334 1335 if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) { 1336 1337 CB_WARN("cb_dispatch: bad freeargs\n"); 1338 } 1339 } 1340 1341 static rpcprog_t 1342 nfs4_getnextprogram(struct nfs4_callback_globals *ncg) 1343 { 1344 int i, j; 1345 1346 j = ncg->nfs4_program_hint; 1347 for (i = 0; i < nfs4_num_prognums; i++, j++) { 1348 1349 if (j >= nfs4_num_prognums) 1350 j = 0; 1351 1352 if (ncg->nfs4prog2server[j] == NULL) { 1353 ncg->nfs4_program_hint = j+1; 1354 return (j+NFS4_CALLBACK); 1355 } 1356 } 1357 1358 return (0); 1359 } 1360 1361 void 1362 nfs4callback_destroy(nfs4_server_t *np) 1363 { 1364 struct nfs4_callback_globals *ncg; 1365 struct nfs41_cb_info *cbi; 1366 int i; 1367 1368 if (np->s_program == 0) 1369 return; 1370 1371 ncg = np->zone_globals; 1372 cbi = ncg->nfs4prog2cbinfo[np->s_program - NFS4_CALLBACK]; 1373 1374 i = np->s_program - NFS4_CALLBACK; 1375 1376 mutex_enter(&ncg->nfs4_cb_lock); 1377 1378 
ASSERT(ncg->nfs4prog2server[i] == np); 1379 1380 ncg->nfs4prog2server[i] = NULL; 1381 ncg->nfs4prog2cbinfo[i] = NULL; 1382 1383 if (i < ncg->nfs4_program_hint) 1384 ncg->nfs4_program_hint = i; 1385 1386 mutex_exit(&ncg->nfs4_cb_lock); 1387 np->s_program = 0; 1388 if (cbi != NULL) 1389 nfs41_cbinfo_rele(cbi); 1390 } 1391 1392 void 1393 nfs41_cbinfo_rele(struct nfs41_cb_info *cbi) 1394 { 1395 mutex_enter(&cbi->cb_reflock); 1396 cbi->cb_refcnt--; 1397 if (cbi->cb_refcnt > 0) { 1398 mutex_exit(&cbi->cb_reflock); 1399 return; 1400 } 1401 ASSERT(cbi->cb_flags & NFS41_CB_THREAD_EXIT); 1402 ASSERT(cbi->cb_cbconn_exit); 1403 mutex_exit(&cbi->cb_reflock); 1404 1405 cbi->cb_rpc->r_flags |= SVCCB_DEAD; 1406 cv_signal(&cbi->cb_rpc->r_cbwait); /* XXX - See mir_set_cbinfo */ 1407 1408 if (cbi->cb_client) { 1409 if (!(CLNT_CONTROL(cbi->cb_client, 1410 CLSET_BACKCHANNEL_CLEAR, NULL))) { 1411 zcmn_err(getzoneid(), CE_WARN, 1412 "Failed To Clear Client Handle Callback %p", 1413 (void *)cbi->cb_client); 1414 } 1415 CLNT_DESTROY(cbi->cb_client); 1416 } 1417 mutex_destroy(&cbi->cb_cbconn_lock); 1418 cv_destroy(&cbi->cb_destroy_wait); 1419 cv_destroy(&cbi->cb_cbconn_wait); 1420 mutex_destroy(&cbi->cb_reflock); 1421 kmem_free(cbi, sizeof (*cbi)); 1422 } 1423 1424 /* 1425 * nfs4_setport - This function saves a netid and univeral address for 1426 * the callback program. These values will be used during setclientid. 
1427 */ 1428 static void 1429 nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto, 1430 struct nfs4_callback_globals *ncg) 1431 { 1432 struct nfs4_cb_port *p; 1433 bool_t found = FALSE; 1434 1435 ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock)); 1436 1437 p = list_head(&ncg->nfs4_cb_ports); 1438 for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) { 1439 if (strcmp(p->netid, netid) == 0) { 1440 found = TRUE; 1441 break; 1442 } 1443 } 1444 if (found == TRUE) 1445 (void) strcpy(p->uaddr, uaddr); 1446 else { 1447 p = kmem_alloc(sizeof (*p), KM_SLEEP); 1448 1449 (void) strcpy(p->uaddr, uaddr); 1450 (void) strcpy(p->netid, netid); 1451 (void) strcpy(p->protofmly, protofmly); 1452 (void) strcpy(p->proto, proto); 1453 list_insert_head(&ncg->nfs4_cb_ports, p); 1454 } 1455 } 1456 1457 static void 1458 nfs41_callback_thread(struct nfs41_cb_info *cbi) 1459 { 1460 callb_cpr_t cprinfo; 1461 kmutex_t cpr_lock; 1462 SVCXPRT *clone_xprt; 1463 mblk_t *mp; 1464 struct rpc_msg msg; 1465 struct svc_req r; 1466 char *cred_area; 1467 int rqcred_size = 400; /* RQCRED_SIZE */ 1468 SVCCB *cb = cbi->cb_rpc; 1469 1470 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 1471 CALLB_CPR_INIT(&cprinfo, &cpr_lock, callb_generic_cpr, 1472 "nfs41_cb"); 1473 1474 mutex_enter(&cbi->cb_rpc->r_lock); 1475 while (!(cbi->cb_flags & NFS41_CB_THREAD_EXIT)) { 1476 mutex_enter(&cpr_lock); 1477 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1478 mutex_exit(&cpr_lock); 1479 1480 cv_wait(&cbi->cb_rpc->r_cbwait, &cbi->cb_rpc->r_lock); 1481 1482 mutex_enter(&cpr_lock); 1483 CALLB_CPR_SAFE_END(&cprinfo, &cpr_lock); 1484 mutex_exit(&cpr_lock); 1485 1486 if (cbi->cb_flags & NFS41_CB_THREAD_EXIT) 1487 break; 1488 1489 mutex_exit(&cbi->cb_rpc->r_lock); 1490 1491 mutex_enter(&cb->r_mlock); 1492 mp = cb->r_mp; 1493 cb->r_mp = NULL; 1494 mutex_exit(&cb->r_mlock); 1495 1496 clone_xprt = svc_clone_init(); 1497 1498 svc_init_clone_xprt(clone_xprt, cb->r_q); 1499 clone_xprt->xp_master = NULL; 1500 clone_xprt->xp_msg_size = 2048; /* 
COTS_MAX_ALLOCSIZE */ 1501 cred_area = kmem_zalloc(2 * MAX_AUTH_BYTES + rqcred_size, 1502 KM_SLEEP); 1503 msg.rm_call.cb_cred.oa_base = cred_area; 1504 msg.rm_call.cb_verf.oa_base = &(cred_area[MAX_AUTH_BYTES]); 1505 r.rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]); 1506 1507 /* 1508 * underlying transport recv routine may modify mblk data 1509 * and make it difficult to extract label afterwards. So 1510 * get the label from the raw mblk data now. 1511 */ 1512 if (is_system_labeled()) { 1513 mblk_t *lmp; 1514 1515 r.rq_label = kmem_alloc(sizeof (bslabel_t), KM_SLEEP); 1516 if (DB_CRED(mp) != NULL) 1517 lmp = mp; 1518 else { 1519 ASSERT(mp->b_cont != NULL); 1520 lmp = mp->b_cont; 1521 ASSERT(DB_CRED(lmp) != NULL); 1522 } 1523 bcopy(label2bslabel(crgetlabel(DB_CRED(lmp))), r.rq_label, 1524 sizeof (bslabel_t)); 1525 } else { 1526 r.rq_label = NULL; 1527 } 1528 1529 /* 1530 * Now receive the message. 1531 */ 1532 if (SVC_RECV(clone_xprt, mp, &msg)) { 1533 void (*dispatchroutine) (struct svc_req *, SVCXPRT *); 1534 bool_t no_dispatch; 1535 enum auth_stat why; 1536 1537 /* 1538 * Find the registered program and call its 1539 * dispatch routine. 
1540 */ 1541 r.rq_xprt = clone_xprt; 1542 r.rq_prog = msg.rm_call.cb_prog; 1543 r.rq_vers = msg.rm_call.cb_vers; 1544 r.rq_proc = msg.rm_call.cb_proc; 1545 r.rq_cred = msg.rm_call.cb_cred; 1546 1547 if ((why = sec_svc_msg(&r, &msg, &no_dispatch)) != 1548 AUTH_OK) { 1549 svcerr_auth(clone_xprt, why); 1550 (void) SVC_FREEARGS(clone_xprt, NULL, NULL); 1551 } else if (no_dispatch) { 1552 (void) SVC_FREEARGS(clone_xprt, NULL, NULL); 1553 } else { 1554 if (r.rq_vers >= cbi->cb_versmin && 1555 r.rq_vers <= cbi->cb_versmax) { 1556 dispatchroutine = cbi->cb_callback; 1557 (*dispatchroutine) (&r, clone_xprt); 1558 } else { 1559 svcerr_progvers(clone_xprt, 1560 cbi->cb_versmin, 1561 cbi->cb_versmax); 1562 } 1563 (void) SVC_FREEARGS(clone_xprt, NULL, NULL); 1564 } 1565 if (r.rq_cred.oa_flavor == RPCSEC_GSS) 1566 rpc_gss_cleanup(clone_xprt); 1567 } 1568 if (r.rq_label != NULL) 1569 kmem_free(r.rq_label, sizeof (bslabel_t)); 1570 mutex_enter(&cbi->cb_rpc->r_lock); 1571 } 1572 cbi->cb_thread = NULL; 1573 mutex_exit(&cbi->cb_rpc->r_lock); 1574 mutex_enter(&cpr_lock); 1575 CALLB_CPR_EXIT(&cprinfo); 1576 1577 nfs41_cbinfo_rele(cbi); 1578 1579 /* 1580 * Signal destroy_session that we are done. 1581 */ 1582 cv_signal(&cbi->cb_destroy_wait); 1583 1584 zthread_exit(); 1585 } 1586 1587 1588 /* 1589 * nfs4_cb_args - This function is used to construct the callback 1590 * portion of the arguments needed for setclientid. 1591 */ 1592 1593 void 1594 nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args) 1595 { 1596 struct nfs4_cb_port *p; 1597 bool_t found = FALSE; 1598 rpcprog_t pgm; 1599 struct nfs4_callback_globals *ncg = np->zone_globals; 1600 1601 /* 1602 * This server structure may already have a program number 1603 * assigned to it. This happens when the client has to 1604 * re-issue SETCLIENTID. Just re-use the information. 
1605 */ 1606 if (np->s_program >= NFS4_CALLBACK && 1607 np->s_program < NFS4_CALLBACK + nfs4_num_prognums) 1608 nfs4callback_destroy(np); 1609 1610 mutex_enter(&ncg->nfs4_cb_lock); 1611 1612 p = list_head(&ncg->nfs4_cb_ports); 1613 for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) { 1614 if (strcmp(p->protofmly, knc->knc_protofmly) == 0 && 1615 strcmp(p->proto, knc->knc_proto) == 0) { 1616 found = TRUE; 1617 break; 1618 } 1619 } 1620 1621 if (found == FALSE) { 1622 1623 NFS4_DEBUG(nfs4_callback_debug, 1624 (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n", 1625 knc->knc_protofmly, knc->knc_proto)); 1626 1627 args->callback.cb_program = 0; 1628 args->callback.cb_location.r_netid = NULL; 1629 args->callback.cb_location.r_addr = NULL; 1630 args->callback_ident = 0; 1631 mutex_exit(&ncg->nfs4_cb_lock); 1632 return; 1633 } 1634 1635 if ((pgm = nfs4_getnextprogram(ncg)) == 0) { 1636 CB_WARN("nfs4_cb_args: out of program numbers\n"); 1637 1638 args->callback.cb_program = 0; 1639 args->callback.cb_location.r_netid = NULL; 1640 args->callback.cb_location.r_addr = NULL; 1641 args->callback_ident = 0; 1642 mutex_exit(&ncg->nfs4_cb_lock); 1643 return; 1644 } 1645 1646 ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np; 1647 args->callback.cb_program = pgm; 1648 args->callback.cb_location.r_netid = p->netid; 1649 args->callback.cb_location.r_addr = p->uaddr; 1650 args->callback_ident = pgm; 1651 1652 np->s_program = pgm; 1653 1654 mutex_exit(&ncg->nfs4_cb_lock); 1655 } 1656 1657 /* 1658 * nfs4_cb_args - This function is used to construct the callback 1659 * portion of the arguments needed for create_session. 1660 */ 1661 /* ARGSUSED */ 1662 void 1663 nfs41_cb_args(nfs4_server_t *np, struct knetconfig *knc, 1664 CREATE_SESSION4args *args) 1665 { 1666 rpcprog_t pgm; 1667 struct nfs4_callback_globals *ncg = np->zone_globals; 1668 struct nfs41_cb_info *cbi; 1669 1670 /* 1671 * This server structure may already have a program number 1672 * assigned to it. 
This happens when the client has to 1673 * re-issue SETCLIENTID. Just re-use the information. 1674 */ 1675 if (np->s_program >= NFS4_CALLBACK && 1676 np->s_program < NFS4_CALLBACK + nfs4_num_prognums) 1677 nfs4callback_destroy(np); 1678 1679 mutex_enter(&ncg->nfs4_cb_lock); 1680 1681 if ((pgm = nfs4_getnextprogram(ncg)) == 0) { 1682 CB_WARN("nfs4_cb_args: out of program numbers\n"); 1683 1684 args->csa_cb_program = 0; 1685 args->csa_sec_parms.csa_sec_parms_len = 0; 1686 args->csa_sec_parms.csa_sec_parms_val = NULL; 1687 mutex_exit(&ncg->nfs4_cb_lock); 1688 return; 1689 } 1690 1691 if (ncg->nfs4prog2cbinfo[pgm-NFS4_CALLBACK] == NULL) 1692 cbi = kmem_zalloc(sizeof (struct nfs41_cb_info), KM_SLEEP); 1693 else 1694 cbi = ncg->nfs4prog2cbinfo[pgm-NFS4_CALLBACK]; 1695 1696 cbi->cb_prog = pgm; 1697 cbi->cb_versmin = NFS_CB; 1698 cbi->cb_versmax = NFS_CB; 1699 cbi->cb_callback = cb_dispatch; 1700 1701 cv_init(&cbi->cb_destroy_wait, NULL, CV_DEFAULT, NULL); 1702 mutex_init(&cbi->cb_reflock, NULL, MUTEX_DEFAULT, NULL); 1703 1704 cv_init(&cbi->cb_cbconn_wait, NULL, CV_DEFAULT, NULL); 1705 mutex_init(&cbi->cb_cbconn_lock, NULL, MUTEX_DEFAULT, NULL); 1706 1707 /* 1708 * set cb_refcnt to 2, 1 to account for it being in the 1709 * nfs4prog2cbinfo table, and another for the nfs41_callback_thread. 
1710 */ 1711 cbi->cb_refcnt = 2; 1712 ncg->nfs4prog2cbinfo[pgm-NFS4_CALLBACK] = cbi; 1713 ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np; 1714 np->s_program = pgm; 1715 mutex_exit(&ncg->nfs4_cb_lock); 1716 1717 args->csa_cb_program = pgm; 1718 args->csa_sec_parms.csa_sec_parms_len = 1; 1719 args->csa_sec_parms.csa_sec_parms_val = (callback_sec_parms4 *) 1720 kmem_zalloc(sizeof (callback_sec_parms4), KM_SLEEP); 1721 args->csa_sec_parms.csa_sec_parms_val->cb_secflavor = AUTH_NONE; 1722 cbi->cb_rpc = kmem_zalloc(sizeof (SVCCB), KM_SLEEP); 1723 mutex_init(&cbi->cb_rpc->r_lock, NULL, MUTEX_DEFAULT, NULL); 1724 mutex_init(&cbi->cb_rpc->r_mlock, NULL, MUTEX_DEFAULT, NULL); 1725 cv_init(&cbi->cb_rpc->r_cbwait, NULL, CV_DEFAULT, NULL); 1726 cbi->cb_rpc->r_prog = pgm; 1727 if (!cbi->cb_thread) { 1728 cbi->cb_thread = zthread_create(NULL, 0, 1729 nfs41_callback_thread, 1730 cbi, 0, minclsyspri); 1731 ASSERT(cbi->cb_thread != NULL); 1732 } 1733 1734 } 1735 1736 static int 1737 nfs4_dquery(struct nfs4_svc_args *arg, model_t model) 1738 { 1739 file_t *fp; 1740 vnode_t *vp; 1741 rnode4_t *rp; 1742 int error; 1743 STRUCT_HANDLE(nfs4_svc_args, uap); 1744 1745 STRUCT_SET_HANDLE(uap, model, arg); 1746 1747 if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL) 1748 return (EBADF); 1749 1750 vp = fp->f_vnode; 1751 1752 if (vp == NULL || vp->v_type != VREG || 1753 !vn_matchops(vp, nfs4_vnodeops)) { 1754 releasef(STRUCT_FGET(uap, fd)); 1755 return (EBADF); 1756 } 1757 1758 rp = VTOR4(vp); 1759 1760 /* 1761 * I can't convince myself that we need locking here. The 1762 * rnode cannot disappear and the value returned is instantly 1763 * stale anway, so why bother? 1764 */ 1765 1766 error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type); 1767 releasef(STRUCT_FGET(uap, fd)); 1768 return (error); 1769 } 1770 1771 1772 /* 1773 * NFS4 client system call. This service does the 1774 * necessary initialization for the callback program. 
1775 * This is fashioned after the server side interaction 1776 * between nfsd and the kernel. On the client, the 1777 * mount command forks and the child process does the 1778 * necessary interaction with the kernel. 1779 * 1780 * uap->fd is the fd of an open transport provider 1781 */ 1782 int 1783 nfs4_svc(struct nfs4_svc_args *arg, model_t model) 1784 { 1785 file_t *fp; 1786 int error; 1787 int readsize; 1788 char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE]; 1789 char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE]; 1790 size_t len; 1791 STRUCT_HANDLE(nfs4_svc_args, uap); 1792 struct netbuf addrmask; 1793 int cmd; 1794 SVCMASTERXPRT *cb_xprt; 1795 struct nfs4_callback_globals *ncg; 1796 1797 #ifdef lint 1798 model = model; /* STRUCT macros don't always refer to it */ 1799 #endif 1800 1801 STRUCT_SET_HANDLE(uap, model, arg); 1802 1803 if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY) 1804 return (nfs4_dquery(arg, model)); 1805 1806 if (secpolicy_nfs(CRED()) != 0) 1807 return (EPERM); 1808 1809 if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL) 1810 return (EBADF); 1811 1812 /* 1813 * Set read buffer size to rsize 1814 * and add room for RPC headers. 
1815 */ 1816 readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA); 1817 if (readsize < RPC_MAXDATASIZE) 1818 readsize = RPC_MAXDATASIZE; 1819 1820 error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf, 1821 KNC_STRSIZE, &len); 1822 if (error) { 1823 releasef(STRUCT_FGET(uap, fd)); 1824 return (error); 1825 } 1826 1827 cmd = STRUCT_FGET(uap, cmd); 1828 1829 if (cmd & NFS4_KRPC_START) { 1830 addrmask.len = STRUCT_FGET(uap, addrmask.len); 1831 addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen); 1832 addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP); 1833 error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf, 1834 addrmask.len); 1835 if (error) { 1836 releasef(STRUCT_FGET(uap, fd)); 1837 kmem_free(addrmask.buf, addrmask.maxlen); 1838 return (error); 1839 } 1840 } 1841 else 1842 addrmask.buf = NULL; 1843 1844 error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr, 1845 sizeof (uaddr), &len); 1846 if (error) { 1847 releasef(STRUCT_FGET(uap, fd)); 1848 if (addrmask.buf) 1849 kmem_free(addrmask.buf, addrmask.maxlen); 1850 return (error); 1851 } 1852 1853 error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly, 1854 sizeof (protofmly), &len); 1855 if (error) { 1856 releasef(STRUCT_FGET(uap, fd)); 1857 if (addrmask.buf) 1858 kmem_free(addrmask.buf, addrmask.maxlen); 1859 return (error); 1860 } 1861 1862 error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto, 1863 sizeof (proto), &len); 1864 if (error) { 1865 releasef(STRUCT_FGET(uap, fd)); 1866 if (addrmask.buf) 1867 kmem_free(addrmask.buf, addrmask.maxlen); 1868 return (error); 1869 } 1870 1871 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone()); 1872 ASSERT(ncg != NULL); 1873 1874 mutex_enter(&ncg->nfs4_cb_lock); 1875 if (cmd & NFS4_SETPORT) 1876 nfs4_setport(buf, uaddr, protofmly, proto, ncg); 1877 1878 if (cmd & NFS4_KRPC_START) { 1879 error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt, 1880 &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE); 1881 if 
(error) { 1882 CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n", 1883 error); 1884 kmem_free(addrmask.buf, addrmask.maxlen); 1885 } 1886 } 1887 1888 mutex_exit(&ncg->nfs4_cb_lock); 1889 releasef(STRUCT_FGET(uap, fd)); 1890 return (error); 1891 } 1892 1893 struct nfs4_callback_globals * 1894 nfs4_get_callback_globals(void) 1895 { 1896 return (zone_getspecific(nfs4_callback_zone_key, nfs_zone())); 1897 } 1898 1899 static void * 1900 nfs4_callback_init_zone(zoneid_t zoneid) 1901 { 1902 kstat_t *nfs4_callback_kstat; 1903 struct nfs4_callback_globals *ncg; 1904 1905 ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP); 1906 1907 ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums * 1908 sizeof (struct nfs4_server *), KM_SLEEP); 1909 1910 ncg->nfs4prog2cbinfo = kmem_zalloc(nfs4_num_prognums * 1911 sizeof (struct nfs4_cb_info *), KM_SLEEP); 1912 1913 /* initialize the dlist */ 1914 mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL); 1915 list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode), 1916 offsetof(struct nfs4_dnode, linkage)); 1917 1918 /* initialize cb_port list */ 1919 mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL); 1920 list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port), 1921 offsetof(struct nfs4_cb_port, linkage)); 1922 1923 /* get our own copy of the kstats */ 1924 bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats, 1925 sizeof (nfs4_callback_stats_tmpl)); 1926 /* register "nfs:0:nfs4_callback_stats" for this zone */ 1927 if ((nfs4_callback_kstat = 1928 kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc", 1929 KSTAT_TYPE_NAMED, 1930 sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t), 1931 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, 1932 zoneid)) != NULL) { 1933 nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats; 1934 kstat_install(nfs4_callback_kstat); 1935 } 1936 return (ncg); 1937 } 1938 1939 static void 1940 nfs4_discard_delegations(struct nfs4_callback_globals *ncg) 1941 { 1942 nfs4_server_t *sp; 1943 
int i, num_removed; 1944 1945 /* 1946 * It's OK here to just run through the registered "programs", as 1947 * servers without programs won't have any delegations to handle. 1948 */ 1949 for (i = 0; i < nfs4_num_prognums; i++) { 1950 rnode4_t *rp; 1951 1952 mutex_enter(&ncg->nfs4_cb_lock); 1953 sp = ncg->nfs4prog2server[i]; 1954 mutex_exit(&ncg->nfs4_cb_lock); 1955 1956 if (nfs4_server_vlock(sp, 1) == FALSE) 1957 continue; 1958 num_removed = 0; 1959 while ((rp = list_head(&sp->s_deleg_list)) != NULL) { 1960 mutex_enter(&rp->r_statev4_lock); 1961 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) { 1962 /* 1963 * We need to take matters into our own hands, 1964 * as nfs4delegreturn_cleanup_impl() won't 1965 * remove this from the list. 1966 */ 1967 list_remove(&sp->s_deleg_list, rp); 1968 mutex_exit(&rp->r_statev4_lock); 1969 nfs4_dec_state_ref_count_nolock(sp, 1970 VTOMI4(RTOV4(rp))); 1971 num_removed++; 1972 continue; 1973 } 1974 mutex_exit(&rp->r_statev4_lock); 1975 VN_HOLD(RTOV4(rp)); 1976 mutex_exit(&sp->s_lock); 1977 /* 1978 * The following will remove the node from the list. 1979 */ 1980 nfs4delegreturn_cleanup_impl(rp, sp, ncg); 1981 VN_RELE(RTOV4(rp)); 1982 mutex_enter(&sp->s_lock); 1983 } 1984 mutex_exit(&sp->s_lock); 1985 /* each removed list node reles a reference */ 1986 while (num_removed-- > 0) 1987 nfs4_server_rele(sp); 1988 /* remove our reference for nfs4_server_vlock */ 1989 nfs4_server_rele(sp); 1990 } 1991 } 1992 1993 /* ARGSUSED */ 1994 static void 1995 nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data) 1996 { 1997 struct nfs4_callback_globals *ncg = data; 1998 1999 /* 2000 * Clean pending delegation return list. 2001 */ 2002 nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD); 2003 2004 /* 2005 * Discard all delegations. 
2006 */ 2007 nfs4_discard_delegations(ncg); 2008 } 2009 2010 static void 2011 nfs4_callback_fini_zone(zoneid_t zoneid, void *data) 2012 { 2013 struct nfs4_callback_globals *ncg = data; 2014 struct nfs4_cb_port *p; 2015 nfs4_server_t *sp, *next; 2016 nfs4_server_t freelist; 2017 int i; 2018 2019 kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid); 2020 2021 /* 2022 * Discard all delegations that may have crept in since we did the 2023 * _shutdown. 2024 */ 2025 nfs4_discard_delegations(ncg); 2026 /* 2027 * We're completely done with this zone and all associated 2028 * nfs4_server_t's. Any remaining nfs4_server_ts should only have one 2029 * more reference outstanding -- the reference we didn't release in 2030 * nfs4_renew_lease_thread(). 2031 * 2032 * Here we need to run through the global nfs4_server_lst as we need to 2033 * deal with nfs4_server_ts without programs, as they also have threads 2034 * created for them, and so have outstanding references that we need to 2035 * release. 
2036 */ 2037 freelist.forw = &freelist; 2038 freelist.back = &freelist; 2039 mutex_enter(&nfs4_server_lst_lock); 2040 sp = nfs4_server_lst.forw; 2041 while (sp != &nfs4_server_lst) { 2042 next = sp->forw; 2043 if (sp->zoneid == zoneid) { 2044 remque(sp); 2045 insque(sp, &freelist); 2046 } 2047 sp = next; 2048 } 2049 mutex_exit(&nfs4_server_lst_lock); 2050 2051 sp = freelist.forw; 2052 while (sp != &freelist) { 2053 next = sp->forw; 2054 nfs4_server_rele(sp); /* free the list's reference */ 2055 sp = next; 2056 } 2057 2058 #ifdef DEBUG 2059 for (i = 0; i < nfs4_num_prognums; i++) { 2060 ASSERT(ncg->nfs4prog2server[i] == NULL); 2061 } 2062 #endif 2063 kmem_free(ncg->nfs4prog2server, nfs4_num_prognums * 2064 sizeof (struct nfs4_server *)); 2065 2066 mutex_enter(&ncg->nfs4_cb_lock); 2067 while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) { 2068 list_remove(&ncg->nfs4_cb_ports, p); 2069 kmem_free(p, sizeof (*p)); 2070 } 2071 list_destroy(&ncg->nfs4_cb_ports); 2072 mutex_destroy(&ncg->nfs4_cb_lock); 2073 list_destroy(&ncg->nfs4_dlist); 2074 mutex_destroy(&ncg->nfs4_dlist_lock); 2075 kmem_free(ncg, sizeof (*ncg)); 2076 } 2077 2078 void 2079 nfs4_callback_init(void) 2080 { 2081 int i; 2082 SVC_CALLOUT *nfs4_cb_sc; 2083 2084 /* initialize the callback table */ 2085 nfs4_cb_sc = kmem_alloc(nfs4_num_prognums * 2086 sizeof (SVC_CALLOUT), KM_SLEEP); 2087 2088 for (i = 0; i < nfs4_num_prognums; i++) { 2089 nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i; 2090 nfs4_cb_sc[i].sc_versmin = NFS_CB; 2091 nfs4_cb_sc[i].sc_versmax = NFS_CB; 2092 nfs4_cb_sc[i].sc_dispatch = cb_dispatch; 2093 } 2094 2095 nfs4_cb_sct.sct_size = nfs4_num_prognums; 2096 nfs4_cb_sct.sct_free = FALSE; 2097 nfs4_cb_sct.sct_sc = nfs4_cb_sc; 2098 2099 /* 2100 * Compute max bytes required for dyamically allocated parts 2101 * of cb_getattr reply. Only size and change are supported now. 2102 * If CB_GETATTR is changed to reply with additional attrs, 2103 * additional sizes must be added below. 
2104 * 2105 * fattr4_change + fattr4_size == uint64_t + uint64_t 2106 */ 2107 cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT; 2108 2109 zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone, 2110 nfs4_callback_shutdown_zone, nfs4_callback_fini_zone); 2111 } 2112 2113 void 2114 nfs4_callback_fini(void) 2115 { 2116 } 2117 2118 /* 2119 * NB: This function can be called from the *wrong* zone (ie, the zone that 2120 * 'rp' belongs to and the caller's zone may not be the same). This can happen 2121 * if the zone is going away and we get called from nfs4_async_inactive(). In 2122 * this case the globals will be NULL and we won't update the counters, which 2123 * doesn't matter as the zone is going away anyhow. 2124 */ 2125 static void 2126 nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np, 2127 struct nfs4_callback_globals *ncg) 2128 { 2129 mntinfo4_t *mi = VTOMI4(RTOV4(rp)); 2130 boolean_t need_rele = B_FALSE; 2131 2132 /* 2133 * Caller must be holding mi_recovlock in read mode 2134 * to call here. This is provided by start_op. 2135 * Delegation management requires to grab s_lock 2136 * first and then r_statev4_lock. 2137 */ 2138 2139 if (np == NULL) { 2140 np = find_nfs4_server_all(mi, 1); 2141 ASSERT(np != NULL); 2142 need_rele = B_TRUE; 2143 } else { 2144 mutex_enter(&np->s_lock); 2145 } 2146 2147 mutex_enter(&rp->r_statev4_lock); 2148 2149 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) { 2150 mutex_exit(&rp->r_statev4_lock); 2151 mutex_exit(&np->s_lock); 2152 if (need_rele) 2153 nfs4_server_rele(np); 2154 return; 2155 } 2156 2157 /* 2158 * Free the cred originally held when 2159 * the delegation was granted. Caller must 2160 * hold this cred if it wants to use it after 2161 * this call. 
2162 */ 2163 crfree(rp->r_deleg_cred); 2164 rp->r_deleg_cred = NULL; 2165 rp->r_deleg_type = OPEN_DELEGATE_NONE; 2166 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE; 2167 rp->r_deleg_needs_recall = FALSE; 2168 rp->r_deleg_return_pending = FALSE; 2169 2170 /* 2171 * Remove the rnode from the server's list and 2172 * update the ref counts. 2173 */ 2174 list_remove(&np->s_deleg_list, rp); 2175 mutex_exit(&rp->r_statev4_lock); 2176 nfs4_dec_state_ref_count_nolock(np, mi); 2177 mutex_exit(&np->s_lock); 2178 /* removed list node removes a reference */ 2179 nfs4_server_rele(np); 2180 if (need_rele) 2181 nfs4_server_rele(np); 2182 if (ncg != NULL) 2183 ncg->nfs4_callback_stats.delegations.value.ui64--; 2184 } 2185 2186 void 2187 nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np) 2188 { 2189 struct nfs4_callback_globals *ncg; 2190 2191 if (np != NULL) { 2192 ncg = np->zone_globals; 2193 } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) { 2194 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone()); 2195 ASSERT(ncg != NULL); 2196 } else { 2197 /* 2198 * Request coming from the wrong zone. 2199 */ 2200 ASSERT(getzoneid() == GLOBAL_ZONEID); 2201 ncg = NULL; 2202 } 2203 2204 nfs4delegreturn_cleanup_impl(rp, np, ncg); 2205 } 2206 2207 static void 2208 nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp, 2209 cred_t *cr, vnode_t *vp) 2210 { 2211 if (error != ETIMEDOUT && error != EINTR && 2212 !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) { 2213 lost_rqstp->lr_op = 0; 2214 return; 2215 } 2216 2217 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2218 "nfs4close_save_lost_rqst: error %d", error)); 2219 2220 lost_rqstp->lr_op = OP_DELEGRETURN; 2221 /* 2222 * The vp is held and rele'd via the recovery code. 2223 * See nfs4_save_lost_rqst. 
2224 */ 2225 lost_rqstp->lr_vp = vp; 2226 lost_rqstp->lr_dvp = NULL; 2227 lost_rqstp->lr_oop = NULL; 2228 lost_rqstp->lr_osp = NULL; 2229 lost_rqstp->lr_lop = NULL; 2230 lost_rqstp->lr_cr = cr; 2231 lost_rqstp->lr_flk = NULL; 2232 lost_rqstp->lr_putfirst = FALSE; 2233 } 2234 2235 static void 2236 nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep) 2237 { 2238 COMPOUND4args_clnt args; 2239 COMPOUND4res_clnt res; 2240 nfs_argop4 argops[3]; 2241 nfs4_ga_res_t *garp = NULL; 2242 hrtime_t t; 2243 int numops; 2244 int doqueue = 1; 2245 mntinfo4_t *mi = VTOMI4(RTOV4(rp)); 2246 2247 args.ctag = TAG_DELEGRETURN; 2248 2249 numops = 3; /* PUTFH, GETATTR, DELEGRETURN */ 2250 2251 args.array = argops; 2252 args.array_len = numops; 2253 2254 argops[0].argop = OP_CPUTFH; 2255 argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh; 2256 2257 argops[1].argop = OP_GETATTR; 2258 argops[1].nfs_argop4_u.opgetattr.attr_request = 2259 MI4_DEFAULT_ATTRMAP(mi); 2260 argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp)); 2261 2262 argops[2].argop = OP_DELEGRETURN; 2263 argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid = 2264 rp->r_deleg_stateid; 2265 2266 t = gethrtime(); 2267 rfs4call(VTOMI4(RTOV4(rp)), NULL, &args, &res, cr, &doqueue, 0, ep); 2268 2269 if (ep->error) 2270 return; 2271 2272 if (res.status == NFS4_OK) { 2273 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res; 2274 nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL); 2275 2276 } 2277 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2278 } 2279 2280 int 2281 nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr, 2282 struct nfs4_callback_globals *ncg) 2283 { 2284 vnode_t *vp = RTOV4(rp); 2285 mntinfo4_t *mi = VTOMI4(vp); 2286 nfs4_lost_rqst_t lost_rqst; 2287 nfs4_recov_state_t recov_state; 2288 bool_t needrecov = FALSE, recovonly, done = FALSE; 2289 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 2290 2291 ncg->nfs4_callback_stats.delegreturn.value.ui64++; 2292 2293 while (!done) { 2294 e.error = 
nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN, 2295 &recov_state, &recovonly); 2296 2297 if (e.error) { 2298 if (flags & NFS4_DR_FORCE) { 2299 (void) nfs_rw_enter_sig(&mi->mi_recovlock, 2300 RW_READER, 0); 2301 nfs4delegreturn_cleanup_impl(rp, NULL, ncg); 2302 nfs_rw_exit(&mi->mi_recovlock); 2303 } 2304 break; 2305 } 2306 2307 /* 2308 * Check to see if the delegation has already been 2309 * returned by the recovery thread. The state of 2310 * the delegation cannot change at this point due 2311 * to start_fop and the r_deleg_recall_lock. 2312 */ 2313 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) { 2314 e.error = 0; 2315 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov); 2316 break; 2317 } 2318 2319 if (recovonly) { 2320 /* 2321 * Delegation will be returned via the 2322 * recovery framework. Build a lost request 2323 * structure, start recovery and get out. 2324 */ 2325 nfs4_error_init(&e, EINTR); 2326 nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst, 2327 cr, vp); 2328 (void) nfs4_start_recovery(&e, mi, vp, 2329 NULL, &rp->r_deleg_stateid, 2330 lost_rqst.lr_op == OP_DELEGRETURN ? 2331 &lost_rqst : NULL, OP_DELEGRETURN, NULL); 2332 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov); 2333 break; 2334 } 2335 2336 nfs4delegreturn_otw(rp, cr, &e); 2337 2338 /* 2339 * Ignore some errors on delegreturn; no point in marking 2340 * the file dead on a state destroying operation. 2341 */ 2342 if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) || 2343 e.stat == NFS4ERR_BADHANDLE || 2344 e.stat == NFS4ERR_STALE)) 2345 needrecov = FALSE; 2346 else 2347 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp); 2348 2349 if (needrecov) { 2350 nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst, 2351 cr, vp); 2352 (void) nfs4_start_recovery(&e, mi, vp, 2353 NULL, &rp->r_deleg_stateid, 2354 lost_rqst.lr_op == OP_DELEGRETURN ? 
2355 &lost_rqst : NULL, OP_DELEGRETURN, NULL); 2356 } else { 2357 nfs4delegreturn_cleanup_impl(rp, NULL, ncg); 2358 done = TRUE; 2359 } 2360 2361 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov); 2362 } 2363 return (e.error); 2364 } 2365 2366 /* 2367 * nfs4_resend_delegreturn - used to drive the delegreturn 2368 * operation via the recovery thread. 2369 */ 2370 void 2371 nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep, 2372 nfs4_server_t *np) 2373 { 2374 rnode4_t *rp = VTOR4(lorp->lr_vp); 2375 2376 /* If the file failed recovery, just quit. */ 2377 mutex_enter(&rp->r_statelock); 2378 if (rp->r_flags & R4RECOVERR) { 2379 ep->error = EIO; 2380 } 2381 mutex_exit(&rp->r_statelock); 2382 2383 if (!ep->error) 2384 nfs4delegreturn_otw(rp, lorp->lr_cr, ep); 2385 2386 /* 2387 * If recovery is now needed, then return the error 2388 * and status and let the recovery thread handle it, 2389 * including re-driving another delegreturn. Otherwise, 2390 * just give up and clean up the delegation. 2391 */ 2392 if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp)) 2393 return; 2394 2395 if (rp->r_deleg_type != OPEN_DELEGATE_NONE) 2396 nfs4delegreturn_cleanup(rp, np); 2397 2398 nfs4_error_zinit(ep); 2399 } 2400 2401 /* 2402 * nfs4delegreturn - general function to return a delegation. 
2403 * 2404 * NFS4_DR_FORCE - return the delegation even if start_op fails 2405 * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE 2406 * NFS4_DR_DISCARD - discard the delegation w/o delegreturn 2407 * NFS4_DR_DID_OP - calling function already did nfs4_start_op 2408 * NFS4_DR_RECALL - delegreturned initiated via CB_RECALL 2409 * NFS4_DR_REOPEN - do file reopens, if applicable 2410 */ 2411 static int 2412 nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg) 2413 { 2414 int error = 0; 2415 cred_t *cr = NULL; 2416 vnode_t *vp; 2417 bool_t needrecov = FALSE; 2418 bool_t rw_entered = FALSE; 2419 bool_t do_reopen; 2420 2421 vp = RTOV4(rp); 2422 2423 /* 2424 * If NFS4_DR_DISCARD is set by itself, take a short-cut and 2425 * discard without doing an otw DELEGRETURN. This may only be used 2426 * by the recovery thread because it bypasses the synchronization 2427 * with r_deleg_recall_lock and mi->mi_recovlock. 2428 */ 2429 if (flags == NFS4_DR_DISCARD) { 2430 nfs4delegreturn_cleanup_impl(rp, NULL, ncg); 2431 return (0); 2432 } 2433 2434 if (flags & NFS4_DR_DID_OP) { 2435 /* 2436 * Caller had already done start_op, which means the 2437 * r_deleg_recall_lock is already held in READ mode 2438 * so we cannot take it in write mode. Return the 2439 * delegation asynchronously. 2440 * 2441 * Remove the NFS4_DR_DID_OP flag so we don't 2442 * get stuck looping through here. 2443 */ 2444 VN_HOLD(vp); 2445 nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE); 2446 return (0); 2447 } 2448 2449 /* 2450 * Verify we still have a delegation and crhold the credential. 2451 */ 2452 mutex_enter(&rp->r_statev4_lock); 2453 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) { 2454 mutex_exit(&rp->r_statev4_lock); 2455 goto out; 2456 } 2457 cr = rp->r_deleg_cred; 2458 ASSERT(cr != NULL); 2459 crhold(cr); 2460 mutex_exit(&rp->r_statev4_lock); 2461 2462 /* 2463 * Push the modified data back to the server synchronously 2464 * before doing DELEGRETURN. 
2465 */ 2466 if (flags & NFS4_DR_PUSH) 2467 (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL); 2468 2469 /* 2470 * Take r_deleg_recall_lock in WRITE mode, this will prevent 2471 * nfs4_is_otw_open_necessary from trying to use the delegation 2472 * while the DELEGRETURN is in progress. 2473 */ 2474 (void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE); 2475 2476 rw_entered = TRUE; 2477 2478 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) 2479 goto out; 2480 2481 if (flags & NFS4_DR_REOPEN) { 2482 /* 2483 * If R4RECOVERRP is already set, then skip re-opening 2484 * the delegation open streams and go straight to doing 2485 * delegreturn. (XXX if the file has failed recovery, then the 2486 * delegreturn attempt is likely to be futile.) 2487 */ 2488 mutex_enter(&rp->r_statelock); 2489 do_reopen = !(rp->r_flags & R4RECOVERRP); 2490 mutex_exit(&rp->r_statelock); 2491 2492 if (do_reopen) { 2493 error = deleg_reopen(vp, &needrecov, ncg, flags); 2494 if (error != 0) { 2495 if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL)) 2496 == 0) 2497 goto out; 2498 } else if (needrecov) { 2499 if ((flags & NFS4_DR_FORCE) == 0) 2500 goto out; 2501 } 2502 } 2503 } 2504 2505 if (flags & NFS4_DR_DISCARD) { 2506 mntinfo4_t *mi = VTOMI4(RTOV4(rp)); 2507 2508 mutex_enter(&rp->r_statelock); 2509 /* 2510 * deleg_return_pending is cleared inside of delegation_accept 2511 * when a delegation is accepted. if this flag has been 2512 * cleared, then a new delegation has overwritten the one we 2513 * were about to throw away. 
2514 */ 2515 if (!rp->r_deleg_return_pending) { 2516 mutex_exit(&rp->r_statelock); 2517 goto out; 2518 } 2519 mutex_exit(&rp->r_statelock); 2520 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE); 2521 nfs4delegreturn_cleanup_impl(rp, NULL, ncg); 2522 nfs_rw_exit(&mi->mi_recovlock); 2523 } else { 2524 error = nfs4_do_delegreturn(rp, flags, cr, ncg); 2525 } 2526 2527 out: 2528 if (cr) 2529 crfree(cr); 2530 if (rw_entered) 2531 nfs_rw_exit(&rp->r_deleg_recall_lock); 2532 return (error); 2533 } 2534 2535 int 2536 nfs4delegreturn(rnode4_t *rp, int flags) 2537 { 2538 struct nfs4_callback_globals *ncg; 2539 2540 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone()); 2541 ASSERT(ncg != NULL); 2542 2543 return (nfs4delegreturn_impl(rp, flags, ncg)); 2544 } 2545 2546 void 2547 nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc) 2548 { 2549 struct cb_recall_pass *pp; 2550 2551 pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP); 2552 pp->rp = rp; 2553 pp->flags = flags; 2554 pp->truncate = trunc; 2555 2556 /* 2557 * Fire up a thread to do the actual delegreturn 2558 * Caller must guarantee that the rnode doesn't 2559 * vanish (by calling VN_HOLD). 
2560 */ 2561 2562 (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0, 2563 minclsyspri); 2564 } 2565 2566 static void 2567 delegreturn_all_thread(rpcprog_t *pp) 2568 { 2569 nfs4_server_t *np; 2570 bool_t found = FALSE; 2571 rpcprog_t prog; 2572 rnode4_t *rp; 2573 vnode_t *vp; 2574 zoneid_t zoneid = getzoneid(); 2575 struct nfs4_callback_globals *ncg; 2576 2577 NFS4_DEBUG(nfs4_drat_debug, 2578 (CE_NOTE, "delereturn_all_thread: prog %d\n", *pp)); 2579 2580 prog = *pp; 2581 kmem_free(pp, sizeof (*pp)); 2582 pp = NULL; 2583 2584 mutex_enter(&nfs4_server_lst_lock); 2585 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 2586 if (np->zoneid == zoneid && np->s_program == prog) { 2587 mutex_enter(&np->s_lock); 2588 found = TRUE; 2589 break; 2590 } 2591 } 2592 mutex_exit(&nfs4_server_lst_lock); 2593 2594 /* 2595 * It's possible that the nfs4_server which was using this 2596 * program number has vanished since this thread is async. 2597 * If so, just return. Your work here is finished, my friend. 2598 */ 2599 if (!found) 2600 goto out; 2601 2602 ncg = np->zone_globals; 2603 while ((rp = list_head(&np->s_deleg_list)) != NULL) { 2604 vp = RTOV4(rp); 2605 VN_HOLD(vp); 2606 mutex_exit(&np->s_lock); 2607 (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN, 2608 ncg); 2609 VN_RELE(vp); 2610 2611 /* retake the s_lock for next trip through the loop */ 2612 mutex_enter(&np->s_lock); 2613 } 2614 mutex_exit(&np->s_lock); 2615 out: 2616 NFS4_DEBUG(nfs4_drat_debug, 2617 (CE_NOTE, "delereturn_all_thread: complete\n")); 2618 zthread_exit(); 2619 } 2620 2621 void 2622 nfs4_delegreturn_all(nfs4_server_t *sp) 2623 { 2624 rpcprog_t pro, *pp; 2625 2626 mutex_enter(&sp->s_lock); 2627 2628 /* Check to see if the delegation list is empty */ 2629 2630 if (list_head(&sp->s_deleg_list) == NULL) { 2631 mutex_exit(&sp->s_lock); 2632 return; 2633 } 2634 /* 2635 * Grab the program number; the async thread will use this 2636 * to find the nfs4_server. 
2637 */ 2638 pro = sp->s_program; 2639 mutex_exit(&sp->s_lock); 2640 pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP); 2641 *pp = pro; 2642 (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0, 2643 minclsyspri); 2644 } 2645 2646 2647 /* 2648 * Discard any delegations 2649 * 2650 * Iterate over the servers s_deleg_list and 2651 * for matching mount-point rnodes discard 2652 * the delegation. 2653 */ 2654 void 2655 nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp) 2656 { 2657 rnode4_t *rp, *next; 2658 mntinfo4_t *r_mi; 2659 struct nfs4_callback_globals *ncg; 2660 2661 ASSERT(mutex_owned(&sp->s_lock)); 2662 ncg = sp->zone_globals; 2663 2664 for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) { 2665 r_mi = VTOMI4(RTOV4(rp)); 2666 next = list_next(&sp->s_deleg_list, rp); 2667 2668 if (r_mi != mi) { 2669 /* 2670 * Skip if this rnode is in not on the 2671 * same mount-point 2672 */ 2673 continue; 2674 } 2675 2676 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ); 2677 2678 #ifdef DEBUG 2679 if (nfs4_client_recov_debug) { 2680 zprintf(getzoneid(), 2681 "nfs4_deleg_discard: matched rnode %p " 2682 "-- discarding delegation\n", (void *)rp); 2683 } 2684 #endif 2685 mutex_enter(&rp->r_statev4_lock); 2686 /* 2687 * Free the cred originally held when the delegation 2688 * was granted. Also need to decrement the refcnt 2689 * on this server for each delegation we discard 2690 */ 2691 if (rp->r_deleg_cred) 2692 crfree(rp->r_deleg_cred); 2693 rp->r_deleg_cred = NULL; 2694 rp->r_deleg_type = OPEN_DELEGATE_NONE; 2695 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE; 2696 rp->r_deleg_needs_recall = FALSE; 2697 ASSERT(sp->s_refcnt > 1); 2698 sp->s_refcnt--; 2699 list_remove(&sp->s_deleg_list, rp); 2700 mutex_exit(&rp->r_statev4_lock); 2701 nfs4_dec_state_ref_count_nolock(sp, mi); 2702 ncg->nfs4_callback_stats.delegations.value.ui64--; 2703 } 2704 } 2705 2706 /* 2707 * Reopen any open streams that were covered by the given file's 2708 * delegation. 
2709 * Returns zero or an errno value. If there was no error, *recovp 2710 * indicates whether recovery was initiated. 2711 */ 2712 2713 static int 2714 deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg, 2715 int flags) 2716 { 2717 nfs4_open_stream_t *osp; 2718 nfs4_recov_state_t recov_state; 2719 bool_t needrecov = FALSE; 2720 mntinfo4_t *mi; 2721 rnode4_t *rp; 2722 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 2723 int claimnull; 2724 2725 mi = VTOMI4(vp); 2726 rp = VTOR4(vp); 2727 2728 recov_state.rs_flags = 0; 2729 recov_state.rs_num_retry_despite_err = 0; 2730 2731 retry: 2732 if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) { 2733 return (e.error); 2734 } 2735 2736 /* 2737 * if we mean to discard the delegation, it must be BAD, so don't 2738 * use it when doing the reopen or it will fail too. 2739 */ 2740 claimnull = (flags & NFS4_DR_DISCARD); 2741 /* 2742 * Loop through the open streams for this rnode to find 2743 * all of the ones created using the delegation state ID. 2744 * Each of these needs to be re-opened. 2745 */ 2746 2747 while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) { 2748 2749 if (claimnull) { 2750 nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE); 2751 } else { 2752 ncg->nfs4_callback_stats.claim_cur.value.ui64++; 2753 2754 nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE, 2755 FALSE); 2756 if (e.error == 0 && e.stat == NFS4_OK) 2757 ncg->nfs4_callback_stats. 2758 claim_cur_ok.value.ui64++; 2759 } 2760 2761 if (e.error == EAGAIN) { 2762 nfs4_end_op(mi, vp, NULL, &recov_state, TRUE); 2763 goto retry; 2764 } 2765 2766 /* 2767 * if error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then 2768 * recovery has already been started inside of nfs4_reopen. 
2769 */ 2770 if (e.error == EINTR || e.error == ETIMEDOUT || 2771 NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) { 2772 open_stream_rele(osp, rp); 2773 break; 2774 } 2775 2776 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp); 2777 2778 if (e.error != 0 && !needrecov) { 2779 /* 2780 * Recovery is not possible, but don't give up yet; 2781 * we'd still like to do delegreturn after 2782 * reopening as many streams as possible. 2783 * Continue processing the open streams. 2784 */ 2785 2786 ncg->nfs4_callback_stats.recall_failed.value.ui64++; 2787 2788 } else if (needrecov) { 2789 /* 2790 * Start recovery and bail out. The recovery 2791 * thread will take it from here. 2792 */ 2793 (void) nfs4_start_recovery(&e, mi, vp, NULL, NULL, 2794 NULL, OP_OPEN, NULL); 2795 open_stream_rele(osp, rp); 2796 *recovp = TRUE; 2797 break; 2798 } 2799 2800 open_stream_rele(osp, rp); 2801 } 2802 2803 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov); 2804 2805 return (e.error); 2806 } 2807 2808 /* 2809 * get_next_deleg_stream - returns the next open stream which 2810 * represents a delegation for this rnode. In order to assure 2811 * forward progress, the caller must guarantee that each open 2812 * stream returned is changed so that a future call won't return 2813 * it again. 2814 * 2815 * There are several ways for the open stream to change. If the open 2816 * stream is !os_delegation, then we aren't interested in it. Also, if 2817 * either os_failed_reopen or !os_valid, then don't return the osp. 2818 * 2819 * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return 2820 * the osp if it is an os_delegation open stream. Also, if the rnode still 2821 * has r_deleg_return_pending, then return the os_delegation osp. Lastly, 2822 * if the rnode's r_deleg_stateid is different from the osp's open_stateid, 2823 * then return the osp. 
2824 * 2825 * We have already taken the 'r_deleg_recall_lock' as WRITER, which 2826 * prevents new OPENs from going OTW (as start_fop takes this 2827 * lock in READ mode); thus, no new open streams can be created 2828 * (which inherently means no new delegation open streams are 2829 * being created). 2830 */ 2831 2832 static nfs4_open_stream_t * 2833 get_next_deleg_stream(rnode4_t *rp, int claimnull) 2834 { 2835 nfs4_open_stream_t *osp; 2836 2837 ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER)); 2838 2839 /* 2840 * Search through the list of open streams looking for 2841 * one that was created while holding the delegation. 2842 */ 2843 mutex_enter(&rp->r_os_lock); 2844 for (osp = list_head(&rp->r_open_streams); osp != NULL; 2845 osp = list_next(&rp->r_open_streams, osp)) { 2846 mutex_enter(&osp->os_sync_lock); 2847 if (!osp->os_delegation || osp->os_failed_reopen || 2848 !osp->os_valid) { 2849 mutex_exit(&osp->os_sync_lock); 2850 continue; 2851 } 2852 if (!claimnull || rp->r_deleg_return_pending || 2853 !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) { 2854 osp->os_ref_count++; 2855 mutex_exit(&osp->os_sync_lock); 2856 mutex_exit(&rp->r_os_lock); 2857 return (osp); 2858 } 2859 mutex_exit(&osp->os_sync_lock); 2860 } 2861 mutex_exit(&rp->r_os_lock); 2862 2863 return (NULL); 2864 } 2865 2866 static void 2867 nfs4delegreturn_thread(struct cb_recall_pass *args) 2868 { 2869 rnode4_t *rp; 2870 vnode_t *vp; 2871 cred_t *cr; 2872 int dtype, error, flags; 2873 bool_t rdirty, rip; 2874 kmutex_t cpr_lock; 2875 callb_cpr_t cpr_info; 2876 struct nfs4_callback_globals *ncg; 2877 2878 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone()); 2879 ASSERT(ncg != NULL); 2880 2881 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 2882 2883 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 2884 "nfsv4delegRtn"); 2885 2886 rp = args->rp; 2887 vp = RTOV4(rp); 2888 2889 mutex_enter(&rp->r_statev4_lock); 2890 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) { 2891 
mutex_exit(&rp->r_statev4_lock); 2892 goto out; 2893 } 2894 mutex_exit(&rp->r_statev4_lock); 2895 2896 /* 2897 * Take the read-write lock in read mode to prevent other 2898 * threads from modifying the data during the recall. This 2899 * doesn't affect mmappers. 2900 */ 2901 (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE); 2902 2903 /* Proceed with delegreturn */ 2904 2905 mutex_enter(&rp->r_statev4_lock); 2906 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) { 2907 mutex_exit(&rp->r_statev4_lock); 2908 nfs_rw_exit(&rp->r_rwlock); 2909 goto out; 2910 } 2911 dtype = rp->r_deleg_type; 2912 cr = rp->r_deleg_cred; 2913 ASSERT(cr != NULL); 2914 crhold(cr); 2915 mutex_exit(&rp->r_statev4_lock); 2916 2917 flags = args->flags; 2918 2919 /* 2920 * If the file is being truncated at the server, then throw 2921 * away all of the pages, it doesn't matter what flavor of 2922 * delegation we have. 2923 */ 2924 2925 if (args->truncate) { 2926 ncg->nfs4_callback_stats.recall_trunc.value.ui64++; 2927 nfs4_invalidate_pages(vp, 0, cr); 2928 } else if (dtype == OPEN_DELEGATE_WRITE) { 2929 2930 mutex_enter(&rp->r_statelock); 2931 rdirty = rp->r_flags & R4DIRTY; 2932 mutex_exit(&rp->r_statelock); 2933 2934 if (rdirty) { 2935 error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL); 2936 2937 if (error) 2938 CB_WARN1("nfs4delegreturn_thread:" 2939 " VOP_PUTPAGE: %d\n", error); 2940 } 2941 /* turn off NFS4_DR_PUSH because we just did that above. */ 2942 flags &= ~NFS4_DR_PUSH; 2943 } 2944 2945 mutex_enter(&rp->r_statelock); 2946 rip = rp->r_flags & R4RECOVERRP; 2947 mutex_exit(&rp->r_statelock); 2948 2949 /* If a failed recovery is indicated, discard the pages */ 2950 2951 if (rip) { 2952 2953 error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL); 2954 2955 if (error) 2956 CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n", 2957 error); 2958 } 2959 2960 /* 2961 * Pass the flags to nfs4delegreturn_impl, but be sure not to pass 2962 * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again. 
2963 */ 2964 flags &= ~NFS4_DR_DID_OP; 2965 2966 (void) nfs4delegreturn_impl(rp, flags, ncg); 2967 2968 nfs_rw_exit(&rp->r_rwlock); 2969 crfree(cr); 2970 out: 2971 kmem_free(args, sizeof (struct cb_recall_pass)); 2972 VN_RELE(vp); 2973 mutex_enter(&cpr_lock); 2974 CALLB_CPR_EXIT(&cpr_info); 2975 mutex_destroy(&cpr_lock); 2976 zthread_exit(); 2977 } 2978 2979 /* 2980 * This function has one assumption that the caller of this function is 2981 * either doing recovery (therefore cannot call nfs4_start_op) or has 2982 * already called nfs4_start_op(). 2983 */ 2984 void 2985 nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res, 2986 nfs4_ga_res_t *garp, cred_t *cr) 2987 { 2988 open_read_delegation4 *orp; 2989 open_write_delegation4 *owp; 2990 nfs4_server_t *np; 2991 bool_t already = FALSE; 2992 bool_t recall = FALSE; 2993 bool_t valid_garp = TRUE; 2994 bool_t delegation_granted = FALSE; 2995 bool_t dr_needed = FALSE; 2996 bool_t recov; 2997 int dr_flags = 0; 2998 long mapcnt; 2999 uint_t rflag; 3000 mntinfo4_t *mi; 3001 struct nfs4_callback_globals *ncg; 3002 open_delegation_type4 odt; 3003 3004 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone()); 3005 ASSERT(ncg != NULL); 3006 3007 mi = VTOMI4(RTOV4(rp)); 3008 3009 /* 3010 * Accept a delegation granted to the client via an OPEN. 3011 * Set the delegation fields in the rnode and insert the 3012 * rnode onto the list anchored in the nfs4_server_t. The 3013 * proper locking order requires the nfs4_server_t first, 3014 * even though it may not be needed in all cases. 3015 * 3016 * NB: find_nfs4_server returns with s_lock held. 
3017 */ 3018 3019 if ((np = find_nfs4_server(mi)) == NULL) 3020 return; 3021 3022 /* grab the statelock too, for examining r_mapcnt */ 3023 mutex_enter(&rp->r_statelock); 3024 mutex_enter(&rp->r_statev4_lock); 3025 3026 if (rp->r_deleg_type == OPEN_DELEGATE_READ || 3027 rp->r_deleg_type == OPEN_DELEGATE_WRITE) 3028 already = TRUE; 3029 3030 odt = res->delegation.delegation_type; 3031 3032 if (odt == OPEN_DELEGATE_READ) { 3033 3034 rp->r_deleg_type = res->delegation.delegation_type; 3035 orp = &res->delegation.open_delegation4_u.read; 3036 rp->r_deleg_stateid = orp->stateid; 3037 rp->r_deleg_perms = orp->permissions; 3038 if (claim == CLAIM_PREVIOUS) 3039 if ((recall = orp->recall) != 0) 3040 dr_needed = TRUE; 3041 3042 delegation_granted = TRUE; 3043 3044 ncg->nfs4_callback_stats.delegations.value.ui64++; 3045 ncg->nfs4_callback_stats.delegaccept_r.value.ui64++; 3046 3047 } else if (odt == OPEN_DELEGATE_WRITE) { 3048 3049 rp->r_deleg_type = res->delegation.delegation_type; 3050 owp = &res->delegation.open_delegation4_u.write; 3051 rp->r_deleg_stateid = owp->stateid; 3052 rp->r_deleg_perms = owp->permissions; 3053 rp->r_deleg_limit = owp->space_limit; 3054 if (claim == CLAIM_PREVIOUS) 3055 if ((recall = owp->recall) != 0) 3056 dr_needed = TRUE; 3057 3058 delegation_granted = TRUE; 3059 3060 if (garp == NULL || !garp->n4g_change_valid) { 3061 valid_garp = FALSE; 3062 rp->r_deleg_change = 0; 3063 rp->r_deleg_change_grant = 0; 3064 } else { 3065 rp->r_deleg_change = garp->n4g_change; 3066 rp->r_deleg_change_grant = garp->n4g_change; 3067 } 3068 mapcnt = rp->r_mapcnt; 3069 rflag = rp->r_flags; 3070 3071 /* 3072 * Update the delegation change attribute if 3073 * there are mappers for the file is dirty. This 3074 * might be the case during recovery after server 3075 * reboot. 
3076 */ 3077 if (mapcnt > 0 || rflag & R4DIRTY) 3078 rp->r_deleg_change++; 3079 3080 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, 3081 "nfs4_delegation_accept: r_deleg_change: 0x%x\n", 3082 (int)(rp->r_deleg_change >> 32))); 3083 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, 3084 "nfs4_delegation_accept: r_delg_change_grant: 0x%x\n", 3085 (int)(rp->r_deleg_change_grant >> 32))); 3086 3087 3088 ncg->nfs4_callback_stats.delegations.value.ui64++; 3089 ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++; 3090 } else if (already) { 3091 /* 3092 * No delegation granted. If the rnode currently has 3093 * has one, then consider it tainted and return it. 3094 */ 3095 dr_needed = TRUE; 3096 } 3097 3098 if (delegation_granted) { 3099 /* Add the rnode to the list. */ 3100 if (!already) { 3101 crhold(cr); 3102 rp->r_deleg_cred = cr; 3103 3104 ASSERT(mutex_owned(&np->s_lock)); 3105 list_insert_head(&np->s_deleg_list, rp); 3106 /* added list node gets a reference */ 3107 np->s_refcnt++; 3108 nfs4_inc_state_ref_count_nolock(np, mi); 3109 } 3110 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE; 3111 } 3112 3113 /* 3114 * We've now safely accepted the delegation, if any. Drop the 3115 * locks and figure out what post-processing is needed. We'd 3116 * like to retain r_statev4_lock, but nfs4_server_rele takes 3117 * s_lock which would be a lock ordering violation. 3118 */ 3119 mutex_exit(&rp->r_statev4_lock); 3120 mutex_exit(&rp->r_statelock); 3121 mutex_exit(&np->s_lock); 3122 nfs4_server_rele(np); 3123 3124 /* 3125 * Check to see if we are in recovery. Remember that 3126 * this function is protected by start_op, so a recovery 3127 * cannot begin until we are out of here. 
3128 */ 3129 mutex_enter(&mi->mi_lock); 3130 recov = mi->mi_recovflags & MI4_RECOV_ACTIV; 3131 mutex_exit(&mi->mi_lock); 3132 3133 mutex_enter(&rp->r_statev4_lock); 3134 3135 if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp) 3136 dr_needed = TRUE; 3137 3138 if (dr_needed && rp->r_deleg_return_pending == FALSE) { 3139 if (recov) { 3140 /* 3141 * We cannot call delegreturn from inside 3142 * of recovery or VOP_PUTPAGE will hang 3143 * due to nfs4_start_fop call in 3144 * nfs4write. Use dlistadd to add the 3145 * rnode to the list of rnodes needing 3146 * cleaning. We do not need to do reopen 3147 * here because recov_openfiles will do it. 3148 * In the non-recall case, just discard the 3149 * delegation as it is no longer valid. 3150 */ 3151 if (recall) 3152 dr_flags = NFS4_DR_PUSH; 3153 else 3154 dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD; 3155 3156 nfs4_dlistadd(rp, ncg, dr_flags); 3157 dr_flags = 0; 3158 } else { 3159 /* 3160 * Push the modified data back to the server, 3161 * reopen any delegation open streams, and return 3162 * the delegation. Drop the statev4_lock first! 3163 */ 3164 dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN; 3165 } 3166 } 3167 mutex_exit(&rp->r_statev4_lock); 3168 if (dr_flags) 3169 (void) nfs4delegreturn_impl(rp, dr_flags, ncg); 3170 } 3171 3172 /* 3173 * nfs4delegabandon - Abandon the delegation on an rnode4. This code 3174 * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID 3175 * or BADSEQID and the recovery code is unable to recover. Push any 3176 * dirty data back to the server and return the delegation (if any). 
3177 */ 3178 3179 void 3180 nfs4delegabandon(rnode4_t *rp) 3181 { 3182 vnode_t *vp; 3183 struct cb_recall_pass *pp; 3184 open_delegation_type4 dt; 3185 3186 mutex_enter(&rp->r_statev4_lock); 3187 dt = rp->r_deleg_type; 3188 mutex_exit(&rp->r_statev4_lock); 3189 3190 if (dt == OPEN_DELEGATE_NONE) 3191 return; 3192 3193 vp = RTOV4(rp); 3194 VN_HOLD(vp); 3195 3196 pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP); 3197 pp->rp = rp; 3198 /* 3199 * Recovery on the file has failed and we want to return 3200 * the delegation. We don't want to reopen files and 3201 * nfs4delegreturn_thread() figures out what to do about 3202 * the data. The only thing to do is attempt to return 3203 * the delegation. 3204 */ 3205 pp->flags = 0; 3206 pp->truncate = FALSE; 3207 3208 /* 3209 * Fire up a thread to do the delegreturn; this is 3210 * necessary because we could be inside a GETPAGE or 3211 * PUTPAGE and we cannot do another one. 3212 */ 3213 3214 (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0, 3215 minclsyspri); 3216 } 3217 3218 static int 3219 wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp, 3220 int flg) 3221 { 3222 rnode4_t *rp; 3223 int error = 0; 3224 3225 #ifdef lint 3226 op = op; 3227 #endif 3228 3229 if (vp && vp->v_type == VREG) { 3230 rp = VTOR4(vp); 3231 3232 /* 3233 * Take r_deleg_recall_lock in read mode to synchronize 3234 * with delegreturn. 
3235 */ 3236 error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock, 3237 RW_READER, INTR4(vp)); 3238 3239 if (error == 0) 3240 rsp->rs_flags |= flg; 3241 3242 } 3243 return (error); 3244 } 3245 3246 void 3247 nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp) 3248 { 3249 NFS4_DEBUG(nfs4_recall_debug, 3250 (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n", 3251 (void *)vp1, (void *)vp2)); 3252 3253 if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2) 3254 nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock); 3255 if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1) 3256 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock); 3257 } 3258 3259 int 3260 wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op, 3261 nfs4_recov_state_t *rsp) 3262 { 3263 int error; 3264 3265 NFS4_DEBUG(nfs4_recall_debug, 3266 (CE_NOTE, "wait_for_recall: 0x%p, 0x%p\n", 3267 (void *)vp1, (void *) vp2)); 3268 3269 rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2); 3270 3271 if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0) 3272 return (error); 3273 3274 if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2)) 3275 != 0) { 3276 if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) { 3277 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock); 3278 rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1; 3279 } 3280 3281 return (error); 3282 } 3283 3284 return (0); 3285 } 3286 3287 /* 3288 * nfs4_dlistadd - Add this rnode to a list of rnodes to be 3289 * DELEGRETURN'd at the end of recovery. 3290 */ 3291 3292 static void 3293 nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags) 3294 { 3295 struct nfs4_dnode *dp; 3296 3297 ASSERT(mutex_owned(&rp->r_statev4_lock)); 3298 /* 3299 * Mark the delegation as having a return pending. 3300 * This will prevent the use of the delegation stateID 3301 * by read, write, setattr and open. 
3302 */ 3303 rp->r_deleg_return_pending = TRUE; 3304 dp = kmem_alloc(sizeof (*dp), KM_SLEEP); 3305 VN_HOLD(RTOV4(rp)); 3306 dp->rnodep = rp; 3307 dp->flags = flags; 3308 mutex_enter(&ncg->nfs4_dlist_lock); 3309 list_insert_head(&ncg->nfs4_dlist, dp); 3310 #ifdef DEBUG 3311 ncg->nfs4_dlistadd_c++; 3312 #endif 3313 mutex_exit(&ncg->nfs4_dlist_lock); 3314 } 3315 3316 /* 3317 * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list. 3318 * of files awaiting cleaning. If the override_flags are non-zero 3319 * then use them rather than the flags that were set when the rnode 3320 * was added to the dlist. 3321 */ 3322 static void 3323 nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags) 3324 { 3325 rnode4_t *rp; 3326 struct nfs4_dnode *dp; 3327 int flags; 3328 3329 ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD); 3330 3331 mutex_enter(&ncg->nfs4_dlist_lock); 3332 while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) { 3333 #ifdef DEBUG 3334 ncg->nfs4_dlistclean_c++; 3335 #endif 3336 list_remove(&ncg->nfs4_dlist, dp); 3337 mutex_exit(&ncg->nfs4_dlist_lock); 3338 rp = dp->rnodep; 3339 flags = (override_flags != 0) ? override_flags : dp->flags; 3340 kmem_free(dp, sizeof (*dp)); 3341 (void) nfs4delegreturn_impl(rp, flags, ncg); 3342 VN_RELE(RTOV4(rp)); 3343 mutex_enter(&ncg->nfs4_dlist_lock); 3344 } 3345 mutex_exit(&ncg->nfs4_dlist_lock); 3346 } 3347 3348 void 3349 nfs4_dlistclean(void) 3350 { 3351 struct nfs4_callback_globals *ncg; 3352 3353 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone()); 3354 ASSERT(ncg != NULL); 3355 3356 nfs4_dlistclean_impl(ncg, 0); 3357 } --- EOF ---