Move CallBack Server thread creation, initial processing, and destruction to RPC.
Clean up some RPC code.
Remove extraneous fields from nfs41_cb_info and clean up the code.
Change KM_SLEEP in mir_nfs41_callback_thread to KM_NOSLEEP.
Fix lint warnings.

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30 
  31 #include <sys/param.h>
  32 #include <sys/types.h>
  33 #include <sys/systm.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/vtrace.h>
  36 #include <sys/session.h>
  37 #include <sys/thread.h>
  38 #include <sys/dnlc.h>
  39 #include <sys/cred.h>
  40 #include <sys/priv.h>
  41 #include <sys/list.h>
  42 #include <sys/sdt.h>
  43 #include <sys/policy.h>
  44 
  45 #include <rpc/types.h>
  46 #include <rpc/xdr.h>
  47 
  48 #include <nfs/nfs.h>
  49 
  50 #include <nfs/nfs_clnt.h>
  51 
  52 #include <nfs/nfs4.h>
  53 #include <nfs/rnode4.h>
  54 #include <nfs/nfs4_clnt.h>
  55 #include <nfs/nfs41_sessions.h>
  56 #include <nfs/nfs4_clnt_impl.h>
  57 
  58 /*
  59  * client side statistics
  60  */
  61 static const struct clstat4 clstat4_tmpl = {
  62         { "calls",      KSTAT_DATA_UINT64 },
  63         { "badcalls",   KSTAT_DATA_UINT64 },
  64         { "clgets",     KSTAT_DATA_UINT64 },
  65         { "cltoomany",  KSTAT_DATA_UINT64 }
  66 };
  67 #ifdef DEBUG
  68 struct clstat4_debug clstat4_debug = {
  69         { "clalloc",    KSTAT_DATA_UINT64 },
  70         { "noresponse", KSTAT_DATA_UINT64 },
  71         { "failover",   KSTAT_DATA_UINT64 },
  72         { "remap",      KSTAT_DATA_UINT64 },
  73         { "nrnode",     KSTAT_DATA_UINT64 },
  74         { "access",     KSTAT_DATA_UINT64 },
  75         { "dirent",     KSTAT_DATA_UINT64 },
  76         { "dirents",    KSTAT_DATA_UINT64 },
  77         { "reclaim",    KSTAT_DATA_UINT64 },
  78         { "clreclaim",  KSTAT_DATA_UINT64 },
  79         { "f_reclaim",  KSTAT_DATA_UINT64 },
  80         { "a_reclaim",  KSTAT_DATA_UINT64 },
  81         { "r_reclaim",  KSTAT_DATA_UINT64 },
  82         { "r_path",     KSTAT_DATA_UINT64 }
  83 };
  84 #endif
  85 
  86 /*
  87  * We keep a global list of per-zone client data, so we can clean up all zones
  88  * if we get low on memory.
  89  */
  90 static list_t nfs4_clnt_list;
  91 static kmutex_t nfs4_clnt_list_lock;
  92 
  93 static struct kmem_cache *chtab4_cache;
  94 
  95 #ifdef DEBUG
  96 static int nfs4_rfscall_debug;
  97 static int nfs4_try_failover_any;
  98 int nfs4_utf8_debug = 0;
  99 #endif
 100 
 101 /*
 102  * NFSv4 readdir cache implementation
 103  */
 104 typedef struct rddir4_cache_impl {
 105         rddir4_cache    rc;             /* readdir cache element */
 106         kmutex_t        lock;           /* lock protects count */
 107         uint_t          count;          /* reference count */
 108         avl_node_t      tree;           /* AVL tree link */
 109 } rddir4_cache_impl;
 110 
 111 static int rddir4_cache_compar(const void *, const void *);
 112 static void rddir4_cache_free(rddir4_cache_impl *);
 113 static rddir4_cache *rddir4_cache_alloc(int);
 114 static void rddir4_cache_hold(rddir4_cache *);
 115 static int try_failover(enum clnt_stat);
 116 
 117 static int nfs4_readdir_cache_hits = 0;
 118 static int nfs4_readdir_cache_waits = 0;
 119 static int nfs4_readdir_cache_misses = 0;
 120 
 121 /*
 122  * Shared nfs4 functions
 123  */
 124 
 125 /*
 126  * Copy an nfs_fh4.  The destination storage (to->nfs_fh4_val) must already
 127  * be allocated.
 128  */
 129 
 130 void
 131 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to)
 132 {
 133         to->nfs_fh4_len = from->nfs_fh4_len;
 134         bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len);
 135 }
 136 
 137 /*
 138  * nfs4cmpfh - compare 2 filehandles.
 139  * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is
 140  * "less" than the second, +1 if the first is "greater" than the second.
 141  */
 142 
 143 int
 144 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2)
 145 {
 146         const char *c1, *c2;
 147 
 148         if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len)
 149                 return (-1);
 150         if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len)
 151                 return (1);
 152         for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val;
 153             c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len;
 154             c1++, c2++) {
 155                 if (*c1 < *c2)
 156                         return (-1);
 157                 if (*c1 > *c2)
 158                         return (1);
 159         }
 160 
 161         return (0);
 162 }
 163 
 164 /*
 165  * Compare two v4 filehandles.  Return zero if they're the same, non-zero
 166  * if they're not.  Like nfs4cmpfh(), but different filehandle
 167  * representation, and doesn't provide information about greater than or
 168  * less than.
 169  */
 170 
 171 int
 172 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2)
 173 {
 174         if (fh1->fh_len == fh2->fh_len)
 175                 return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len));
 176 
 177         return (1);
 178 }
 179 
 180 int
 181 stateid4_cmp(stateid4 *s1, stateid4 *s2)
 182 {
 183         if (bcmp(s1, s2, sizeof (stateid4)) == 0)
 184                 return (1);
 185         else
 186                 return (0);
 187 }
 188 
 189 nfsstat4
 190 puterrno4(int error)
 191 {
 192         switch (error) {
 193         case 0:
 194                 return (NFS4_OK);
 195         case EPERM:
 196                 return (NFS4ERR_PERM);
 197         case ENOENT:
 198                 return (NFS4ERR_NOENT);
 199         case EINTR:
 200                 return (NFS4ERR_IO);
 201         case EIO:
 202                 return (NFS4ERR_IO);
 203         case ENXIO:
 204                 return (NFS4ERR_NXIO);
 205         case ENOMEM:
 206                 return (NFS4ERR_RESOURCE);
 207         case EACCES:
 208                 return (NFS4ERR_ACCESS);
 209         case EBUSY:
 210                 return (NFS4ERR_IO);
 211         case EEXIST:
 212                 return (NFS4ERR_EXIST);
 213         case EXDEV:
 214                 return (NFS4ERR_XDEV);
 215         case ENODEV:
 216                 return (NFS4ERR_IO);
 217         case ENOTDIR:
 218                 return (NFS4ERR_NOTDIR);
 219         case EISDIR:
 220                 return (NFS4ERR_ISDIR);
 221         case EINVAL:
 222                 return (NFS4ERR_INVAL);
 223         case EMFILE:
 224                 return (NFS4ERR_RESOURCE);
 225         case EFBIG:
 226                 return (NFS4ERR_FBIG);
 227         case ENOSPC:
 228                 return (NFS4ERR_NOSPC);
 229         case EROFS:
 230                 return (NFS4ERR_ROFS);
 231         case EMLINK:
 232                 return (NFS4ERR_MLINK);
 233         case EDEADLK:
 234                 return (NFS4ERR_DEADLOCK);
 235         case ENOLCK:
 236                 return (NFS4ERR_DENIED);
 237         case EREMOTE:
 238                 return (NFS4ERR_SERVERFAULT);
 239         case ENOTSUP:
 240                 return (NFS4ERR_NOTSUPP);
 241         case EDQUOT:
 242                 return (NFS4ERR_DQUOT);
 243         case ENAMETOOLONG:
 244                 return (NFS4ERR_NAMETOOLONG);
 245         case EOVERFLOW:
 246                 return (NFS4ERR_INVAL);
 247         case ENOSYS:
 248                 return (NFS4ERR_NOTSUPP);
 249         case ENOTEMPTY:
 250                 return (NFS4ERR_NOTEMPTY);
 251         case EOPNOTSUPP:
 252                 return (NFS4ERR_NOTSUPP);
 253         case ESTALE:
 254                 return (NFS4ERR_STALE);
 255         case EAGAIN:
 256                 if (curthread->t_flag & T_WOULDBLOCK) {
 257                         curthread->t_flag &= ~T_WOULDBLOCK;
 258                         return (NFS4ERR_DELAY);
 259                 }
 260                 return (NFS4ERR_LOCKED);
 261         default:
 262                 return ((enum nfsstat4)error);
 263         }
 264 }
 265 
 266 int
 267 geterrno4(enum nfsstat4 status)
 268 {
 269         switch (status) {
 270         case NFS4_OK:
 271                 return (0);
 272         case NFS4ERR_PERM:
 273                 return (EPERM);
 274         case NFS4ERR_NOENT:
 275                 return (ENOENT);
 276         case NFS4ERR_IO:
 277                 return (EIO);
 278         case NFS4ERR_NXIO:
 279                 return (ENXIO);
 280         case NFS4ERR_ACCESS:
 281                 return (EACCES);
 282         case NFS4ERR_EXIST:
 283                 return (EEXIST);
 284         case NFS4ERR_XDEV:
 285                 return (EXDEV);
 286         case NFS4ERR_NOTDIR:
 287                 return (ENOTDIR);
 288         case NFS4ERR_ISDIR:
 289                 return (EISDIR);
 290         case NFS4ERR_INVAL:
 291                 return (EINVAL);
 292         case NFS4ERR_FBIG:
 293                 return (EFBIG);
 294         case NFS4ERR_NOSPC:
 295                 return (ENOSPC);
 296         case NFS4ERR_ROFS:
 297                 return (EROFS);
 298         case NFS4ERR_MLINK:
 299                 return (EMLINK);
 300         case NFS4ERR_NAMETOOLONG:
 301                 return (ENAMETOOLONG);
 302         case NFS4ERR_NOTEMPTY:
 303                 return (ENOTEMPTY);
 304         case NFS4ERR_DQUOT:
 305                 return (EDQUOT);
 306         case NFS4ERR_STALE:
 307                 return (ESTALE);
 308         case NFS4ERR_BADHANDLE:
 309                 return (ESTALE);
 310         case NFS4ERR_BAD_COOKIE:
 311                 return (EINVAL);
 312         case NFS4ERR_NOTSUPP:
 313                 return (EOPNOTSUPP);
 314         case NFS4ERR_TOOSMALL:
 315                 return (EINVAL);
 316         case NFS4ERR_SERVERFAULT:
 317                 return (EIO);
 318         case NFS4ERR_BADTYPE:
 319                 return (EINVAL);
 320         case NFS4ERR_DELAY:
 321                 return (ENXIO);
 322         case NFS4ERR_SAME:
 323                 return (EPROTO);
 324         case NFS4ERR_DENIED:
 325                 return (ENOLCK);
 326         case NFS4ERR_EXPIRED:
 327                 return (EPROTO);
 328         case NFS4ERR_LOCKED:
 329                 return (EACCES);
 330         case NFS4ERR_GRACE:
 331                 return (EAGAIN);
 332         case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */
 333                 return (ESTALE);
 334         case NFS4ERR_SHARE_DENIED:
 335                 return (EACCES);
 336         case NFS4ERR_WRONGSEC:
 337                 return (EPERM);
 338         case NFS4ERR_CLID_INUSE:
 339                 return (EAGAIN);
 340         case NFS4ERR_RESOURCE:
 341                 return (EAGAIN);
 342         case NFS4ERR_MOVED:
 343                 return (EPROTO);
 344         case NFS4ERR_NOFILEHANDLE:
 345                 return (EIO);
 346         case NFS4ERR_MINOR_VERS_MISMATCH:
 347                 return (ENOTSUP);
 348         case NFS4ERR_STALE_CLIENTID:
 349                 return (EIO);
 350         case NFS4ERR_STALE_STATEID:
 351                 return (EIO);
 352         case NFS4ERR_OLD_STATEID:
 353                 return (EIO);
 354         case NFS4ERR_BAD_STATEID:
 355                 return (EIO);
 356         case NFS4ERR_BAD_SEQID:
 357                 return (EIO);
 358         case NFS4ERR_NOT_SAME:
 359                 return (EPROTO);
 360         case NFS4ERR_LOCK_RANGE:
 361                 return (EPROTO);
 362         case NFS4ERR_SYMLINK:
 363                 return (EPROTO);
 364         case NFS4ERR_RESTOREFH:
 365                 return (EPROTO);
 366         case NFS4ERR_LEASE_MOVED:
 367                 return (EPROTO);
 368         case NFS4ERR_ATTRNOTSUPP:
 369                 return (ENOTSUP);
 370         case NFS4ERR_NO_GRACE:
 371                 return (EPROTO);
 372         case NFS4ERR_RECLAIM_BAD:
 373                 return (EPROTO);
 374         case NFS4ERR_RECLAIM_CONFLICT:
 375                 return (EPROTO);
 376         case NFS4ERR_BADXDR:
 377                 return (EINVAL);
 378         case NFS4ERR_LOCKS_HELD:
 379                 return (EIO);
 380         case NFS4ERR_OPENMODE:
 381                 return (EACCES);
 382         case NFS4ERR_BADOWNER:
 383                 /*
 384                  * Client and server are in different DNS domains
 385                  * and the NFSMAPID_DOMAIN in /etc/default/nfs
 386                  * doesn't match.  No good answer here.  Return
 387                  * EACCESS, which translates to "permission denied".
 388                  */
 389                 return (EACCES);
 390         case NFS4ERR_BADCHAR:
 391                 return (EINVAL);
 392         case NFS4ERR_BADNAME:
 393                 return (EINVAL);
 394         case NFS4ERR_BAD_RANGE:
 395                 return (EIO);
 396         case NFS4ERR_LOCK_NOTSUPP:
 397                 return (ENOTSUP);
 398         case NFS4ERR_OP_ILLEGAL:
 399                 return (EINVAL);
 400         case NFS4ERR_DEADLOCK:
 401                 return (EDEADLK);
 402         case NFS4ERR_FILE_OPEN:
 403                 return (EACCES);
 404         case NFS4ERR_ADMIN_REVOKED:
 405                 return (EPROTO);
 406         case NFS4ERR_CB_PATH_DOWN:
 407                 return (EPROTO);
 408         case NFS4ERR_BADSESSION:
 409                 return (EIO);
 410         default:
 411 #ifdef DEBUG
 412                 zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d",
 413                     status);
 414 #endif
 415                 return ((int)status);
 416         }
 417 }
 418 
 419 void
 420 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op)
 421 {
 422         nfs4_server_t *server;
 423 
 424         /*
 425          * Return if already printed/queued a msg
 426          * for this mount point.
 427          */
 428         if (mi->mi_flags & MI4_BADOWNER_DEBUG)
 429                 return;
 430         /*
 431          * Happens once per client <-> server pair.
 432          */
 433         if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
 434             mi->mi_flags & MI4_INT))
 435                 return;
 436 
 437         server = find_nfs4_server(mi);
 438         if (server == NULL) {
 439                 nfs_rw_exit(&mi->mi_recovlock);
 440                 return;
 441         }
 442 
 443         if (!(server->s_flags & N4S_BADOWNER_DEBUG)) {
 444                 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
 445                     "!NFSMAPID_DOMAIN does not match"
 446                     " the server: %s domain.\n"
 447                     "Please check configuration",
 448                     mi->mi_curr_serv->sv_hostname);
 449                 server->s_flags |= N4S_BADOWNER_DEBUG;
 450         }
 451         mutex_exit(&server->s_lock);
 452         nfs4_server_rele(server);
 453         nfs_rw_exit(&mi->mi_recovlock);
 454 
 455         /*
 456          * Happens once per mntinfo4_t.
 457          * This error is deemed as one of the recovery facts "RF_BADOWNER",
 458          * queue this in the mesg queue for this mount_info. This message
 459          * is not printed, meaning its absent from id_to_dump_solo_fact()
 460          * but its there for inspection if the queue is ever dumped/inspected.
 461          */
 462         mutex_enter(&mi->mi_lock);
 463         if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) {
 464                 nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op,
 465                     FALSE, NULL, 0, NULL);
 466                 mi->mi_flags |= MI4_BADOWNER_DEBUG;
 467         }
 468         mutex_exit(&mi->mi_lock);
 469 }
 470 
 471 int
 472 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime)
 473 {
 474         int64_t sec;
 475         int32_t nsec;
 476 
 477         /*
 478          * Here check that the nfsv4 time is valid for the system.
 479          * nfsv4 time value is a signed 64-bit, and the system time
 480          * may be either int64_t or int32_t (depends on the kernel),
 481          * so if the kernel is 32-bit, the nfsv4 time value may not fit.
 482          */
 483 #ifndef _LP64
 484         if (! NFS4_TIME_OK(ntime->seconds)) {
 485                 return (EOVERFLOW);
 486         }
 487 #endif
 488 
 489         /* Invalid to specify 1 billion (or more) nsecs */
 490         if (ntime->nseconds >= 1000000000)
 491                 return (EINVAL);
 492 
 493         if (ntime->seconds < 0) {
 494                 sec = ntime->seconds + 1;
 495                 nsec = -1000000000 + ntime->nseconds;
 496         } else {
 497                 sec = ntime->seconds;
 498                 nsec = ntime->nseconds;
 499         }
 500 
 501         vatime->tv_sec = sec;
 502         vatime->tv_nsec = nsec;
 503 
 504         return (0);
 505 }
 506 
 507 int
 508 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime)
 509 {
 510         int64_t sec;
 511         uint32_t nsec;
 512 
 513         /*
 514          * nfsv4 time value is a signed 64-bit, and the system time
 515          * may be either int64_t or int32_t (depends on the kernel),
 516          * so all system time values will fit.
 517          */
 518         if (vatime->tv_nsec >= 0) {
 519                 sec = vatime->tv_sec;
 520                 nsec = vatime->tv_nsec;
 521         } else {
 522                 sec = vatime->tv_sec - 1;
 523                 nsec = 1000000000 + vatime->tv_nsec;
 524         }
 525         ntime->seconds = sec;
 526         ntime->nseconds = nsec;
 527 
 528         return (0);
 529 }
 530 
 531 /*
 532  * Converts a utf8 string to a valid null terminated filename string.
 533  *
 534  * XXX - Not actually translating the UTF-8 string as per RFC 2279.
 535  *       For now, just validate that the UTF-8 string off the wire
 536  *       does not have characters that will freak out UFS, and leave
 537  *       it at that.
 538  */
 539 char *
 540 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s)
 541 {
 542         ASSERT(lenp != NULL);
 543 
 544         if (u8s == NULL || u8s->utf8string_len <= 0 ||
 545             u8s->utf8string_val == NULL)
 546                 return (NULL);
 547 
 548         /*
 549          * Check for obvious illegal filename chars
 550          */
 551         if (utf8_strchr(u8s, '/') != NULL) {
 552 #ifdef DEBUG
 553                 if (nfs4_utf8_debug) {
 554                         char *path;
 555                         int len = u8s->utf8string_len;
 556 
 557                         path = kmem_alloc(len + 1, KM_SLEEP);
 558                         bcopy(u8s->utf8string_val, path, len);
 559                         path[len] = '\0';
 560 
 561                         zcmn_err(getzoneid(), CE_WARN,
 562                             "Invalid UTF-8 filename: %s", path);
 563 
 564                         kmem_free(path, len + 1);
 565                 }
 566 #endif
 567                 return (NULL);
 568         }
 569 
 570         return (utf8_to_str(u8s, lenp, s));
 571 }
 572 
 573 /*
 574  * Converts a utf8 string to a C string.
 575  * kmem_allocs a new string if not supplied
 576  */
 577 char *
 578 utf8_to_str(utf8string *str, uint_t *lenp, char *s)
 579 {
 580         char    *sp;
 581         char    *u8p;
 582         int     len;
 583         int      i;
 584 
 585         ASSERT(lenp != NULL);
 586 
 587         if (str == NULL)
 588                 return (NULL);
 589 
 590         u8p = str->utf8string_val;
 591         len = str->utf8string_len;
 592         if (len <= 0 || u8p == NULL) {
 593                 if (s)
 594                         *s = '\0';
 595                 return (NULL);
 596         }
 597 
 598         sp = s;
 599         if (sp == NULL)
 600                 sp = kmem_alloc(len + 1, KM_SLEEP);
 601 
 602         /*
 603          * At least check for embedded nulls
 604          */
 605         for (i = 0; i < len; i++) {
 606                 sp[i] = u8p[i];
 607                 if (u8p[i] == '\0') {
 608 #ifdef  DEBUG
 609                         zcmn_err(getzoneid(), CE_WARN,
 610                             "Embedded NULL in UTF-8 string");
 611 #endif
 612                         if (s == NULL)
 613                                 kmem_free(sp, len + 1);
 614                         return (NULL);
 615                 }
 616         }
 617         sp[len] = '\0';
 618         *lenp = len + 1;
 619 
 620         return (sp);
 621 }
 622 
 623 /*
 624  * str_to_utf8 - converts a null-terminated C string to a utf8 string
 625  */
 626 utf8string *
 627 str_to_utf8(char *nm, utf8string *str)
 628 {
 629         int len;
 630 
 631         if (str == NULL)
 632                 return (NULL);
 633 
 634         if (nm == NULL || *nm == '\0') {
 635                 str->utf8string_len = 0;
 636                 str->utf8string_val = NULL;
 637         }
 638 
 639         len = strlen(nm);
 640 
 641         str->utf8string_val = kmem_alloc(len, KM_SLEEP);
 642         str->utf8string_len = len;
 643         bcopy(nm, str->utf8string_val, len);
 644 
 645         return (str);
 646 }
 647 
 648 utf8string *
 649 utf8_copy(utf8string *src, utf8string *dest)
 650 {
 651         if (src == NULL)
 652                 return (NULL);
 653         if (dest == NULL)
 654                 return (NULL);
 655 
 656         if (src->utf8string_len > 0) {
 657                 dest->utf8string_val = kmem_alloc(src->utf8string_len,
 658                     KM_SLEEP);
 659                 bcopy(src->utf8string_val, dest->utf8string_val,
 660                     src->utf8string_len);
 661                 dest->utf8string_len = src->utf8string_len;
 662         } else {
 663                 dest->utf8string_val = NULL;
 664                 dest->utf8string_len = 0;
 665         }
 666 
 667         return (dest);
 668 }
 669 
 670 int
 671 utf8_compare(const utf8string *a, const utf8string *b)
 672 {
 673         int mlen, cmp;
 674         int alen, blen;
 675         char *aval, *bval;
 676 
 677         if ((a == NULL) && (b == NULL))
 678                 return (0);
 679         else if (a == NULL)
 680                 return (-1);
 681         else if (b == NULL)
 682                 return (1);
 683 
 684         alen = a->utf8string_len;
 685         blen = b->utf8string_len;
 686         aval = a->utf8string_val;
 687         bval = b->utf8string_val;
 688 
 689         if (((alen == 0) || (aval == NULL)) &&
 690             ((blen == 0) || (bval == NULL)))
 691                 return (0);
 692         else if ((alen == 0) || (aval == NULL))
 693                 return (-1);
 694         else if ((blen == 0) || (bval == NULL))
 695                 return (1);
 696 
 697         mlen = MIN(alen, blen);
 698         cmp = strncmp(aval, bval, mlen);
 699 
 700         if ((cmp == 0) && (alen == blen))
 701                 return (0);
 702         else if ((cmp == 0) && (alen < blen))
 703                 return (-1);
 704         else if (cmp == 0)
 705                 return (1);
 706         else if (cmp < 0)
 707                 return (-1);
 708         return (1);
 709 }
 710 
 711 /*
 712  * utf8_dir_verify - checks that the utf8 string is valid
 713  */
 714 int
 715 utf8_dir_verify(utf8string *str)
 716 {
 717         char *nm;
 718         int len;
 719 
 720         if (str == NULL)
 721                 return (0);
 722 
 723         nm = str->utf8string_val;
 724         len = str->utf8string_len;
 725         if (nm == NULL || len == 0) {
 726                 return (0);
 727         }
 728 
 729         if (len == 1 && nm[0] == '.')
 730                 return (0);
 731         if (len == 2 && nm[0] == '.' && nm[1] == '.')
 732                 return (0);
 733 
 734         if (utf8_strchr(str, '/') != NULL)
 735                 return (0);
 736 
 737         if (utf8_strchr(str, '\0') != NULL)
 738                 return (0);
 739 
 740         return (1);
 741 }
 742 
 743 /*
 744  * from rpcsec module (common/rpcsec)
 745  */
 746 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
 747 extern void sec_clnt_freeh(AUTH *);
 748 extern void sec_clnt_freeinfo(struct sec_data *);
 749 
 750 /*
 751  * authget() gets an auth handle based on the security
 752  * information from the servinfo in mountinfo.
 753  * The auth handle is stored in ch_client->cl_auth.
 754  *
 755  * First security flavor of choice is to use sv_secdata
 756  * which is initiated by the client. If that fails, get
 757  * secinfo from the server and then select one from the
 758  * server secinfo list .
 759  *
 760  * For RPCSEC_GSS flavor, upon success, a secure context is
 761  * established between client and server.
 762  */
 763 int
 764 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr)
 765 {
 766         int error, i;
 767 
 768         /*
 769          * SV4_TRYSECINFO indicates to try the secinfo list from
 770          * sv_secinfo until a successful one is reached. Point
 771          * sv_currsec to the selected security mechanism for
 772          * later sessions.
 773          */
 774         (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
 775         if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) {
 776                 for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count;
 777                     i++) {
 778                         if (!(error = sec_clnt_geth(ch_client,
 779                             &svp->sv_secinfo->sdata[i],
 780                             cr, &ch_client->cl_auth))) {
 781 
 782                                 svp->sv_currsec = &svp->sv_secinfo->sdata[i];
 783                                 svp->sv_secinfo->index = i;
 784                                 /* done */
 785                                 svp->sv_flags &= ~SV4_TRYSECINFO;
 786                                 break;
 787                         }
 788 
 789                         /*
 790                          * Allow the caller retry with the security flavor
 791                          * pointed by svp->sv_secinfo->index when
 792                          * ETIMEDOUT/ECONNRESET occurs.
 793                          */
 794                         if (error == ETIMEDOUT || error == ECONNRESET) {
 795                                 svp->sv_secinfo->index = i;
 796                                 break;
 797                         }
 798                 }
 799         } else {
 800                 /* sv_currsec points to one of the entries in sv_secinfo */
 801                 if (svp->sv_currsec) {
 802                         error = sec_clnt_geth(ch_client, svp->sv_currsec, cr,
 803                             &ch_client->cl_auth);
 804                 } else {
 805                         /* If it's null, use sv_secdata. */
 806                         error = sec_clnt_geth(ch_client, svp->sv_secdata, cr,
 807                             &ch_client->cl_auth);
 808                 }
 809         }
 810         nfs_rw_exit(&svp->sv_lock);
 811 
 812         return (error);
 813 }
 814 
 815 /*
 816  * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
 817  */
 818 int
 819 clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
 820     struct chtab **chp, struct nfs4_clnt *nfscl, mntinfo4_t *mi)
 821 {
 822         struct chhead *ch, *newch;
 823         struct chhead **plistp;
 824         struct chtab *cp;
 825         int error;
 826         k_sigset_t smask;
 827 
 828         if (newcl == NULL || chp == NULL || ci == NULL)
 829                 return (EINVAL);
 830 
 831         *newcl = NULL;
 832         *chp = NULL;
 833 
 834         /*
 835          * Find an unused handle or create one
 836          */
 837         newch = NULL;
 838         /*
 839          * Update statistics based on minor version number
 840          */
 841         nfscl->nfscl_stat[NFS4_MINORVERSION(mi)].clgets.value.ui64++;
 842 top:
 843         /*
 844          * Find the correct entry in the cache to check for free
 845          * client handles.  The search is based on the RPC program
 846          * number, program version number, dev_t for the transport
 847          * device, and the protocol family.
 848          */
 849         mutex_enter(&nfscl->nfscl_chtable4_lock);
 850         plistp = &nfscl->nfscl_chtable4;
 851         for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
 852                 if (ch->ch_prog == ci->cl_prog &&
 853                     ch->ch_vers == ci->cl_vers &&
 854                     ch->ch_dev == svp->sv_knconf->knc_rdev &&
 855                     (strcmp(ch->ch_protofmly,
 856                     svp->sv_knconf->knc_protofmly) == 0))
 857                         break;
 858                 plistp = &ch->ch_next;
 859         }
 860 
 861         /*
 862          * If we didn't find a cache entry for this quadruple, then
 863          * create one.  If we don't have one already preallocated,
 864          * then drop the cache lock, create one, and then start over.
 865          * If we did have a preallocated entry, then just add it to
 866          * the front of the list.
 867          */
 868         if (ch == NULL) {
 869                 if (newch == NULL) {
 870                         mutex_exit(&nfscl->nfscl_chtable4_lock);
 871                         newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
 872                         newch->ch_timesused = 0;
 873                         newch->ch_prog = ci->cl_prog;
 874                         newch->ch_vers = ci->cl_vers;
 875                         newch->ch_dev = svp->sv_knconf->knc_rdev;
 876                         newch->ch_protofmly = kmem_alloc(
 877                             strlen(svp->sv_knconf->knc_protofmly) + 1,
 878                             KM_SLEEP);
 879                         (void) strcpy(newch->ch_protofmly,
 880                             svp->sv_knconf->knc_protofmly);
 881                         newch->ch_list = NULL;
 882                         goto top;
 883                 }
 884                 ch = newch;
 885                 newch = NULL;
 886                 ch->ch_next = nfscl->nfscl_chtable4;
 887                 nfscl->nfscl_chtable4 = ch;
 888         /*
 889          * We found a cache entry, but if it isn't on the front of the
 890          * list, then move it to the front of the list to try to take
 891          * advantage of locality of operations.
 892          */
 893         } else if (ch != nfscl->nfscl_chtable4) {
 894                 *plistp = ch->ch_next;
 895                 ch->ch_next = nfscl->nfscl_chtable4;
 896                 nfscl->nfscl_chtable4 = ch;
 897         }
 898 
 899         /*
 900          * If there was a free client handle cached, then remove it
 901          * from the list, init it, and use it.
 902          */
 903         if (ch->ch_list != NULL) {
 904                 cp = ch->ch_list;
 905                 ch->ch_list = cp->ch_list;
 906                 mutex_exit(&nfscl->nfscl_chtable4_lock);
 907                 if (newch != NULL) {
 908                         kmem_free(newch->ch_protofmly,
 909                             strlen(newch->ch_protofmly) + 1);
 910                         kmem_free(newch, sizeof (*newch));
 911                 }
 912                 (void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
 913                     &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
 914 
 915                 /*
 916                  * Get an auth handle.
 917                  */
 918                 error = authget(svp, cp->ch_client, cr);
 919                 if (error || cp->ch_client->cl_auth == NULL) {
 920                         CLNT_DESTROY(cp->ch_client);
 921                         kmem_cache_free(chtab4_cache, cp);
 922                         return ((error != 0) ? error : EINTR);
 923                 }
 924                 ch->ch_timesused++;
 925                 *newcl = cp->ch_client;
 926                 *chp = cp;
 927                 return (0);
 928         }
 929 
 930         /*
 931          * There weren't any free client handles which fit, so allocate a
 932          * new one and use that.
 933          */
 934 #ifdef DEBUG
 935         atomic_add_64(&clstat4_debug.clalloc.value.ui64, 1);
 936 #endif
 937         mutex_exit(&nfscl->nfscl_chtable4_lock);
 938 
 939         nfscl->nfscl_stat[NFS4_MINORVERSION(mi)].cltoomany.value.ui64++;
 940         if (newch != NULL) {
 941                 kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
 942                 kmem_free(newch, sizeof (*newch));
 943         }
 944 
 945         cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP);
 946         cp->ch_head = ch;
 947 
 948         sigintr(&smask, (int)ci->cl_flags & MI4_INT);
 949         error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
 950             ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
 951         sigunintr(&smask);
 952 
 953         if (error != 0) {
 954                 kmem_cache_free(chtab4_cache, cp);
 955 #ifdef DEBUG
 956         atomic_add_64(&clstat4_debug.clalloc.value.ui64, -1);
 957 #endif
 958                 /*
 959                  * Warning is unnecessary if error is EINTR.
 960                  */
 961                 if (error != EINTR) {
 962                         nfs_cmn_err(error, CE_WARN,
 963                             "clget: couldn't create handle: %m\n");
 964                 }
 965                 return (error);
 966         }
 967         (void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
 968         auth_destroy(cp->ch_client->cl_auth);
 969 
 970 
 971 
 972         /*
 973          * Get an auth handle.
 974          */
 975         error = authget(svp, cp->ch_client, cr);
 976         if (error || cp->ch_client->cl_auth == NULL) {
 977                 CLNT_DESTROY(cp->ch_client);
 978                 kmem_cache_free(chtab4_cache, cp);
 979 #ifdef DEBUG
 980         atomic_add_64(&clstat4_debug.clalloc.value.ui64, -1);
 981 #endif
 982                 return ((error != 0) ? error : EINTR);
 983         }
 984         ch->ch_timesused++;
 985         *newcl = cp->ch_client;
 986         ASSERT(cp->ch_client->cl_nosignal == FALSE);
 987         *chp = cp;
 988         return (0);
 989 }
 990 
 991 int
 992 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
 993     struct chtab **chp, struct nfs4_clnt *nfscl)
 994 {
 995         clinfo_t ci;
 996         bool_t is_recov;
 997         int firstcall, error = 0;
 998 
 999         /*
1000          * Set read buffer size to rsize
1001          * and add room for RPC headers.
1002          */
1003         ci.cl_readsize = mi->mi_tsize;
1004         if (ci.cl_readsize != 0)
1005                 ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
1006 
1007         /*
1008          * If soft mount and server is down just try once.
1009          * meaning: do not retransmit.
1010          */
1011         if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN))
1012                 ci.cl_retrans = 0;
1013         else
1014                 ci.cl_retrans = mi->mi_retrans;
1015 
1016         ci.cl_prog = mi->mi_prog;
1017         ci.cl_vers = mi->mi_vers;
1018         ci.cl_flags = mi->mi_flags;
1019 
1020         /*
1021          * clget4 calls authget() to get an auth handle. For RPCSEC_GSS
1022          * security flavor, the client tries to establish a security context
1023          * by contacting the server. If the connection is timed out or reset,
1024          * e.g. server reboot, we will try again.
1025          */
1026 
1027         /*
1028          * XXXrecovery:  We've already captured the nfs4_server_t in
1029          * start_op but we don't (yet) push it down through rfs4call()
1030          * and friends.  We need to do that, especially in the case of
1031          * an operation directed to the data server, so that we can
1032          * determine if this thread may be in recovery (non-pNFS, MDS, or DS).
1033          */
1034         is_recov = (curthread == mi->mi_recovthread);
1035         firstcall = 1;
1036 
1037         do {
1038                 error = clget4(&ci, svp, cr, newcl, chp, nfscl, mi);
1039 
1040                 if (error == 0)
1041                         break;
1042 
1043                 /*
1044                  * For forced unmount and zone shutdown, bail out but
1045                  * let the recovery thread do one more transmission.
1046                  */
1047                 if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) &&
1048                     (!is_recov || !firstcall)) {
1049                         error = EIO;
1050                         break;
1051                 }
1052 
1053                 /* do not retry for soft mount */
1054                 if (!(mi->mi_flags & MI4_HARD))
1055                         break;
1056 
1057                 /* let the caller deal with the failover case */
1058                 if (FAILOVER_MOUNT4(mi))
1059                         break;
1060 
1061                 firstcall = 0;
1062 
1063         } while (error == ETIMEDOUT || error == ECONNRESET);
1064 
1065         return (error);
1066 }
1067 
1068 void
1069 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl)
1070 {
1071         if (cl->cl_auth != NULL) {
1072                 sec_clnt_freeh(cl->cl_auth);
1073                 cl->cl_auth = NULL;
1074         }
1075 
1076         if (!CLNT_CONTROL(cl, CLSET_TAG_CLEAR, (char *)NULL))
1077                 zcmn_err(getzoneid(), CE_WARN,
1078                     "Failed to clear tag on freed client handle");
1079 
1080         if (!(CLNT_CONTROL(cl, CLSET_BACKCHANNEL_CLEAR, NULL))) {
1081                 zcmn_err(getzoneid(), CE_WARN,
1082                     "Unable to clear backchannel on freed client handle %p",
1083                     (void *)cl);
1084         }
1085 
1086         /*
1087          * Timestamp this cache entry so that we know when it was last
1088          * used.
1089          */
1090         cp->ch_freed = gethrestime_sec();
1091 
1092         /*
1093          * Add the free client handle to the front of the list.
1094          * This way, the list will be sorted in youngest to oldest
1095          * order.
1096          */
1097         mutex_enter(&nfscl->nfscl_chtable4_lock);
1098         cp->ch_list = cp->ch_head->ch_list;
1099         cp->ch_head->ch_list = cp;
1100         mutex_exit(&nfscl->nfscl_chtable4_lock);
1101 }
1102 
1103 #define CL_HOLDTIME     60      /* time to hold client handles */
1104 
1105 static void
1106 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime)
1107 {
1108         struct chhead *ch;
1109         struct chtab *cp;       /* list of objects that can be reclaimed */
1110         struct chtab *cpe;
1111         struct chtab *cpl;
1112         struct chtab **cpp;
1113 #ifdef DEBUG
1114         int n = 0;
1115         clstat4_debug.clreclaim.value.ui64++;
1116 #endif
1117 
1118         /*
1119          * Need to reclaim some memory, so step through the cache
1120          * looking through the lists for entries which can be freed.
1121          */
1122         cp = NULL;
1123 
1124         mutex_enter(&nfscl->nfscl_chtable4_lock);
1125 
1126         /*
1127          * Here we step through each non-NULL quadruple and start to
1128          * construct the reclaim list pointed to by cp.  Note that
1129          * cp will contain all eligible chtab entries.  When this traversal
1130          * completes, chtab entries from the last quadruple will be at the
1131          * front of cp and entries from previously inspected quadruples have
1132          * been appended to the rear of cp.
1133          */
1134         for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
1135                 if (ch->ch_list == NULL)
1136                         continue;
1137                 /*
1138                  * Search each list for entries older then
1139                  * cl_holdtime seconds.  The lists are maintained
1140                  * in youngest to oldest order so that when the
1141                  * first entry is found which is old enough, then
1142                  * all of the rest of the entries on the list will
1143                  * be old enough as well.
1144                  */
1145                 cpl = ch->ch_list;
1146                 cpp = &ch->ch_list;
1147                 while (cpl != NULL &&
1148                     cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
1149                         cpp = &cpl->ch_list;
1150                         cpl = cpl->ch_list;
1151                 }
1152                 if (cpl != NULL) {
1153                         *cpp = NULL;
1154                         if (cp != NULL) {
1155                                 cpe = cpl;
1156                                 while (cpe->ch_list != NULL)
1157                                         cpe = cpe->ch_list;
1158                                 cpe->ch_list = cp;
1159                         }
1160                         cp = cpl;
1161                 }
1162         }
1163 
1164         mutex_exit(&nfscl->nfscl_chtable4_lock);
1165 
1166         /*
1167          * If cp is empty, then there is nothing to reclaim here.
1168          */
1169         if (cp == NULL)
1170                 return;
1171 
1172         /*
1173          * Step through the list of entries to free, destroying each client
1174          * handle and kmem_free'ing the memory for each entry.
1175          */
1176         while (cp != NULL) {
1177 #ifdef DEBUG
1178                 n++;
1179 #endif
1180                 CLNT_DESTROY(cp->ch_client);
1181                 cpl = cp->ch_list;
1182                 kmem_cache_free(chtab4_cache, cp);
1183                 cp = cpl;
1184         }
1185 
1186 #ifdef DEBUG
1187         /*
1188          * Update clalloc so that nfsstat shows the current number of
1189          * allocated client handles.
1190          */
1191         atomic_add_64(&clstat4_debug.clalloc.value.ui64, -n);
1192 #endif
1193 }
1194 
1195 /* ARGSUSED */
1196 static void
1197 clreclaim4(void *all)
1198 {
1199         struct nfs4_clnt *nfscl;
1200 
1201         /*
1202          * The system is low on memory; go through and try to reclaim some from
1203          * every zone on the system.
1204          */
1205         mutex_enter(&nfs4_clnt_list_lock);
1206         nfscl = list_head(&nfs4_clnt_list);
1207         for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl))
1208                 clreclaim4_zone(nfscl, CL_HOLDTIME);
1209         mutex_exit(&nfs4_clnt_list_lock);
1210 }
1211 
/*
 * Minimum time-out values indexed by call type.
 * The units are "eighths" of a second to avoid multiplies.
 */
static unsigned int minimum_timeo[] = { 6, 7, 10 };

/* Reduced time-out, used in place of mi_timeo while a zone shuts down. */
#define SHORTWAIT       (NFS_COTS_TIMEO / 10)

/*
 * Back off for retransmission timeout.  MAXTIMO is in clock ticks (hz per
 * second).  backoff() doubles a time-out that is still below MAXTIMO,
 * clamping the result to MAXTIMO, and leaves any other value untouched.
 */
#define MAXTIMO (20*hz)
#define backoff(tim)    (((tim) >= MAXTIMO) ? (tim) : dobackoff(tim))
#define dobackoff(tim)  ((((tim) << 1) <= MAXTIMO) ? ((tim) << 1) : MAXTIMO)
1228 
1229 static int
1230 nfs4_rfscall(mntinfo4_t *mi, servinfo4_t *svp,
1231     rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
1232     xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *doqueue,
1233     enum clnt_stat *rpc_statusp, int flags, struct nfs4_clnt *nfscl)
1234 {
1235         CLIENT *client;
1236         struct chtab *ch;
1237         cred_t *cr = icr;
1238         struct rpc_err rpcerr;
1239         enum clnt_stat status;
1240         int error;
1241         int ctlret;
1242         struct timeval wait;
1243         int timeo;              /* in units of hz */
1244         bool_t tryagain, is_recov;
1245         bool_t cred_cloned = FALSE;
1246         k_sigset_t smask;
1247 #ifdef DEBUG
1248         char *bufp;
1249 #endif
1250         int firstcall;
1251         struct nfs41_cb_info    *cbi;
1252         struct nfs4_server      *np;
1253 
1254         rpcerr.re_status = RPC_SUCCESS;
1255 
1256         /*
1257          * If we know that we are rebooting then let's
1258          * not bother with doing any over the wireness.
1259          */
1260         mutex_enter(&mi->mi_lock);
1261         if (mi->mi_flags & MI4_SHUTDOWN) {
1262                 mutex_exit(&mi->mi_lock);
1263                 return (EIO);
1264         }
1265         mutex_exit(&mi->mi_lock);
1266 
1267         /* For TSOL, use a new cred which has net_mac_aware flag */
1268         if (!cred_cloned && is_system_labeled()) {
1269                 cred_cloned = TRUE;
1270                 cr = crdup(icr);
1271                 (void) setpflags(NET_MAC_AWARE, 1, cr);
1272         }
1273 
1274         /*
1275          * clget() calls clnt_tli_kinit() which clears the xid, so we
1276          * are guaranteed to reprocess the retry as a new request.
1277          */
1278         if (svp == NULL)
1279                 svp = mi->mi_curr_serv;
1280         rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl);
1281         if (rpcerr.re_errno != 0)
1282                 return (rpcerr.re_errno);
1283 
1284         if (NFS4_MINORVERSION(mi) == 1) {
1285                 mutex_enter(&nfs4_server_lst_lock);
1286                 np = servinfo4_to_nfs4_server(svp);
1287                 mutex_exit(&nfs4_server_lst_lock);
1288 
1289                 if (np) {
1290                         if (np->s_program != 0 && (flags & RFS4CALL_SETCB)) {
1291                                 cbi = np->zone_globals->nfs4prog2cbinfo
1292                                     [np->s_program-NFS4_CALLBACK];
1293                                 if (cbi != NULL) {
1294                                         ctlret = CLNT_CONTROL(
1295                                             client, CLSET_CBSERVER_SETUP,
1296                                             (char *)&cbi->cb_rpc);




1297                                         if (ctlret == 0) {
1298                                                 zcmn_err(getzoneid(), CE_WARN,
1299                                                     "Failed to set client"
1300                                                     " handle as callback");
1301                                         }
1302                                 }
1303 
1304                                 if (!np->ssx.bi_rpc) {
1305                                         ctlret = CLNT_CONTROL(client,
1306                                             CLSET_BACKCHANNEL, NULL);
1307                                         if (ctlret == 0) {
1308                                                 zcmn_err(getzoneid(), CE_WARN,
1309                                                     "Failed to set client"
1310                                                     " handle as callback");
1311                                         }
1312                                 }
1313 
1314                                 /*
1315                                  * In case of non birpc, make sure rpc layer
1316                                  * reflects the same -- the below call sets
1317                                  * the RPC flag  non birpc.
1318                                  */
1319                                 if (NFS41_CHECK(mi, nfs41_birpc) == FALSE) {
1320                                         (void) CLNT_CONTROL(client,
1321                                             CLSET_NON_BIRPC, (char *)NULL);
1322                                 }
1323                         }
1324 
1325                         if (!CLNT_CONTROL(client, CLSET_TAG,
1326                             (char *)(np->ssx.sessionid)))
1327                                 zcmn_err(getzoneid(), CE_WARN,
1328                                     "Failed to set tag on client handle");
1329 
1330                         mutex_exit(&np->s_lock);
1331                         nfs4_server_rele(np);
1332                 }
1333         }
1334 
1335         timeo = (mi->mi_timeo * hz) / 10;
1336 
1337         /*
1338          * If hard mounted fs, retry call forever unless hard error
1339          * occurs.
1340          *
1341          * For forced unmount, let the recovery thread through but return
1342          * an error for all others.  This is so that user processes can
1343          * exit quickly.  The recovery thread bails out after one
1344          * transmission so that it can tell if it needs to continue.
1345          *
1346          * For zone shutdown, behave as above to encourage quick
1347          * process exit, but also fail quickly when servers have
1348          * timed out before and reduce the timeouts.
1349          */
1350 
1351         /*
1352          * XXXrecovery:  We've already captured the nfs4_server_t in
1353          * start_op but we don't (yet) push it down through rfs4call()
1354          * and friends.  We need to do that, especially in the case of
1355          * an operation directed to the data server, so that we can
1356          * determine if this thread may be in recovery (non-pNFS, MDS, or DS).
1357          */
1358         is_recov = (curthread == mi->mi_recovthread);
1359         firstcall = 1;
1360         do {
1361                 tryagain = FALSE;
1362 
1363                 NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE,
1364                     "nfs4_rfscall: vfs_flag=0x%x, %s",
1365                     mi->mi_vfsp->vfs_flag,
1366                     is_recov ? "recov thread" : "not recov thread"));
1367 
1368                 /*
1369                  * It's possible while we're retrying the admin
1370                  * decided to reboot.
1371                  */
1372                 mutex_enter(&mi->mi_lock);
1373                 if (mi->mi_flags & MI4_SHUTDOWN) {
1374                         mutex_exit(&mi->mi_lock);
1375                         clfree4(client, ch, nfscl);
1376                         if (cred_cloned)
1377                                 crfree(cr);
1378                         return (EIO);
1379                 }
1380                 mutex_exit(&mi->mi_lock);
1381 
1382                 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) &&
1383                     (!is_recov || !firstcall)) {
1384                         clfree4(client, ch, nfscl);
1385                         if (cred_cloned)
1386                                 crfree(cr);
1387                         return (EIO);
1388                 }
1389 
1390                 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) {
1391                         mutex_enter(&mi->mi_lock);
1392                         if ((mi->mi_flags & MI4_TIMEDOUT) ||
1393                             !is_recov || !firstcall) {
1394                                 mutex_exit(&mi->mi_lock);
1395                                 clfree4(client, ch, nfscl);
1396                                 if (cred_cloned)
1397                                         crfree(cr);
1398                                 return (EIO);
1399                         }
1400                         mutex_exit(&mi->mi_lock);
1401                         timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10;
1402                 }
1403 
1404                 firstcall = 0;
1405                 TICK_TO_TIMEVAL(timeo, &wait);
1406 
1407                 /*
1408                  * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
1409                  * and SIGTERM. (Preserving the existing masks).
1410                  * Mask out SIGINT if mount option nointr is specified.
1411                  */
1412                 sigintr(&smask, (int)mi->mi_flags & MI4_INT);
1413                 if (!(mi->mi_flags & MI4_INT))
1414                         client->cl_nosignal = TRUE;
1415 
1416                 /*
1417                  * If there is a current signal, then don't bother
1418                  * even trying to send out the request because we
1419                  * won't be able to block waiting for the response.
1420                  * Simply assume RPC_INTR and get on with it.
1421                  */
1422                 if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
1423                         status = RPC_INTR;
1424                 else {
1425                         status = CLNT_CALL(client, which, xdrargs, argsp,
1426                             xdrres, resp, wait);
1427                 }
1428 
1429                 if (!(mi->mi_flags & MI4_INT))
1430                         client->cl_nosignal = FALSE;
1431                 /*
1432                  * restore original signal mask
1433                  */
1434                 sigunintr(&smask);
1435 
1436                 switch (status) {
1437                 case RPC_SUCCESS:
1438                         break;
1439 
1440                 case RPC_INTR:
1441                         /*
1442                          * There is no way to recover from this error,
1443                          * even if mount option nointr is specified.
1444                          * SIGKILL, for example, cannot be blocked.
1445                          */
1446                         rpcerr.re_status = RPC_INTR;
1447                         rpcerr.re_errno = EINTR;
1448                         break;
1449 
1450                 case RPC_CONN_NOT_BOUND:
1451                         rpcerr.re_status = status;
1452                         rpcerr.re_errno = EIO;
1453                         break;
1454 
1455                 case RPC_UDERROR:
1456                         /*
1457                          * If the NFS server is local (vold) and
1458                          * it goes away then we get RPC_UDERROR.
1459                          * This is a retryable error, so we would
1460                          * loop, so check to see if the specific
1461                          * error was ECONNRESET, indicating that
1462                          * target did not exist at all.  If so,
1463                          * return with RPC_PROGUNAVAIL and
1464                          * ECONNRESET to indicate why.
1465                          */
1466                         CLNT_GETERR(client, &rpcerr);
1467                         if (rpcerr.re_errno == ECONNRESET) {
1468                                 rpcerr.re_status = RPC_PROGUNAVAIL;
1469                                 rpcerr.re_errno = ECONNRESET;
1470                                 break;
1471                         }
1472                         /*FALLTHROUGH*/
1473 
1474                 default:                /* probably RPC_TIMEDOUT */
1475 
1476                         if (IS_UNRECOVERABLE_RPC(status))
1477                                 break;
1478 
1479                         /*
1480                          * increment server not responding count
1481                          */
1482                         mutex_enter(&mi->mi_lock);
1483                         mi->mi_noresponse++;
1484                         mutex_exit(&mi->mi_lock);
1485 #ifdef DEBUG
1486                         clstat4_debug.noresponse.value.ui64++;
1487 #endif
1488                         /*
1489                          * On zone shutdown, mark server dead and move on.
1490                          */
1491                         if (zone_status_get(curproc->p_zone) >=
1492                             ZONE_IS_SHUTTING_DOWN) {
1493                                 mutex_enter(&mi->mi_lock);
1494                                 mi->mi_flags |= MI4_TIMEDOUT;
1495                                 mutex_exit(&mi->mi_lock);
1496                                 clfree4(client, ch, nfscl);
1497                                 if (cred_cloned)
1498                                         crfree(cr);
1499                                 return (EIO);
1500                         }
1501 
1502                         /*
1503                          * NFS client failover support:
1504                          * return and let the caller take care of
1505                          * failover.  We only return for failover mounts
1506                          * because otherwise we want the "not responding"
1507                          * message, the timer updates, etc.
1508                          */
1509                         if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) &&
1510                             (error = try_failover(status)) != 0) {
1511                                 clfree4(client, ch, nfscl);
1512                                 if (cred_cloned)
1513                                         crfree(cr);
1514                                 *rpc_statusp = status;
1515                                 return (error);
1516                         }
1517 
1518                         if (flags & RFSCALL_SOFT)
1519                                 break;
1520 
1521                         tryagain = TRUE;
1522 
1523                         /*
1524                          * The call is in progress (over COTS).
1525                          * Try the CLNT_CALL again, but don't
1526                          * print a noisy error message.
1527                          */
1528                         if (status == RPC_INPROGRESS)
1529                                 break;
1530 
1531                         timeo = backoff(timeo);
1532                         mutex_enter(&mi->mi_lock);
1533                         if (!(mi->mi_flags & MI4_PRINTED)) {
1534                                 mi->mi_flags |= MI4_PRINTED;
1535                                 mutex_exit(&mi->mi_lock);
1536                                 nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, 0, 0, 0,
1537                                     FALSE, NULL, 0, NULL);
1538                         } else
1539                                 mutex_exit(&mi->mi_lock);
1540 
1541                         if (*doqueue && nfs_has_ctty()) {
1542                                 *doqueue = 0;
1543                                 if (!(mi->mi_flags & MI4_NOPRINT))
1544                                         nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
1545                                             0, 0, 0, FALSE, NULL, 0, NULL);
1546                         }
1547                 }
1548         } while (tryagain);
1549 
1550         DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status,
1551             int, rpcerr.re_errno);
1552 
1553         if (status != RPC_SUCCESS) {
1554                 zoneid_t zoneid = mi->mi_zone->zone_id;
1555 
1556                 /*
1557                  * Let soft mounts use the timed out message.
1558                  */
1559                 if (status == RPC_INPROGRESS)
1560                         status = RPC_TIMEDOUT;
1561                 nfscl->nfscl_stat[NFS4_MINORVERSION(mi)].badcalls.value.ui64++;
1562                 if (status != RPC_INTR) {
1563                         mutex_enter(&mi->mi_lock);
1564                         mi->mi_flags |= MI4_DOWN;
1565                         mutex_exit(&mi->mi_lock);
1566                         CLNT_GETERR(client, &rpcerr);
1567 #ifdef DEBUG
1568                         bufp = clnt_sperror(client, svp->sv_hostname);
1569                         zprintf(zoneid, "NFS%d %s failed for %s\n",
1570                             mi->mi_vers, mi->mi_rfsnames[which], bufp);
1571                         if (nfs_has_ctty()) {
1572                                 if (!(mi->mi_flags & MI4_NOPRINT)) {
1573                                         uprintf("NFS%d %s failed for %s\n",
1574                                             mi->mi_vers, mi->mi_rfsnames[which],
1575                                             bufp);
1576                                 }
1577                         }
1578                         kmem_free(bufp, MAXPATHLEN);
1579 #else
1580                         zprintf(zoneid,
1581                             "NFS %s failed for server %s: error %d (%s)\n",
1582                             mi->mi_rfsnames[which], svp->sv_hostname,
1583                             status, clnt_sperrno(status));
1584                         if (nfs_has_ctty()) {
1585                                 if (!(mi->mi_flags & MI4_NOPRINT)) {
1586                                         uprintf(
1587                                 "NFS %s failed for server %s: error %d (%s)\n",
1588                                             mi->mi_rfsnames[which],
1589                                             svp->sv_hostname, status,
1590                                             clnt_sperrno(status));
1591                                 }
1592                         }
1593 #endif
1594                         /*
1595                          * when CLNT_CALL() fails with RPC_AUTHERROR,
1596                          * re_errno is set appropriately depending on
1597                          * the authentication error
1598                          */
1599                         if (status == RPC_VERSMISMATCH ||
1600                             status == RPC_PROGVERSMISMATCH)
1601                                 rpcerr.re_errno = EIO;
1602                 }
1603         } else {
1604                 /*
1605                  * Test the value of mi_down and mi_printed without
1606                  * holding the mi_lock mutex.  If they are both zero,
1607                  * then it is okay to skip the down and printed
1608                  * processing.  This saves on a mutex_enter and
1609                  * mutex_exit pair for a normal, successful RPC.
1610                  * This was just complete overhead.
1611                  */
1612                 if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) {
1613                         mutex_enter(&mi->mi_lock);
1614                         mi->mi_flags &= ~MI4_DOWN;
1615                         if (mi->mi_flags & MI4_PRINTED) {
1616                                 mi->mi_flags &= ~MI4_PRINTED;
1617                                 mutex_exit(&mi->mi_lock);
1618                                 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1619                                         nfs4_queue_fact(RF_SRV_OK, mi, 0, 0,
1620                                             0, FALSE, NULL, 0, NULL);
1621                         } else
1622                                 mutex_exit(&mi->mi_lock);
1623                 }
1624 
1625                 if (*doqueue == 0) {
1626                         if (!(mi->mi_flags & MI4_NOPRINT) &&
1627                             !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1628                                 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0,
1629                                     FALSE, NULL, 0, NULL);
1630 
1631                         *doqueue = 1;
1632                 }
1633         }
1634 
1635         clfree4(client, ch, nfscl);
1636         if (cred_cloned)
1637                 crfree(cr);
1638 
1639         ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
1640 
1641         TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d",
1642             rpcerr.re_errno);
1643 
1644         *rpc_statusp = status;
1645         return (rpcerr.re_errno);
1646 }
1647 
1648 /*
1649  * rfs4call - general wrapper for RPC calls initiated by the client
1650  * KLR-make this a nosequence rfs4call which will not add a sequence op
1651  * XXXrsb - External callers now user rfs4call() with RFS4CALL_NOSEQ.
1652  */
1653 static void
1654 rfs4call_nosequence(mntinfo4_t *mi, servinfo4_t *svp, COMPOUND4args_clnt *argsp,
1655     COMPOUND4res_clnt *resp, cred_t *cr, int *doqueue, int flags,
1656     nfs4_error_t *ep)
1657 {
1658         int i, error;
1659         enum clnt_stat rpc_status = NFS4_OK;
1660         int num_resops;
1661         struct nfs4_clnt *nfscl;
1662 
1663         ASSERT(nfs_zone() == mi->mi_zone);
1664         nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
1665         ASSERT(nfscl != NULL);
1666         /*
1667          * Note that the first call will be accounted for the default
1668          * minor version, even if there are no mounts for that minor
1669          * version. The call may result in a minor vesion mismatch and
1670          * subsequent calls will get accounted correctly. It makes sense
1671          * to account the first call for the default minor version,
1672          * because the client thought that this call is for that minor
1673          * version. Same goes for the compound procedure as well.
1674          */
1675         nfscl->nfscl_stat[NFS4_MINORVERSION(mi)].calls.value.ui64++;
1676         mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++;
1677 
1678                 /* XXX - Set up minorversion */
1679         argsp->minor_vers = NFS4_MINORVERSION(mi);
1680 
1681         /* Set up the results struct for XDR usage */
1682         resp->argsp = argsp;
1683         resp->array = NULL;
1684         resp->status = 0;
1685         resp->decode_len = 0;
1686 
1687         error = nfs4_rfscall(mi, svp, NFSPROC4_COMPOUND,
1688             xdr_COMPOUND4args_clnt, (caddr_t)argsp,
1689             xdr_COMPOUND4res_clnt, (caddr_t)resp, cr,
1690             doqueue, &rpc_status, flags, nfscl);
1691 
1692         /*
1693          * Map the connection not bound rpc error to nfs
1694          * error. Currently with no connection binding enforcement
1695          * by the client, we won't hit this. With connection binding
1696          * enforcement in the future (with SSV), the below method is
1697          * needed to drive a bind_conn_to_session after a connection
1698          * loss by the client (See section - 2.10.10.1.4 of the draft)
1699          */
1700         if (error && rpc_status == RPC_CONN_NOT_BOUND) {
1701                 ep->error = 0;
1702                 ep->rpc_status = 0;
1703                 ep->stat = NFS4ERR_CONN_NOT_BOUND_TO_SESSION;
1704                 return;
1705         }
1706 
1707         /* Return now if it was any other RPC error */
1708         if (error) {
1709                 ep->error = error;
1710                 ep->stat = resp->status;
1711                 ep->rpc_status = rpc_status;
1712                 return;
1713         }
1714         /*
1715          * else we'll count the processed operations. Note that we will
1716          * NOT enter here in case of NFS4ERR_MINOR_VERS_MISMATCH.
1717          */
1718         num_resops = resp->decode_len;
1719         for (i = 0; i < num_resops; i++) {
1720                 /*
1721                  * Count the individual operations
1722                  * processed by the server.
1723                  */
1724                 if (NFS4_MINORVERSION(mi) == NFS4_MINOR_v1) {
1725                         if (resp->array[i].resop >= NFSPROC4_NULL &&
1726                             resp->array[i].resop <= OP_RECLAIM_COMPLETE) {
1727                                 mi->mi_reqs[resp->array[i].resop].value.ui64++;
1728                         }
1729                 } else if (NFS4_MINORVERSION(mi) == NFS4_MINOR_v0) {
1730                         if (resp->array[i].resop >= NFSPROC4_NULL &&
1731                             resp->array[i].resop <= OP_RELEASE_LOCKOWNER) {
1732                                 mi->mi_reqs[resp->array[i].resop].value.ui64++;
1733                         }
1734                 }
1735         }
1736 
1737         ep->error = 0;
1738         ep->stat = resp->status;
1739         ep->rpc_status = rpc_status;
1740 }
1741 
1742 void
1743 rfs41_call(mntinfo4_t *mi, servinfo4_t *svp, COMPOUND4args_clnt *argsp,
1744         COMPOUND4res_clnt *resp, cred_t *cr, int *doqueue, int flags,
1745         nfs4_error_t *ep)
1746 {
1747         nfs4_slot_t             *slot;
1748         SEQUENCE4res            *seqres;
1749         struct nfs4_server      *np;
1750         COMPOUND4args_clnt      rfs_args, *rfsargp;
1751         COMPOUND4res_clnt       rfs_res, *rfsresp;
1752         int                     add_seq = 0;
1753 
1754         /*
1755          * XXXrsb - The following code is likely to change
1756          * For now, we have a pointer from the servinfo4 to the nfs4_server
1757          * If we have a servinfo4 and the pointer is valid, then use it.
1758          * One note, we may have to deal with the "np == NULL" case.
1759          */
1760         if (svp && svp->sv_ds_n4sp) {
1761                 np = svp->sv_ds_n4sp;
1762                 nfs4_server_hold(np);
1763         } else {
1764                 np = find_nfs4_server(mi);
1765                 ASSERT(np != NULL);
1766                 mutex_exit(&np->s_lock);
1767         }
1768 
1769 
1770         /*
1771          * Allocate another args array so we can insert
1772          * a SEQUENCE Op as the first operation, copy already
1773          * built args into it also.
1774          */
1775         if (argsp->array->argop != OP_SEQUENCE) {
1776                 rfs_args.ctag = argsp->ctag;
1777                 rfs_args.array_len = argsp->array_len + 1;
1778                 rfs_args.array = kmem_zalloc(sizeof (nfs_argop4) *
1779                     rfs_args.array_len, KM_SLEEP);
1780 
1781                 bcopy(argsp->array, rfs_args.array + 1,
1782                     sizeof (nfs_argop4) * argsp->array_len);
1783 
1784                 ASSERT(argsp->array_len >= 1);
1785                 rfs_args.array->argop = OP_SEQUENCE;
1786                 rfsargp = &rfs_args;
1787                 rfsresp = &rfs_res;
1788                 add_seq = 1;
1789         } else {
1790                 rfsargp = argsp;
1791                 rfsresp = resp;
1792         }
1793 
1794         /* Set up the sequence OP */
1795 
1796         nfs4sequence_setup(&np->ssx, rfsargp, &slot);
1797 
1798         /*
1799          * Send it using rfs4call_nosequence()
1800          * XXXrsb - this will likely be refactored with the rest of
1801          * the rfs4call() family
1802          */
1803         rfs4call_nosequence(mi, svp, rfsargp, rfsresp, cr, doqueue, flags, ep);
1804 
1805 #if     0
1806         zcmn_err(mi->mi_zone->zone_id, CE_WARN,
1807             "Tag: %x SEQUENCE slot: %x seq: %x estatus: %x nstatus: %x",
1808             rfsargp->ctag,
1809             rfsargp->array->nfs_argop4_u.opsequence.sa_slotid,
1810             rfsargp->array->nfs_argop4_u.opsequence.sa_sequenceid,
1811             ep->error,
1812             rfsresp->array != NULL ?
1813             rfsresp->array->nfs_resop4_u.opsequence.status : 0);
1814 
1815         if (ep->error || ep->stat || ep->rpc_status)
1816                 cmn_err(CE_WARN, "rfs4call failed: %d, %d, %d",
1817                     ep->error, ep->stat, ep->rpc_status);
1818 #endif
1819 
1820         nfs4sequence_fin(&np->ssx, rfsresp, slot, ep);
1821 
1822         /*
1823          * If the OTW call failed completely, or if the
1824          * results array is NULL, just get out
1825          */
1826         if (ep->error || (ep->stat && rfsresp->array == NULL)) {
1827 
1828                 if (ep->error == 0) {
1829                         ep->error = geterrno4(ep->stat);
1830                 }
1831 
1832                 if (add_seq)
1833                         kmem_free(rfs_args.array,
1834                             sizeof (nfs_argop4) * rfs_args.array_len);
1835 
1836                 nfs4_server_rele(np);
1837                 return;
1838         }
1839 
1840         /*
1841          * Check the results of the sequence op.  If it failed and we
1842          * added it for the caller, then we don't have any results
1843          * to return.
1844          */
1845         seqres = &rfsresp->array->nfs_resop4_u.opsequence;
1846         if (seqres->sr_status != NFS4_OK) {
1847 
1848                 cmn_err(CE_WARN, "rfs4call: sequence OP failed %d",
1849                     seqres->sr_status);
1850 
1851                 if (add_seq) {
1852                         kmem_free(rfs_args.array,
1853                             sizeof (nfs_argop4) * rfs_args.array_len);
1854                         resp->status = seqres->sr_status;
1855                         resp->array_len = resp->decode_len = 0;
1856                         resp->array = NULL;
1857                 }
1858                 /* XXX - xdr_free? free cpy */
1859                 nfs4_server_rele(np);
1860                 return;
1861         }
1862 
1863         /*
1864          * Update lease time if we have state since SEQUENCE op was successful
1865          */
1866         mutex_enter(&np->s_lock);
1867         if (np->lease_valid == NFS4_LEASE_VALID && np->state_ref_count)
1868                 np->last_renewal_time = gethrestime_sec();
1869         mutex_exit(&np->s_lock);
1870 
1871         /*
1872          * We some results of interest to the, so
1873          * Allocate an additional response array which doesn't have
1874          * SEQUENCE op results, copy results to it if not just a
1875          * SEQUENCE op for lease renewal.
1876          */
1877         if (add_seq) {
1878                 resp->status = rfsresp->status;
1879                 resp->array_len =
1880                     rfsresp->array_len == 0 ? 0 :rfsresp->array_len - 1;
1881                 resp->decode_len = rfsresp->decode_len == 0 ? 0 :
1882                     rfsresp->decode_len - 1;
1883                 resp->argsp = argsp;
1884                 if (resp->array_len > 0) {
1885                         ASSERT(rfs_res.array != NULL);
1886                         resp->array =
1887                             kmem_alloc(sizeof (nfs_resop4) *
1888                             resp->array_len, KM_SLEEP);
1889                         bcopy(rfsresp->array + 1, resp->array,
1890                             sizeof (nfs_resop4) * resp->array_len);
1891                 } else {
1892                         resp->array = NULL;
1893                 }
1894                 kmem_free(rfs_args.array,
1895                     sizeof (nfs_argop4) * rfs_args.array_len);
1896                 kmem_free(rfs_res.array,
1897                     sizeof (nfs_resop4) * rfs_res.array_len);
1898         }
1899         nfs4_server_rele(np);
1900 }
1901 
1902 void
1903 rfs4call(mntinfo4_t *mi, servinfo4_t *svp, COMPOUND4args_clnt *argsp,
1904         COMPOUND4res_clnt *resp, cred_t *cr, int *doqueue, int flags,
1905         nfs4_error_t *ep)
1906 {
1907         if (NFS4_MINORVERSION(mi) == 0 || (flags & RFS4CALL_NOSEQ)) {
1908                 rfs4call_nosequence(mi, svp, argsp, resp, cr, doqueue,
1909                     flags, ep);
1910                 return;
1911         }
1912         rfs41_call(mi, svp, argsp, resp, cr, doqueue, flags, ep);
1913 }
1914 
1915 /*
1916  * nfs4rename_update - updates stored state after a rename.  Currently this
1917  * is the path of the object and anything under it, and the filehandle of
1918  * the renamed object.
1919  */
1920 void
1921 nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm)
1922 {
1923         sfh4_update(VTOR4(renvp)->r_fh, nfh4p);
1924         fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm);
1925 }
1926 
1927 /*
1928  * Routine to look up the filehandle for the given path and rootvp.
1929  *
1930  * Return values:
1931  * - success: returns zero and *statp is set to NFS4_OK, and *fhp is
1932  *   updated.
1933  * - error: return value (errno value) and/or *statp is set appropriately.
1934  */
1935 #define RML_ORDINARY    1
1936 #define RML_NAMED_ATTR  2
1937 #define RML_ATTRDIR     3
1938 
1939 static void
1940 remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp,
1941     int filetype, cred_t *cr,
1942     nfs_fh4 *fhp, nfs4_ga_res_t *garp,          /* fh, attrs for object */
1943     nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp,        /* fh, attrs for parent */
1944     nfs4_error_t *ep)
1945 {
1946         COMPOUND4args_clnt args;
1947         COMPOUND4res_clnt res;
1948         nfs_argop4 *argop;
1949         nfs_resop4 *resop;
1950         int num_argops;
1951         lookup4_param_t lookuparg;
1952         nfs_fh4 *tmpfhp;
1953         int doqueue = 1;
1954         char *path;
1955         mntinfo4_t *mi;
1956 
1957         ASSERT(fname != NULL);
1958         ASSERT(rootvp->v_type == VDIR);
1959 
1960         mi = VTOMI4(rootvp);
1961         path = fn_path(fname);
1962         switch (filetype) {
1963         case RML_NAMED_ATTR:
1964                 lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR;
1965                 args.ctag = TAG_REMAP_LOOKUP_NA;
1966                 break;
1967         case RML_ATTRDIR:
1968                 lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR;
1969                 args.ctag = TAG_REMAP_LOOKUP_AD;
1970                 break;
1971         case RML_ORDINARY:
1972                 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
1973                 args.ctag = TAG_REMAP_LOOKUP;
1974                 break;
1975         default:
1976                 ep->error = EINVAL;
1977                 return;
1978         }
1979         lookuparg.argsp = &args;
1980         lookuparg.resp = &res;
1981         lookuparg.header_len = 1;       /* Putfh */
1982         lookuparg.trailer_len = 0;
1983         lookuparg.ga_bits = MI4_DEFAULT_ATTRMAP(mi);
1984         lookuparg.mi = VTOMI4(rootvp);
1985 
1986         (void) nfs4lookup_setup(path, &lookuparg, 1);
1987 
1988         /* 0: putfh directory */
1989         argop = args.array;
1990         argop[0].argop = OP_CPUTFH;
1991         argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh;
1992 
1993         num_argops = args.array_len;
1994 
1995         rfs4call(mi, NULL, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);
1996 
1997         if (ep->error || res.status != NFS4_OK)
1998                 goto exit;
1999 
2000         /* get the object filehandle */
2001         resop = &res.array[res.array_len - 2];
2002         if (resop->resop != OP_GETFH) {
2003                 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
2004                     0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
2005                 ep->stat = NFS4ERR_SERVERFAULT;
2006                 goto exit;
2007         }
2008         tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
2009         if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
2010                 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
2011                     tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
2012                     TAG_NONE, 0, 0);
2013                 ep->stat = NFS4ERR_SERVERFAULT;
2014                 goto exit;
2015         }
2016         fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
2017         nfs_fh4_copy(tmpfhp, fhp);
2018 
2019         /* get the object attributes */
2020         resop = &res.array[res.array_len - 1];
2021         if (garp && resop->resop == OP_GETATTR)
2022                 *garp = resop->nfs_resop4_u.opgetattr.ga_res;
2023 
2024         /* See if there are enough fields in the response for parent info */
2025         if ((int)res.array_len - 5 <= 0)
2026                 goto exit;
2027 
2028         /* get the parent filehandle */
2029         resop = &res.array[res.array_len - 5];
2030         if (resop->resop != OP_GETFH) {
2031                 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
2032                     0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
2033                 ep->stat = NFS4ERR_SERVERFAULT;
2034                 goto exit;
2035         }
2036         tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
2037         if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
2038                 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
2039                     tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
2040                     TAG_NONE, 0, 0);
2041                 ep->stat = NFS4ERR_SERVERFAULT;
2042                 goto exit;
2043         }
2044         pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
2045         nfs_fh4_copy(tmpfhp, pfhp);
2046 
2047         /* get the parent attributes */
2048         resop = &res.array[res.array_len - 4];
2049         if (pgarp && resop->resop == OP_GETATTR)
2050                 *pgarp = resop->nfs_resop4_u.opgetattr.ga_res;
2051 
2052 exit:
2053         /*
2054          * It is too hard to remember where all the OP_LOOKUPs are
2055          */
2056         nfs4args_lookup_free(argop, num_argops);
2057         kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
2058 
2059         if (!ep->error)
2060                 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2061         kmem_free(path, strlen(path)+1);
2062 }
2063 
2064 /*
2065  * NFS client failover / volatile filehandle support
2066  *
2067  * Recover the filehandle for the given rnode.
2068  *
2069  * Errors are returned via the nfs4_error_t parameter.
2070  */
2071 
2072 void
2073 nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
2074 {
2075         int is_stub;
2076         rnode4_t *rp = VTOR4(vp);
2077         vnode_t *rootvp = NULL;
2078         vnode_t *dvp = NULL;
2079         cred_t *cr, *cred_otw;
2080         nfs4_ga_res_t gar, pgar;
2081         nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
2082         int filetype = RML_ORDINARY;
2083         nfs4_recov_state_t recov = {NULL, 0, 0};
2084         int badfhcount = 0;
2085         nfs4_open_stream_t *osp = NULL;
2086         bool_t first_time = TRUE;       /* first time getting OTW cred */
2087         bool_t last_time = FALSE;       /* last time getting OTW cred */
2088 
2089         NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
2090             "nfs4_remap_file: remapping %s", rnode4info(rp)));
2091         ASSERT(nfs4_consistent_type(vp));
2092 
2093         if (vp->v_flag & VROOT) {
2094                 nfs4_remap_root(mi, ep, flags);
2095                 return;
2096         }
2097 
2098         /*
2099          * Given the root fh, use the path stored in
2100          * the rnode to find the fh for the new server.
2101          */
2102         ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
2103         if (ep->error != 0)
2104                 return;
2105 
2106         cr = curthread->t_cred;
2107         ASSERT(cr != NULL);
2108 get_remap_cred:
2109         /*
2110          * Releases the osp, if it is provided.
2111          * Puts a hold on the cred_otw and the new osp (if found).
2112          */
2113         cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp,
2114             &first_time, &last_time);
2115         ASSERT(cred_otw != NULL);
2116 
2117         if (rp->r_flags & R4ISXATTR) {
2118                 filetype = RML_NAMED_ATTR;
2119                 (void) vtodv(vp, &dvp, cred_otw, FALSE);
2120         }
2121 
2122         if (vp->v_flag & V_XATTRDIR) {
2123                 filetype = RML_ATTRDIR;
2124         }
2125 
2126         if (filetype == RML_ORDINARY && rootvp->v_type == VREG) {
2127                 /* file mount, doesn't need a remap */
2128                 goto done;
2129         }
2130 
2131 again:
2132         remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw,
2133             &newfh, &gar, &newpfh, &pgar, ep);
2134 
2135         NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
2136             "nfs4_remap_file: remap_lookup returned %d/%d",
2137             ep->error, ep->stat));
2138 
2139         if (last_time == FALSE && ep->error == EACCES) {
2140                 crfree(cred_otw);
2141                 if (dvp != NULL)
2142                         VN_RELE(dvp);
2143                 goto get_remap_cred;
2144         }
2145         if (ep->error != 0)
2146                 goto done;
2147 
2148         switch (ep->stat) {
2149         case NFS4_OK:
2150                 badfhcount = 0;
2151                 if (recov.rs_flags & NFS4_RS_DELAY_MSG) {
2152                         mutex_enter(&rp->r_statelock);
2153                         rp->r_delay_interval = 0;
2154                         mutex_exit(&rp->r_statelock);
2155                         uprintf("NFS File Available..\n");
2156                 }
2157                 break;
2158         case NFS4ERR_FHEXPIRED:
2159         case NFS4ERR_BADHANDLE:
2160                 /*
2161                  * If we ran into filehandle problems, we should try to
2162                  * remap the root vnode first and hope life gets better.
2163                  * But we need to avoid loops.
2164                  */
2165                 if (badfhcount++ > 0)
2166                         goto done;
2167                 if (newfh.nfs_fh4_len != 0) {
2168                         kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
2169                         newfh.nfs_fh4_len = 0;
2170                 }
2171                 if (newpfh.nfs_fh4_len != 0) {
2172                         kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
2173                         newpfh.nfs_fh4_len = 0;
2174                 }
2175                 /* relative path - remap rootvp then retry */
2176                 VN_RELE(rootvp);
2177                 rootvp = NULL;
2178                 nfs4_remap_root(mi, ep, flags);
2179                 if (ep->error != 0 || ep->stat != NFS4_OK)
2180                         goto done;
2181                 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
2182                 if (ep->error != 0)
2183                         goto done;
2184                 goto again;
2185         case NFS4ERR_DELAY:
2186                 badfhcount = 0;
2187                 nfs4_set_delay_wait(vp);
2188                 ep->error = nfs4_wait_for_delay(vp, &recov, 0);
2189                 if (ep->error != 0)
2190                         goto done;
2191                 goto again;
2192         case NFS4ERR_ACCESS:
2193                 /* get new cred, try again */
2194                 if (last_time == TRUE)
2195                         goto done;
2196                 if (dvp != NULL)
2197                         VN_RELE(dvp);
2198                 crfree(cred_otw);
2199                 goto get_remap_cred;
2200         default:
2201                 goto done;
2202         }
2203 
2204         /*
2205          * Check on the new and old rnodes before updating;
2206          * if the vnode type or size changes, issue a warning
2207          * and mark the file dead.
2208          */
2209         mutex_enter(&rp->r_statelock);
2210         if (flags & NFS4_REMAP_CKATTRS) {
2211                 if (vp->v_type != gar.n4g_va.va_type ||
2212                     (vp->v_type != VDIR &&
2213                     rp->r_size != gar.n4g_va.va_size)) {
2214                         NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
2215                             "nfs4_remap_file: size %d vs. %d, type %d vs. %d",
2216                             (int)rp->r_size, (int)gar.n4g_va.va_size,
2217                             vp->v_type, gar.n4g_va.va_type));
2218                         mutex_exit(&rp->r_statelock);
2219                         nfs4_queue_event(RE_FILE_DIFF, mi,
2220                             rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0,
2221                             TAG_NONE, TAG_NONE, 0, 0);
2222                         nfs4_fail_recov(vp, NULL, 0, NFS4_OK);
2223                         goto done;
2224                 }
2225         }
2226         ASSERT(gar.n4g_va.va_type != VNON);
2227         rp->r_server = mi->mi_curr_serv;
2228 
2229         /*
2230          * Turn this object into a "stub" object if we
2231          * crossed an underlying server fs boundary.
2232          *
2233          * This stub will be for a mirror-mount.
2234          *
2235          * See comment in r4_do_attrcache() for more details.
2236          */
2237         is_stub = 0;
2238         if (gar.n4g_fsid_valid) {
2239                 (void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0);
2240                 rp->r_srv_fsid = gar.n4g_fsid;
2241                 if (!FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid))
2242                         is_stub = 1;
2243                 nfs_rw_exit(&rp->r_server->sv_lock);
2244 #ifdef DEBUG
2245         } else {
2246                 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
2247                     "remap_file: fsid attr not provided by server.  rp=%p",
2248                     (void *)rp));
2249 #endif
2250         }
2251         if (is_stub)
2252                 r4_stub_mirrormount(rp);
2253         else
2254                 r4_stub_none(rp);
2255         mutex_exit(&rp->r_statelock);
2256         nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */
2257         sfh4_update(rp->r_fh, &newfh);
2258         ASSERT(nfs4_consistent_type(vp));
2259 
2260         /*
2261          * If we got parent info, use it to update the parent
2262          */
2263         if (newpfh.nfs_fh4_len != 0) {
2264                 if (rp->r_svnode.sv_dfh != NULL)
2265                         sfh4_update(rp->r_svnode.sv_dfh, &newpfh);
2266                 if (dvp != NULL) {
2267                         /* force update of attrs */
2268                         nfs4_attrcache_noinval(dvp, &pgar, gethrtime());
2269                 }
2270         }
2271 done:
2272         if (newfh.nfs_fh4_len != 0)
2273                 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
2274         if (newpfh.nfs_fh4_len != 0)
2275                 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
2276         if (cred_otw != NULL)
2277                 crfree(cred_otw);
2278         if (rootvp != NULL)
2279                 VN_RELE(rootvp);
2280         if (dvp != NULL)
2281                 VN_RELE(dvp);
2282         if (osp != NULL)
2283                 open_stream_rele(osp, rp);
2284 }
2285 
2286 /*
2287  * Client-side failover support: remap the filehandle for vp if it appears
2288  * necessary.  errors are returned via the nfs4_error_t parameter; though,
2289  * if there is a problem, we will just try again later.
2290  */
2291 
2292 void
2293 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
2294 {
2295         if (vp == NULL)
2296                 return;
2297 
2298         if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY))
2299                 return;
2300 
2301         if (VTOR4(vp)->r_server == mi->mi_curr_serv)
2302                 return;
2303 
2304         nfs4_remap_file(mi, vp, flags, ep);
2305 }
2306 
2307 /*
2308  * nfs4_make_dotdot() - find or create a parent vnode of a non-root node.
2309  *
2310  * Our caller has a filehandle for ".." relative to a particular
2311  * directory object.  We want to find or create a parent vnode
2312  * with that filehandle and return it.  We can of course create
2313  * a vnode from this filehandle, but we need to also make sure
2314  * that if ".." is a regular file (i.e. dvp is a V_XATTRDIR)
2315  * that we have a parent FH for future reopens as well.  If
2316  * we have a remap failure, we won't be able to reopen this
2317  * file, but we won't treat that as fatal because a reopen
2318  * is at least unlikely.  Someday nfs4_reopen() should look
2319  * for a missing parent FH and try a remap to recover from it.
2320  *
2321  * need_start_op argument indicates whether this function should
2322  * do a start_op before calling remap_lookup().  This should
2323  * be FALSE, if you are the recovery thread or in an op; otherwise,
2324  * set it to TRUE.
2325  */
2326 int
2327 nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp,
2328     cred_t *cr, vnode_t **vpp, int need_start_op)
2329 {
2330         mntinfo4_t *mi = VTOMI4(dvp);
2331         nfs4_fname_t *np = NULL, *pnp = NULL;
2332         vnode_t *vp = NULL, *rootvp = NULL;
2333         rnode4_t *rp;
2334         nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
2335         nfs4_ga_res_t gar, pgar;
2336         vattr_t va, pva;
2337         nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
2338         nfs4_sharedfh_t *sfh = NULL, *psfh = NULL;
2339         nfs4_recov_state_t recov_state;
2340 
2341 #ifdef DEBUG
2342         /*
2343          * ensure need_start_op is correct
2344          */
2345         {
2346                 int no_need_start_op = (tsd_get(nfs4_tsd_key) ||
2347                     (curthread == mi->mi_recovthread));
2348                 /* C needs a ^^ operator! */
2349                 ASSERT(((need_start_op) && (!no_need_start_op)) ||
2350                     ((! need_start_op) && (no_need_start_op)));
2351         }
2352 #endif
2353         ASSERT(VTOMI4(dvp)->mi_zone == nfs_zone());
2354 
2355         NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE,
2356             "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp,
2357             rnode4info(VTOR4(dvp))));
2358 
2359         /*
2360          * rootvp might be needed eventually. Holding it now will
2361          * ensure that r4find_unlocked() will find it, if ".." is the root.
2362          */
2363         e.error = VFS_ROOT(mi->mi_vfsp, &rootvp);
2364         if (e.error != 0)
2365                 goto out;
2366         rp = r4find_unlocked(fhp, mi->mi_vfsp);
2367         if (rp != NULL) {
2368                 *vpp = RTOV4(rp);
2369                 VN_RELE(rootvp);
2370                 return (0);
2371         }
2372 
2373         /*
2374          * Since we don't have the rnode, we have to go over the wire.
2375          * remap_lookup() can get all of the filehandles and attributes
2376          * we need in one operation.
2377          */
2378         np = fn_parent(VTOSV(dvp)->sv_name);
2379         ASSERT(np != NULL);
2380 
2381         recov_state.rs_flags = 0;
2382         recov_state.rs_num_retry_despite_err = 0;
2383 recov_retry:
2384         if (need_start_op) {
2385                 e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP,
2386                     &recov_state, NULL);
2387                 if (e.error != 0) {
2388                         goto out;
2389                 }
2390         }
2391         va.va_type = VNON;
2392         pva.va_type = VNON;
2393         remap_lookup(np, rootvp, RML_ORDINARY, cr,
2394             &newfh, &gar, &newpfh, &pgar, &e);
2395         if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) {
2396                 if (need_start_op) {
2397                         bool_t abort;
2398 
2399                         abort = nfs4_start_recovery(&e, mi,
2400                             rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL);
2401                         if (abort) {
2402                                 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2403                                     &recov_state, FALSE);
2404                                 if (e.error == 0)
2405                                         e.error = EIO;
2406                                 goto out;
2407                         }
2408                         nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2409                             &recov_state, TRUE);
2410                         goto recov_retry;
2411                 }
2412                 if (e.error == 0)
2413                         e.error = EIO;
2414                 goto out;
2415         }
2416 
2417         if (!e.error) {
2418                 va = gar.n4g_va;
2419                 pva = pgar.n4g_va;
2420         }
2421 
2422         if ((e.error != 0) ||
2423             (va.va_type != VDIR)) {
2424                 if (need_start_op)
2425                         nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2426                             &recov_state, FALSE);
2427                 if (e.error == 0)
2428                         e.error = EIO;
2429                 goto out;
2430         }
2431 
2432         if (e.stat != NFS4_OK) {
2433                 if (need_start_op)
2434                         nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2435                             &recov_state, FALSE);
2436                 e.error = EIO;
2437                 goto out;
2438         }
2439 
2440         /*
2441          * It is possible for remap_lookup() to return with no error,
2442          * but without providing the parent filehandle and attrs.
2443          */
2444         if (pva.va_type != VDIR) {
2445                 /*
2446                  * Call remap_lookup() again, this time with the
2447                  * newpfh and pgar args in the first position.
2448                  */
2449                 pnp = fn_parent(np);
2450                 if (pnp != NULL) {
2451                         remap_lookup(pnp, rootvp, RML_ORDINARY, cr,
2452                             &newpfh, &pgar, NULL, NULL, &e);
2453                         if (nfs4_needs_recovery(&e, FALSE,
2454                             mi->mi_vfsp)) {
2455                                 if (need_start_op) {
2456                                         bool_t abort;
2457 
2458                                         abort = nfs4_start_recovery(&e, mi,
2459                                             rootvp, NULL, NULL, NULL,
2460                                             OP_LOOKUP, NULL);
2461                                         if (abort) {
2462                                                 nfs4_end_fop(mi, rootvp, NULL,
2463                                                     OH_LOOKUP, &recov_state,
2464                                                     FALSE);
2465                                                 if (e.error == 0)
2466                                                         e.error = EIO;
2467                                                 goto out;
2468                                         }
2469                                         nfs4_end_fop(mi, rootvp, NULL,
2470                                             OH_LOOKUP, &recov_state, TRUE);
2471                                         goto recov_retry;
2472                                 }
2473                                 if (e.error == 0)
2474                                         e.error = EIO;
2475                                 goto out;
2476                         }
2477 
2478                         if (e.stat != NFS4_OK) {
2479                                 if (need_start_op)
2480                                         nfs4_end_fop(mi, rootvp, NULL,
2481                                             OH_LOOKUP, &recov_state, FALSE);
2482                                 e.error = EIO;
2483                                 goto out;
2484                         }
2485                 }
2486                 if ((pnp == NULL) ||
2487                     (e.error != 0) ||
2488                     (pva.va_type == VNON)) {
2489                         if (need_start_op)
2490                                 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2491                                     &recov_state, FALSE);
2492                         if (e.error == 0)
2493                                 e.error = EIO;
2494                         goto out;
2495                 }
2496         }
2497         ASSERT(newpfh.nfs_fh4_len != 0);
2498         if (need_start_op)
2499                 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE);
2500         psfh = sfh4_get(&newpfh, mi);
2501 
2502         sfh = sfh4_get(&newfh, mi);
2503         vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t);
2504 
2505 out:
2506         if (np != NULL)
2507                 fn_rele(&np);
2508         if (pnp != NULL)
2509                 fn_rele(&pnp);
2510         if (newfh.nfs_fh4_len != 0)
2511                 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
2512         if (newpfh.nfs_fh4_len != 0)
2513                 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
2514         if (sfh != NULL)
2515                 sfh4_rele(&sfh);
2516         if (psfh != NULL)
2517                 sfh4_rele(&psfh);
2518         if (rootvp != NULL)
2519                 VN_RELE(rootvp);
2520         *vpp = vp;
2521         return (e.error);
2522 }
2523 
/*
 * Debug-only global, compiled in only when DEBUG is defined and starting
 * at zero.  NOTE(review): the name suggests it accounts for memory used by
 * rnode path strings; its users are outside this part of the file, so
 * confirm before relying on that.
 */
2524 #ifdef DEBUG
2525 size_t r_path_memuse = 0;
2526 #endif
2527 
2528 /*
2529  * NFS client failover support
2530  *
2531  * sv4_free() frees the malloc'd portion of a "servinfo_t".
2532  */
2533 void
2534 sv4_free(servinfo4_t *svp)
2535 {
2536         servinfo4_t *next;
2537         struct knetconfig *knconf;
2538 
2539         while (svp != NULL) {
2540                 next = svp->sv_next;
2541                 if (svp->sv_dhsec)
2542                         sec_clnt_freeinfo(svp->sv_dhsec);
2543                 if (svp->sv_secdata)
2544                         sec_clnt_freeinfo(svp->sv_secdata);
2545                 if (svp->sv_save_secinfo &&
2546                     svp->sv_save_secinfo != svp->sv_secinfo)
2547                         secinfo_free(svp->sv_save_secinfo);
2548                 if (svp->sv_secinfo)
2549                         secinfo_free(svp->sv_secinfo);
2550                 if (svp->sv_hostname && svp->sv_hostnamelen > 0)
2551                         kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
2552                 knconf = svp->sv_knconf;
2553                 if (knconf != NULL) {
2554                         if (knconf->knc_protofmly != NULL)
2555                                 kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2556                         if (knconf->knc_proto != NULL)
2557                                 kmem_free(knconf->knc_proto, KNC_STRSIZE);
2558                         kmem_free(knconf, sizeof (*knconf));
2559                 }
2560                 knconf = svp->sv_origknconf;
2561                 if (knconf != NULL) {
2562                         if (knconf->knc_protofmly != NULL)
2563                                 kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2564                         if (knconf->knc_proto != NULL)
2565                                 kmem_free(knconf->knc_proto, KNC_STRSIZE);
2566                         kmem_free(knconf, sizeof (*knconf));
2567                 }
2568                 if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
2569                         kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
2570                 if (svp->sv_path != NULL) {
2571                         kmem_free(svp->sv_path, svp->sv_pathlen);
2572                 }
2573                 nfs_rw_destroy(&svp->sv_lock);
2574 
2575                 /*
2576                  * If we have an nfs4_server from a pnfs data server...
2577                  * XXXrsb This may go away or change
2578                  */
2579                 if (svp->sv_ds_n4sp)
2580                         nfs4_server_rele(svp->sv_ds_n4sp);
2581 
2582                 kmem_free(svp, sizeof (*svp));
2583                 svp = next;
2584         }
2585 }
2586 
2587 void
2588 nfs4_printfhandle(nfs4_fhandle_t *fhp)
2589 {
2590         int *ip;
2591         char *buf;
2592         size_t bufsize;
2593         char *cp;
2594 
2595         /*
2596          * 13 == "(file handle:"
2597          * maximum of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times
2598          *      1 == ' '
2599          *      8 == maximum strlen of "%x"
2600          * 3 == ")\n\0"
2601          */
2602         bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3;
2603         buf = kmem_alloc(bufsize, KM_NOSLEEP);
2604         if (buf == NULL)
2605                 return;
2606 
2607         cp = buf;
2608         (void) strcpy(cp, "(file handle:");
2609         while (*cp != '\0')
2610                 cp++;
2611         for (ip = (int *)fhp->fh_buf;
2612             ip < (int *)&fhp->fh_buf[fhp->fh_len];
2613             ip++) {
2614                 (void) sprintf(cp, " %x", *ip);
2615                 while (*cp != '\0')
2616                         cp++;
2617         }
2618         (void) strcpy(cp, ")\n");
2619 
2620         zcmn_err(getzoneid(), CE_CONT, "%s", buf);
2621 
2622         kmem_free(buf, bufsize);
2623 }
2624 
2625 /*
2626  * The NFSv4 readdir cache subsystem.
2627  *
2628  * We provide a set of interfaces to allow the rest of the system to utilize
2629  * a caching mechanism while encapsulating the details of the actual
2630  * implementation.  This should allow for better maintainability and
2631  * extensibility by consolidating the implementation details in one location.
2632  */
2633 
2634 /*
2635  * Comparator used by AVL routines.
2636  */
2637 static int
2638 rddir4_cache_compar(const void *x, const void *y)
2639 {
2640         rddir4_cache_impl *ai = (rddir4_cache_impl *)x;
2641         rddir4_cache_impl *bi = (rddir4_cache_impl *)y;
2642         rddir4_cache *a = &ai->rc;
2643         rddir4_cache *b = &bi->rc;
2644 
2645         if (a->nfs4_cookie == b->nfs4_cookie) {
2646                 if (a->buflen == b->buflen)
2647                         return (0);
2648                 if (a->buflen < b->buflen)
2649                         return (-1);
2650                 return (1);
2651         }
2652 
2653         if (a->nfs4_cookie < b->nfs4_cookie)
2654                         return (-1);
2655 
2656         return (1);
2657 }
2658 
2659 /*
2660  * Allocate an opaque handle for the readdir cache.
2661  */
2662 void
2663 rddir4_cache_create(rnode4_t *rp)
2664 {
2665         ASSERT(rp->r_dir == NULL);
2666 
2667         rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
2668 
2669         avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl),
2670             offsetof(rddir4_cache_impl, tree));
2671 }
2672 
2673 /*
2674  *  Purge the cache of all cached readdir responses.
2675  */
2676 void
2677 rddir4_cache_purge(rnode4_t *rp)
2678 {
2679         rddir4_cache_impl       *rdip;
2680         rddir4_cache_impl       *nrdip;
2681 
2682         ASSERT(MUTEX_HELD(&rp->r_statelock));
2683 
2684         if (rp->r_dir == NULL)
2685                 return;
2686 
2687         rdip = avl_first(rp->r_dir);
2688 
2689         while (rdip != NULL) {
2690                 nrdip = AVL_NEXT(rp->r_dir, rdip);
2691                 avl_remove(rp->r_dir, rdip);
2692                 rdip->rc.flags &= ~RDDIRCACHED;
2693                 rddir4_cache_rele(rp, &rdip->rc);
2694                 rdip = nrdip;
2695         }
2696         ASSERT(avl_numnodes(rp->r_dir) == 0);
2697 }
2698 
2699 /*
2700  * Destroy the readdir cache.
2701  */
2702 void
2703 rddir4_cache_destroy(rnode4_t *rp)
2704 {
2705         ASSERT(MUTEX_HELD(&rp->r_statelock));
2706         if (rp->r_dir == NULL)
2707                 return;
2708 
2709         rddir4_cache_purge(rp);
2710         avl_destroy(rp->r_dir);
2711         kmem_free(rp->r_dir, sizeof (avl_tree_t));
2712         rp->r_dir = NULL;
2713 }
2714 
2715 /*
2716  * Locate a readdir response from the readdir cache.
2717  *
2718  * Return values:
2719  *
2720  * NULL - If there is an unrecoverable situation like the operation may have
2721  *        been interrupted.
2722  *
2723  * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller.
2724  *                  The flags are set approprately, such that the caller knows
2725  *                  what state the entry is in.
2726  */
2727 rddir4_cache *
2728 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count)
2729 {
2730         rddir4_cache_impl       *rdip = NULL;
2731         rddir4_cache_impl       srdip;
2732         rddir4_cache            *srdc;
2733         rddir4_cache            *rdc = NULL;
2734         rddir4_cache            *nrdc = NULL;
2735         avl_index_t             where;
2736 
2737 top:
2738         ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER));
2739         ASSERT(MUTEX_HELD(&rp->r_statelock));
2740         /*
2741          * Check to see if the readdir cache has been disabled.  If so, then
2742          * simply allocate an rddir4_cache entry and return it, since caching
2743          * operations do not apply.
2744          */
2745         if (rp->r_dir == NULL) {
2746                 if (nrdc == NULL) {
2747                         /*
2748                          * Drop the lock because we are doing a sleeping
2749                          * allocation.
2750                          */
2751                         mutex_exit(&rp->r_statelock);
2752                         rdc = rddir4_cache_alloc(KM_SLEEP);
2753                         rdc->nfs4_cookie = cookie;
2754                         rdc->buflen = count;
2755                         mutex_enter(&rp->r_statelock);
2756                         return (rdc);
2757                 }
2758                 return (nrdc);
2759         }
2760 
2761         srdc = &srdip.rc;
2762         srdc->nfs4_cookie = cookie;
2763         srdc->buflen = count;
2764 
2765         rdip = avl_find(rp->r_dir, &srdip, &where);
2766 
2767         /*
2768          * If we didn't find an entry then create one and insert it
2769          * into the cache.
2770          */
2771         if (rdip == NULL) {
2772                 /*
2773                  * Check for the case where we have made a second pass through
2774                  * the cache due to a lockless allocation.  If we find that no
2775                  * thread has already inserted this entry, do the insert now
2776                  * and return.
2777                  */
2778                 if (nrdc != NULL) {
2779                         avl_insert(rp->r_dir, nrdc->data, where);
2780                         nrdc->flags |= RDDIRCACHED;
2781                         rddir4_cache_hold(nrdc);
2782                         return (nrdc);
2783                 }
2784 
2785 #ifdef DEBUG
2786                 nfs4_readdir_cache_misses++;
2787 #endif
2788                 /*
2789                  * First, try to allocate an entry without sleeping.  If that
2790                  * fails then drop the lock and do a sleeping allocation.
2791                  */
2792                 nrdc = rddir4_cache_alloc(KM_NOSLEEP);
2793                 if (nrdc != NULL) {
2794                         nrdc->nfs4_cookie = cookie;
2795                         nrdc->buflen = count;
2796                         avl_insert(rp->r_dir, nrdc->data, where);
2797                         nrdc->flags |= RDDIRCACHED;
2798                         rddir4_cache_hold(nrdc);
2799                         return (nrdc);
2800                 }
2801 
2802                 /*
2803                  * Drop the lock and do a sleeping allocation.  We incur
2804                  * additional overhead by having to search the cache again,
2805                  * but this case should be rare.
2806                  */
2807                 mutex_exit(&rp->r_statelock);
2808                 nrdc = rddir4_cache_alloc(KM_SLEEP);
2809                 nrdc->nfs4_cookie = cookie;
2810                 nrdc->buflen = count;
2811                 mutex_enter(&rp->r_statelock);
2812                 /*
2813                  * We need to take another pass through the cache
2814                  * since we dropped our lock to perform the alloc.
2815                  * Another thread may have come by and inserted the
2816                  * entry we are interested in.
2817                  */
2818                 goto top;
2819         }
2820 
2821         /*
2822          * Check to see if we need to free our entry.  This can happen if
2823          * another thread came along beat us to the insert.  We can
2824          * safely call rddir4_cache_free directly because no other thread
2825          * would have a reference to this entry.
2826          */
2827         if (nrdc != NULL)
2828                 rddir4_cache_free((rddir4_cache_impl *)nrdc->data);
2829 
2830 #ifdef DEBUG
2831         nfs4_readdir_cache_hits++;
2832 #endif
2833         /*
2834          * Found something.  Make sure it's ready to return.
2835          */
2836         rdc = &rdip->rc;
2837         rddir4_cache_hold(rdc);
2838         /*
2839          * If the cache entry is in the process of being filled in, wait
2840          * until this completes.  The RDDIRWAIT bit is set to indicate that
2841          * someone is waiting and when the thread currently filling the entry
2842          * is done, it should do a cv_broadcast to wakeup all of the threads
2843          * waiting for it to finish. If the thread wakes up to find that
2844          * someone new is now trying to complete the the entry, go back
2845          * to sleep.
2846          */
2847         while (rdc->flags & RDDIR) {
2848                 /*
2849                  * The entry is not complete.
2850                  */
2851                 nfs_rw_exit(&rp->r_rwlock);
2852                 rdc->flags |= RDDIRWAIT;
2853 #ifdef DEBUG
2854                 nfs4_readdir_cache_waits++;
2855 #endif
2856                 while (rdc->flags & RDDIRWAIT) {
2857                         if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) {
2858                                 /*
2859                                  * We got interrupted, probably the user
2860                                  * typed ^C or an alarm fired.  We free the
2861                                  * new entry if we allocated one.
2862                                  */
2863                                 rddir4_cache_rele(rp, rdc);
2864                                 mutex_exit(&rp->r_statelock);
2865                                 (void) nfs_rw_enter_sig(&rp->r_rwlock,
2866                                     RW_READER, FALSE);
2867                                 mutex_enter(&rp->r_statelock);
2868                                 return (NULL);
2869                         }
2870                 }
2871                 mutex_exit(&rp->r_statelock);
2872                 (void) nfs_rw_enter_sig(&rp->r_rwlock,
2873                     RW_READER, FALSE);
2874                 mutex_enter(&rp->r_statelock);
2875         }
2876 
2877         /*
2878          * The entry we were waiting on may have been purged from
2879          * the cache and should no longer be used, release it and
2880          * start over.
2881          */
2882         if (!(rdc->flags & RDDIRCACHED)) {
2883                 rddir4_cache_rele(rp, rdc);
2884                 goto top;
2885         }
2886 
2887         /*
2888          * The entry is completed.  Return it.
2889          */
2890         return (rdc);
2891 }
2892 
2893 /*
2894  * Allocate a cache element and return it.  Can return NULL if memory is
2895  * low.
2896  */
2897 static rddir4_cache *
2898 rddir4_cache_alloc(int flags)
2899 {
2900         rddir4_cache_impl       *rdip = NULL;
2901         rddir4_cache            *rc = NULL;
2902 
2903         rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags);
2904 
2905         if (rdip != NULL) {
2906                 rc = &rdip->rc;
2907                 rc->data = (void *)rdip;
2908                 rc->nfs4_cookie = 0;
2909                 rc->nfs4_ncookie = 0;
2910                 rc->entries = NULL;
2911                 rc->eof = 0;
2912                 rc->entlen = 0;
2913                 rc->buflen = 0;
2914                 rc->actlen = 0;
2915                 /*
2916                  * A readdir is required so set the flag.
2917                  */
2918                 rc->flags = RDDIRREQ;
2919                 cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
2920                 rc->error = 0;
2921                 mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL);
2922                 rdip->count = 1;
2923 #ifdef DEBUG
2924                 atomic_add_64(&clstat4_debug.dirent.value.ui64, 1);
2925 #endif
2926         }
2927         return (rc);
2928 }
2929 
2930 /*
2931  * Increment the reference count to this cache element.
2932  */
2933 static void
2934 rddir4_cache_hold(rddir4_cache *rc)
2935 {
2936         rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data;
2937 
2938         mutex_enter(&rdip->lock);
2939         rdip->count++;
2940         mutex_exit(&rdip->lock);
2941 }
2942 
2943 /*
2944  * Release a reference to this cache element.  If the count is zero then
2945  * free the element.
2946  */
2947 void
2948 rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc)
2949 {
2950         rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data;
2951 
2952         ASSERT(MUTEX_HELD(&rp->r_statelock));
2953 
2954         /*
2955          * Check to see if we have any waiters.  If so, we can wake them
2956          * so that they can proceed.
2957          */
2958         if (rdc->flags & RDDIRWAIT) {
2959                 rdc->flags &= ~RDDIRWAIT;
2960                 cv_broadcast(&rdc->cv);
2961         }
2962 
2963         mutex_enter(&rdip->lock);
2964         ASSERT(rdip->count > 0);
2965         if (--rdip->count == 0) {
2966                 mutex_exit(&rdip->lock);
2967                 rddir4_cache_free(rdip);
2968         } else
2969                 mutex_exit(&rdip->lock);
2970 }
2971 
2972 /*
2973  * Free a cache element.
2974  */
2975 static void
2976 rddir4_cache_free(rddir4_cache_impl *rdip)
2977 {
2978         rddir4_cache *rc = &rdip->rc;
2979 
2980 #ifdef DEBUG
2981         atomic_add_64(&clstat4_debug.dirent.value.ui64, -1);
2982 #endif
2983         if (rc->entries != NULL)
2984                 kmem_free(rc->entries, rc->buflen);
2985         cv_destroy(&rc->cv);
2986         mutex_destroy(&rdip->lock);
2987         kmem_free(rdip, sizeof (*rdip));
2988 }
2989 
2990 /*
2991  * Snapshot callback for nfs:0:nfs4_client as registered with the kstat
2992  * framework.
2993  */
2994 static int
2995 cl4_snapshot(kstat_t *ksp, void *buf, int rw)
2996 {
2997         ksp->ks_snaptime = gethrtime();
2998         if (rw == KSTAT_WRITE) {
2999                 bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl));
3000         } else {
3001                 bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl));
3002         }
3003         return (0);
3004 }
3005 
#ifdef DEBUG
/*
 * kstat snapshot routine for the system-wide debug statistics
 * (clstat4_debug).  Stamps the snapshot time, then either reads the
 * statistics out to buf or, for KSTAT_WRITE, overwrites them from buf.
 * Always returns 0.
 */
static int
cl4_debug_snapshot(kstat_t *ksp, void *buf, int rw)
{
        ksp->ks_snaptime = gethrtime();

        if (rw != KSTAT_WRITE) {
                /*
                 * These are the "global" debug values; they describe the
                 * system as a whole, so every zone sees them unchanged.
                 */
                bcopy(&clstat4_debug, buf, sizeof (clstat4_debug));
                return (0);
        }

        /*
         * Only the global zone can currently write kstats, but check
         * anyway out of paranoia; writes from elsewhere are ignored.
         */
        if (INGLOBALZONE(curproc))
                bcopy(buf, &clstat4_debug, sizeof (clstat4_debug));

        return (0);
}
#endif
3030 
3031 
3032 
3033 /*
3034  * Zone support
3035  */
3036 static void *
3037 clinit4_zone(zoneid_t zoneid)
3038 {
3039         kstat_t *nfs4_client_kstat;
3040         kstat_t *nfs41_client_kstat;
3041         struct nfs4_clnt *nfscl;
3042         uint_t ndata;
3043 
3044         nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
3045         mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL);
3046         nfscl->nfscl_chtable4 = NULL;
3047         nfscl->nfscl_zoneid = zoneid;
3048 
3049         bcopy(&clstat4_tmpl, &nfscl->nfscl_stat[NFS4_MINOR_v0],
3050             sizeof (clstat4_tmpl));
3051         ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t);
3052         if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client",
3053             "misc", KSTAT_TYPE_NAMED, ndata,
3054             KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
3055                 nfs4_client_kstat->ks_private =
3056                     &nfscl->nfscl_stat[NFS4_MINOR_v0];
3057                 nfs4_client_kstat->ks_snapshot = cl4_snapshot;
3058                 kstat_install(nfs4_client_kstat);
3059         }
3060 
3061         bcopy(&clstat4_tmpl, &nfscl->nfscl_stat[NFS4_MINOR_v1],
3062             sizeof (clstat4_tmpl));
3063         if ((nfs41_client_kstat = kstat_create_zone("nfs", 0, "nfs41_client",
3064             "misc", KSTAT_TYPE_NAMED, ndata,
3065             KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
3066                 nfs41_client_kstat->ks_private =
3067                     &nfscl->nfscl_stat[NFS4_MINOR_v1];
3068                 nfs41_client_kstat->ks_snapshot = cl4_snapshot;
3069                 kstat_install(nfs41_client_kstat);
3070         }
3071 
3072         mutex_enter(&nfs4_clnt_list_lock);
3073         list_insert_head(&nfs4_clnt_list, nfscl);
3074         mutex_exit(&nfs4_clnt_list_lock);
3075         return (nfscl);
3076 }
3077 
3078 /*ARGSUSED*/
3079 static void
3080 clfini4_zone(zoneid_t zoneid, void *arg)
3081 {
3082         struct nfs4_clnt *nfscl = arg;
3083         chhead_t *chp, *next;
3084 
3085         if (nfscl == NULL)
3086                 return;
3087         mutex_enter(&nfs4_clnt_list_lock);
3088         list_remove(&nfs4_clnt_list, nfscl);
3089         mutex_exit(&nfs4_clnt_list_lock);
3090         clreclaim4_zone(nfscl, 0);
3091         for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) {
3092                 ASSERT(chp->ch_list == NULL);
3093                 kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
3094                 next = chp->ch_next;
3095                 kmem_free(chp, sizeof (*chp));
3096         }
3097         kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid);
3098         kstat_delete_byname_zone("nfs", 0, "nfs41_client", zoneid);
3099         mutex_destroy(&nfscl->nfscl_chtable4_lock);
3100         kmem_free(nfscl, sizeof (*nfscl));
3101 }
3102 
3103 /*
3104  * Called by endpnt_destructor to make sure the client handles are
3105  * cleaned up before the RPC endpoints.  This becomes a no-op if
3106  * clfini_zone (above) is called first.  This function is needed
3107  * (rather than relying on clfini_zone to clean up) because the ZSD
3108  * callbacks have no ordering mechanism, so we have no way to ensure
3109  * that clfini_zone is called before endpnt_destructor.
3110  */
3111 void
3112 clcleanup4_zone(zoneid_t zoneid)
3113 {
3114         struct nfs4_clnt *nfscl;
3115 
3116         mutex_enter(&nfs4_clnt_list_lock);
3117         nfscl = list_head(&nfs4_clnt_list);
3118         for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) {
3119                 if (nfscl->nfscl_zoneid == zoneid) {
3120                         clreclaim4_zone(nfscl, 0);
3121                         break;
3122                 }
3123         }
3124         mutex_exit(&nfs4_clnt_list_lock);
3125 }
3126 
3127 int
3128 nfs4_subr_init(void)
3129 {
3130         /*
3131          * Allocate and initialize the client handle cache
3132          */
3133 #ifdef DEBUG
3134         uint_t ndata;
3135         kstat_t *nfs4_debug_kstat;
3136 #endif
3137         chtab4_cache = kmem_cache_create("client_handle4_cache",
3138             sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL,
3139             NULL, 0);
3140 
3141 #ifdef DEBUG
3142         /*
3143          * Create a kstat to maintain debug statistics across all zones
3144          */
3145         ndata = sizeof (clstat4_debug) / sizeof (kstat_named_t);
3146         if ((nfs4_debug_kstat = kstat_create("nfs", 0, "nfs4_client_debug",
3147             "misc", KSTAT_TYPE_NAMED, ndata,
3148             KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE)) != NULL) {
3149                 nfs4_debug_kstat->ks_private = &clstat4_debug;
3150                 nfs4_debug_kstat->ks_snapshot = cl4_debug_snapshot;
3151                 kstat_install(nfs4_debug_kstat);
3152         }
3153 #endif
3154 
3155 
3156         /*
3157          * Initialize the list of per-zone client handles (and associated data).
3158          * This needs to be done before we call zone_key_create().
3159          */
3160         list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt),
3161             offsetof(struct nfs4_clnt, nfscl_node));
3162 
3163         /*
3164          * Initialize the zone_key for per-zone client handle lists.
3165          */
3166         zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone);
3167 
3168         if (nfs4err_delay_time == 0)
3169                 nfs4err_delay_time = NFS4ERR_DELAY_TIME;
3170 
3171         return (0);
3172 }
3173 
3174 int
3175 nfs4_subr_fini(void)
3176 {
3177         /*
3178          * Deallocate the client handle cache
3179          */
3180         kmem_cache_destroy(chtab4_cache);
3181 #ifdef DEBUG
3182         kstat_delete_byname("nfs", 0, "nfs4_client_debug");
3183 #endif
3184 
3185         /*
3186          * Destroy the zone_key
3187          */
3188         (void) zone_key_delete(nfs4clnt_zone_key);
3189 
3190         return (0);
3191 }
3192 /*
3193  * Set or Clear direct I/O flag
3194  * VOP_RWLOCK() is held for write access to prevent a race condition
3195  * which would occur if a process is in the middle of a write when
3196  * directio flag gets set. It is possible that all pages may not get flushed.
3197  *
3198  * This is a copy of nfs_directio, changes here may need to be made
3199  * there and vice versa.
3200  */
3201 
3202 int
3203 nfs4_directio(vnode_t *vp, int cmd, cred_t *cr)
3204 {
3205         int     error = 0;
3206         rnode4_t *rp;
3207 
3208         rp = VTOR4(vp);
3209 
3210         if (cmd == DIRECTIO_ON) {
3211 
3212                 if (rp->r_flags & R4DIRECTIO)
3213                         return (0);
3214 
3215                 /*
3216                  * Flush the page cache.
3217                  */
3218 
3219                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
3220 
3221                 if (rp->r_flags & R4DIRECTIO) {
3222                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
3223                         return (0);
3224                 }
3225 
3226                 if (nfs4_has_pages(vp) &&
3227                     ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) {
3228                         error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
3229                             B_INVAL, cr, NULL);
3230                         if (error) {
3231                                 if (error == ENOSPC || error == EDQUOT) {
3232                                         mutex_enter(&rp->r_statelock);
3233                                         if (!rp->r_error)
3234                                                 rp->r_error = error;
3235                                         mutex_exit(&rp->r_statelock);
3236                                 }
3237                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
3238                                 return (error);
3239                         }
3240                 }
3241 
3242                 mutex_enter(&rp->r_statelock);
3243                 rp->r_flags |= R4DIRECTIO;
3244                 mutex_exit(&rp->r_statelock);
3245                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
3246                 return (0);
3247         }
3248 
3249         if (cmd == DIRECTIO_OFF) {
3250                 mutex_enter(&rp->r_statelock);
3251                 rp->r_flags &= ~R4DIRECTIO;      /* disable direct mode */
3252                 mutex_exit(&rp->r_statelock);
3253                 return (0);
3254         }
3255 
3256         return (EINVAL);
3257 }
3258 
3259 /*
3260  * Return TRUE if the file has any pages.  Always go back to
3261  * the master vnode to check v_pages since none of the shadows
3262  * can have pages.
3263  */
3264 
3265 bool_t
3266 nfs4_has_pages(vnode_t *vp)
3267 {
3268         rnode4_t *rp;
3269 
3270         rp = VTOR4(vp);
3271         if (IS_SHADOW(vp, rp))
3272                 vp = RTOV4(rp); /* RTOV4 always gives the master */
3273 
3274         return (vn_has_cached_data(vp));
3275 }
3276 
/*
 * Failover decision table, indexed directly by the clnt_stat value that
 * CLNT_CALL returned.  A non-zero 'error' in the selected row is both the
 * errno to hand back and the signal that failover should be attempted; a
 * zero 'error' means failover is not attempted for that status.
 *
 * Each row also records the RPC_ value it was written for.  Should the
 * RPC_ numbering ever change, direct indexing stops being valid; the
 * stored value lets the lookup code notice the mismatch, warn, and fall
 * back to attempting failover as a defensive measure.
 */

static struct try_failover_tab {
        enum clnt_stat  cstat;
        int             error;
} try_failover_table [] = {
        { RPC_SUCCESS,            0 },
        { RPC_CANTENCODEARGS,     0 },
        { RPC_CANTDECODERES,      0 },
        { RPC_CANTSEND,           ECOMM },
        { RPC_CANTRECV,           ECOMM },
        { RPC_TIMEDOUT,           ETIMEDOUT },
        { RPC_VERSMISMATCH,       0 },
        { RPC_AUTHERROR,          0 },
        { RPC_PROGUNAVAIL,        0 },
        { RPC_PROGVERSMISMATCH,   0 },
        { RPC_PROCUNAVAIL,        0 },
        { RPC_CANTDECODEARGS,     0 },
        { RPC_SYSTEMERROR,        ENOSR },
        { RPC_UNKNOWNHOST,        EHOSTUNREACH },
        { RPC_RPCBFAILURE,        ENETUNREACH },
        { RPC_PROGNOTREGISTERED,  ECONNREFUSED },
        { RPC_FAILED,             ETIMEDOUT },
        { RPC_UNKNOWNPROTO,       EHOSTUNREACH },
        { RPC_INTR,               0 },
        { RPC_UNKNOWNADDR,        EHOSTUNREACH },
        { RPC_TLIERROR,           0 },
        { RPC_NOBROADCAST,        EHOSTUNREACH },
        { RPC_N2AXLATEFAILURE,    ECONNREFUSED },
        { RPC_UDERROR,            0 },
        { RPC_INPROGRESS,         0 },
        { RPC_STALERACHANDLE,     EINVAL },
        { RPC_CANTCONNECT,        ECONNREFUSED },
        { RPC_XPRTFAILED,         ECONNABORTED },
        { RPC_CANTCREATESTREAM,   ECONNREFUSED },
        { RPC_CANTSTORE,          ENOBUFS },
        { RPC_CONN_NOT_BOUND,     0 }
};
3329 
3330 /*
3331  * nfs4_try_failover - determine whether the client should
3332  * attempt failover based on the values stored in the nfs4_error_t.
3333  */
3334 int
3335 nfs4_try_failover(nfs4_error_t *ep)
3336 {
3337         if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE)
3338                 return (TRUE);
3339 
3340         if (ep->error && ep->rpc_status != RPC_SUCCESS)
3341                 return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE);
3342 
3343         return (FALSE);
3344 }
3345 
3346 /*
3347  * try_failover - internal version of nfs4_try_failover, called
3348  * only by rfscall and aclcall.  Determine if failover is warranted
3349  * based on the clnt_stat and return the error number if it is.
3350  */
3351 static int
3352 try_failover(enum clnt_stat rpc_status)
3353 {
3354         int err = 0;
3355 
3356         if (rpc_status == RPC_SUCCESS)
3357                 return (0);
3358 
3359 #ifdef  DEBUG
3360         if (rpc_status != 0 && nfs4_try_failover_any) {
3361                 err = ETIMEDOUT;
3362                 goto done;
3363         }
3364 #endif
3365         /*
3366          * The rpc status is used as an index into the table.
3367          * If the rpc status is outside of the range of the
3368          * table or if the rpc error numbers have been changed
3369          * since the table was constructed, then print a warning
3370          * (DEBUG only) and try failover anyway.  Otherwise, just
3371          * grab the resulting error number out of the table.
3372          */
3373         if (rpc_status < RPC_SUCCESS || rpc_status >=
3374             sizeof (try_failover_table)/sizeof (try_failover_table[0]) ||
3375             try_failover_table[rpc_status].cstat != rpc_status) {
3376 
3377                 err = ETIMEDOUT;
3378 #ifdef  DEBUG
3379                 cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d",
3380                     rpc_status);
3381 #endif
3382         } else
3383                 err = try_failover_table[rpc_status].error;
3384 
3385 done:
3386         if (rpc_status)
3387                 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
3388                     "nfs4_try_failover: %strying failover on error %d",
3389                     err ? "" : "NOT ", rpc_status));
3390 
3391         return (err);
3392 }
3393 
3394 void
3395 nfs4_error_zinit(nfs4_error_t *ep)
3396 {
3397         ep->error = 0;
3398         ep->stat = NFS4_OK;
3399         ep->rpc_status = RPC_SUCCESS;
3400 }
3401 
3402 void
3403 nfs4_error_init(nfs4_error_t *ep, int error)
3404 {
3405         ep->error = error;
3406         ep->stat = NFS4_OK;
3407         ep->rpc_status = RPC_SUCCESS;
3408 }
3409 
3410 
3411 #ifdef DEBUG
3412 
/*
 * Return a 16-bit hash for filehandle, stateid, clientid, owner.
 *
 * The buffer is consumed as 32-bit words; each word is XORed with its own
 * upper half and folded into the key.  Any 1-3 bytes left over after the
 * last whole word are XORed in one at a time.  Only the low 16 bits of the
 * key are returned.
 *
 * A NULL buffer or a non-positive length hashes to 0.
 *
 * NOTE(review): earlier revisions folded in the FIRST 'len & 3' bytes of
 * the buffer instead of the trailing ones, so results for lengths that are
 * not a multiple of four differ from that older output.  If these values
 * must match another implementation of the same hash, confirm that it
 * hashes the trailing bytes too.
 */
int
hash16(void *p, int len)
{
        uint32_t *wp;
        uint8_t *tail;
        uint32_t key = 0;
        int i, rem;

        if (p == NULL || len <= 0)
                return (0);

        /* split the length into whole words plus a short remainder */
        rem = len & 3;
        len &= ~3;

        for (i = 0, wp = (uint32_t *)p; i < len; i += 4, wp++)
                key ^= (*wp >> 16) ^ *wp;

        /* wp now sits just past the last whole word: the left-over bytes */
        tail = (uint8_t *)wp;
        for (i = 0; i < rem; i++)
                key ^= tail[i];

        return (key & 0xffff);
}
3439 
3440 /*
3441  * rnode4info - return filehandle and path information for an rnode.
3442  * XXX MT issues: uses a single static buffer, no locking of path.
3443  */
3444 char *
3445 rnode4info(rnode4_t *rp)
3446 {
3447         static char buf[80];
3448         nfs4_fhandle_t fhandle;
3449         char *path;
3450         char *type;
3451 
3452         if (rp == NULL)
3453                 return ("null");
3454         if (rp->r_flags & R4ISXATTR)
3455                 type = "attr";
3456         else if (RTOV4(rp)->v_flag & V_XATTRDIR)
3457                 type = "attrdir";
3458         else if (RTOV4(rp)->v_flag & VROOT)
3459                 type = "root";
3460         else if (RTOV4(rp)->v_type == VDIR)
3461                 type = "dir";
3462         else if (RTOV4(rp)->v_type == VREG)
3463                 type = "file";
3464         else
3465                 type = "other";
3466         sfh4_copyval(rp->r_fh, &fhandle);
3467         path = fn_path(rp->r_svnode.sv_name);
3468         (void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n",
3469             (void *)rp, path, type, rp->r_flags,
3470             hash16((void *)&fhandle.fh_buf, fhandle.fh_len));
3471         kmem_free(path, strlen(path)+1);
3472         return (buf);
3473 }
3474 #endif
3475 
3476 int nfs4_sessions_debug;
3477 
3478 void
3479 nfs4sequence_setup(nfs4_session_t *np, COMPOUND4args_clnt *rfsargp,
3480         nfs4_slot_t **slotpp)
3481 {
3482         int                     slot_id = 0;
3483         nfs4_slot_t             *slot;
3484 
3485         bcopy(&np->sessionid,
3486             rfsargp->array->nfs_argop4_u.opsequence.sa_sessionid,
3487             sizeof (sessionid4));
3488 
3489         /*
3490          * Find a slot to use.
3491          */
3492         (void) nfs_rw_enter_sig(&np->slot_table_rwlock, RW_READER, 0);
3493         mutex_enter(&np->slot_lock);
3494         slot_id = np->next_slot;
3495         while ((np->slot_table[slot_id]->slot_inuse != 0) ||
3496             (np->slot_table[slot_id]->slot_bad != 0)) {
3497                 /*
3498                  * Can drop the rwlock here so we don't hold it over
3499                  * a possible cv_wait.
3500                  */
3501                 nfs_rw_exit(&np->slot_table_rwlock);
3502 
3503                 /*
3504                  * This slot is still in use.
3505                  * Check next slot if there are still some available.
3506                  */
3507 
3508                 while (np->slots_available == 0) {
3509                         if (nfs4_sessions_debug)
3510                                 cmn_err(CE_WARN, "Waiting for Available Slot");
3511                         cv_wait(&np->slot_wait, &np->slot_lock);
3512                 }
3513                 slot_id++;
3514                 if (slot_id == np->maxslots)
3515                         slot_id = 0;
3516                 (void) nfs_rw_enter_sig(&np->slot_table_rwlock, RW_READER, 0);
3517         }
3518         *slotpp = slot = np->slot_table[slot_id];
3519         slot->slot_inuse = 1;
3520         np->slots_available--;
3521         np->next_slot = slot_id + 1 == np->maxslots ? 0 : slot_id + 1;
3522 
3523         /*
3524          * Update SEQUENCE args
3525          */
3526         rfsargp->array->nfs_argop4_u.opsequence.sa_sequenceid =
3527             slot->slot_seqid;
3528         rfsargp->array->nfs_argop4_u.opsequence.sa_slotid = slot->slot_id;
3529         rfsargp->array->nfs_argop4_u.opsequence.sa_highest_slotid  =
3530             np->maxslots - np->slots_available;
3531         /* XXX - rick - need sr_target_highest_slotid */
3532         mutex_exit(&np->slot_lock);
3533         nfs_rw_exit(&np->slot_table_rwlock);
3534 }
3535 
3536 void
3537 nfs4sequence_fin(nfs4_session_t *np, COMPOUND4res_clnt *rfsresp,
3538         nfs4_slot_t *slot, nfs4_error_t *ep)
3539 {
3540         SEQUENCE4resok          *seqres;
3541 
3542         mutex_enter(&np->slot_lock);
3543 
3544         ASSERT(slot->slot_inuse);
3545         slot->slot_inuse = 0;
3546 
3547         /* if call started but not completed, mark slot as bad */
3548         if ((ep->error != 0) &&
3549             ((ep->rpc_status == RPC_TIMEDOUT) ||
3550             (ep->rpc_status == RPC_INTR))) {
3551                 cmn_err(CE_WARN, "SEQUENCE failed %d, bad slot %d:%d",
3552                     ep->rpc_status, slot->slot_id, slot->slot_seqid);
3553                 slot->slot_bad = 1;
3554         } else {
3555                 if (slot->slot_id < np->next_slot)
3556                         np->next_slot = slot->slot_id;
3557 
3558                 /* Update slot seqid on successful op_sequence */
3559                 if (ep->error == 0 && (rfsresp->array != NULL &&
3560                     rfsresp->array->nfs_resop4_u.opsequence.sr_status ==
3561                     NFS4_OK))
3562                         slot->slot_seqid++;
3563 
3564                 if (np->slots_available++ == 0) {
3565                         if (nfs4_sessions_debug)
3566                                 cmn_err(CE_WARN, "Slots Available");
3567                         cv_broadcast(&np->slot_wait);
3568                 }
3569         }
3570 
3571         mutex_exit(&np->slot_lock);
3572 
3573         /* SEQUENCE Op Successful? */
3574         if (ep->error != 0 || rfsresp->status != NFS4_OK ||
3575             (rfsresp->array != NULL &&
3576             rfsresp->array->nfs_resop4_u.opsequence.sr_status != NFS4_OK)) {
3577                 /*
3578                  * cmn_err(CE_WARN, "sequence op failed or missing\n");
3579                  */
3580                 return;
3581         }
3582 
3583         seqres = &rfsresp->array->nfs_resop4_u.opsequence.
3584             SEQUENCE4res_u.sr_resok4;
3585 
3586         /*
3587          * Sequence Op Successful, Handle Errors and maxslot changes.
3588          */
3589 
3590         if (seqres->sr_status_flags & SEQ4_STATUS_CB_PATH_DOWN) {
3591 #ifdef  notyet
3592                 nfs4_delegreturn_all(np);
3593 #else
3594                 cmn_err(CE_WARN, "SEQ4_STATUS_CB_PATH_DOWN not handled");
3595 #endif
3596         }
3597 
3598         if (seqres->sr_status_flags & SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING) {
3599                 cmn_err(CE_WARN, "SEQUENCE got CB_GSS_CONTEXTS_EXPIRING");
3600         }
3601 
3602         if (seqres->sr_status_flags & SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED) {
3603                 cmn_err(CE_WARN, "SEQUENCE got CB_GSS_CONTEXTS_EXPIRED");
3604         }
3605 
3606         if (seqres->sr_status_flags & SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED) {
3607                 cmn_err(CE_WARN, "SEQUENCE got EXIPRED_ALL_STATE_REVOKED");
3608         }
3609 
3610         if (seqres->sr_status_flags & SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED) {
3611                 cmn_err(CE_WARN, "SEQUENCE got EXPIRED_SOME_STATE_REVOKED");
3612         }
3613 
3614         if (seqres->sr_status_flags & SEQ4_STATUS_ADMIN_STATE_REVOKED) {
3615                 cmn_err(CE_WARN, "SEQUENCE got ADMIN_STATE_REVOKED");
3616         }
3617 
3618         if (seqres->sr_status_flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) {
3619                 cmn_err(CE_WARN, "SEQUENCE got RECALLABLE_STATE_REVOKED");
3620         }
3621 
3622         if (seqres->sr_status_flags & SEQ4_STATUS_LEASE_MOVED) {
3623                 cmn_err(CE_WARN, "SEQUENCE got LEASE_MOVED");
3624         }
3625 }
3626 
3627 kmutex_t nfs4_session_lst_lock;
3628 list_t nfs4_session_list;
3629 
3630 void
3631 nfs4session_init()
3632 {
3633         mutex_init(&nfs4_session_lst_lock, NULL, MUTEX_DEFAULT, NULL);
3634         list_create(&nfs4_session_list, sizeof (nfs4_session_t),
3635             offsetof(nfs4_session_t, ssx_list));
3636 }
3637 
3638 /*
3639  * Compare 2 netbufs, return true of they match
3640  */
3641 int
3642 netbuf_match(struct netbuf *n1, struct netbuf *n2)
3643 {
3644         if (n1->len == n2->len && bcmp(n1->buf, n2->buf, n1->len) == 0)
3645                 return (1);
3646         return (0);
3647 }
3648 
3649 void *
3650 new_string(void *cur)
3651 {
3652         void *v;
3653 
3654         v = kmem_alloc(strlen(cur)+1, KM_SLEEP);
3655         (void) strcpy(v, cur);
3656         return (v);
3657 }
3658 
3659 servinfo4_t *
3660 new_servinfo4(struct knetconfig *knc, struct netbuf *nb, int flags)
3661 {
3662         servinfo4_t *svp;
3663         struct sec_data *secdata;
3664 
3665         /*
3666          * Allocate a servinfo4 struct.
3667          */
3668         svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
3669         nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
3670         svp->sv_flags = flags;
3671 
3672         svp->sv_knconf = kmem_alloc(sizeof (*knc), KM_SLEEP);
3673         svp->sv_knconf->knc_semantics = knc->knc_semantics;
3674         svp->sv_knconf->knc_protofmly = new_string(knc->knc_protofmly);
3675         svp->sv_knconf->knc_proto = new_string(knc->knc_proto);
3676         svp->sv_knconf->knc_rdev = knc->knc_rdev;
3677         bzero(svp->sv_knconf->knc_unused, sizeof (knc->knc_unused));
3678 
3679         svp->sv_addr.maxlen = nb->maxlen;
3680         svp->sv_addr.len = nb->len;
3681         svp->sv_addr.buf = kmem_alloc(nb->maxlen, KM_SLEEP);
3682         bcopy(nb->buf, svp->sv_addr.buf, nb->len);
3683 
3684         /* XXX, ought to inherit sec data from parent servinfo4 */
3685         secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
3686         secdata->secmod = secdata->rpcflavor = AUTH_SYS;
3687         secdata->data = NULL;
3688         svp->sv_secdata = secdata;
3689 
3690         /* XXX */
3691         svp->sv_path = "/";
3692         svp->sv_pathlen = 1;
3693         svp->sv_hostname = "data-server";
3694         svp->sv_hostnamelen = strlen("data-server");
3695 
3696         return (svp);
3697 }
3698 
3699 /*
3700  * XXX - this will be eliminated once everyone is calling rfs4call()
3701  * emulate the behavior of rfs4call for those who call
3702  * CLNT_CALL directly
3703  */
3704 void
3705 nfs4_error_set(nfs4_error_t *ep, enum clnt_stat rpc_status, enum nfsstat4 stat)
3706 {
3707         if (rpc_status == RPC_SUCCESS) {
3708                 ep->error = 0;       /* geterrno4 happens higher up */
3709                 ep->stat = stat;
3710                 ep->rpc_status = RPC_SUCCESS;
3711         } else {
3712                 ep->error = EPROTO;  /* XXX */
3713                 ep->stat = 0;
3714                 ep->rpc_status = rpc_status;
3715         }
3716 }
3717 
3718 /*
3719  * A function to interface with RPC tags.
3720  * Returns 0 on success
3721  */
3722 int
3723 nfs4_tag_ctl(nfs4_server_t *np, mntinfo4_t *mi, servinfo4_t *svp,
3724     sessionid4 oldsid, int cmd, cred_t *cr)
3725 {
3726         int error;
3727         CLIENT *client;
3728         struct chtab *ch;
3729         struct nfs4_clnt *nfscl;
3730 
3731         nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
3732         ASSERT(nfscl != NULL);
3733 
3734         if (svp == NULL) {
3735                 /*
3736                  * We just pick the current servinfo ptr. Even if
3737                  * this changes midstream, we should be alright, since
3738                  * we are not really going OTW. Just used to get a
3739                  * client handle.
3740                  */
3741                 mutex_enter(&mi->mi_lock);
3742                 svp = mi->mi_curr_serv;
3743                 mutex_exit(&mi->mi_lock);
3744         }
3745 
3746         error = nfs_clget4(mi, svp, cr, &client, &ch, nfscl);
3747 
3748         if (error)
3749                 return (error);
3750 
3751         switch (cmd) {
3752         case NFS4_TAG_SWAP:
3753 
3754                 /*
3755                  * To do the sessid swap first set the old tag and
3756                  * then call to swap to the new one
3757                  */
3758 
3759                 if (!CLNT_CONTROL(client, CLSET_TAG, (char *)oldsid)) {
3760                         zcmn_err(getzoneid(), CE_WARN,
3761                             "Failed to set tag on client handle");
3762                         error = EIO;
3763                         break;
3764                 }
3765 
3766                 /*
3767                  * This switches the tag value in the RPC layer
3768                  * The client handle's tag (client->cku_tag) is set
3769                  * to new tag as well.
3770                  */
3771 
3772                 if (!CLNT_CONTROL(client, CLSET_TAG_SWAP,
3773                     (char *)(np->ssx.sessionid))) {
3774                         zcmn_err(getzoneid(), CE_WARN,
3775                             "Failed to swap rpc tags");
3776                         error = EIO;
3777                 }
3778 
3779                 break;
3780 
3781         case NFS4_TAG_DESTROY:
3782 
3783                 if (!CLNT_CONTROL(client, CLSET_TAG_DESTROY,
3784                     (char *)(np->ssx.sessionid))) {
3785                         zcmn_err(getzoneid(), CE_WARN,
3786                             "Failed destroy rpc tags");
3787                         error = EIO;
3788                 }
3789                 break;
3790 
3791         case NFS4_CBSERVER_CLEANUP:
3792                 if (!CLNT_CONTROL(client, CLSET_CBSERVER_CLEANUP,
3793                     (char *)(np->ssx.sessionid))) {
3794                         zcmn_err(getzoneid(), CE_WARN,
3795                             "Failed destroy rpc tags");
3796                         error = EIO;
3797                 }
3798                 break;
3799         }
3800 
3801         clfree4(client, ch, nfscl);
3802         return (error);
3803 }
3804 
3805 /*
3806  * All NFSv4.1 defined errors
3807  */
3808 char *
3809 nfs41_strerror(nfsstat4 err)
3810 {
3811         switch (err) {
3812         case NFS4_OK:
3813                 return ("NFS4_OK");
3814         case NFS4ERR_PERM:
3815                 return ("NFS4ERR_PERM");
3816         case NFS4ERR_NOENT:
3817                 return ("NFS4ERR_NOENT");
3818         case NFS4ERR_IO:
3819                 return ("NFS4ERR_IO");
3820         case NFS4ERR_NXIO:
3821                 return ("NFS4ERR_NXIO");
3822         case NFS4ERR_ACCESS:
3823                 return ("NFS4ERR_ACCESS");
3824         case NFS4ERR_EXIST:
3825                 return ("NFS4ERR_EXIST");
3826         case NFS4ERR_XDEV:
3827                 return ("NFS4ERR_XDEV");
3828         case NFS4ERR_NOTDIR:
3829                 return ("NFS4ERR_NOTDIR");
3830         case NFS4ERR_ISDIR:
3831                 return ("NFS4ERR_ISDIR");
3832         case NFS4ERR_INVAL:
3833                 return ("NFS4ERR_INVAL");
3834         case NFS4ERR_FBIG:
3835                 return ("NFS4ERR_FBIG");
3836         case NFS4ERR_NOSPC:
3837                 return ("NFS4ERR_NOSPC");
3838         case NFS4ERR_ROFS:
3839                 return ("NFS4ERR_ROFS");
3840         case NFS4ERR_MLINK:
3841                 return ("NFS4ERR_MLINK");
3842         case NFS4ERR_NAMETOOLONG:
3843                 return ("NFS4ERR_NAMETOOLONG");
3844         case NFS4ERR_NOTEMPTY:
3845                 return ("NFS4ERR_NOTEMPTY");
3846         case NFS4ERR_DQUOT:
3847                 return ("NFS4ERR_DQUOT");
3848         case NFS4ERR_STALE:
3849                 return ("NFS4ERR_STALE");
3850         case NFS4ERR_BADHANDLE:
3851                 return ("NFS4ERR_BADHANDLE");
3852         case NFS4ERR_BAD_COOKIE:
3853                 return ("NFS4ERR_BAD_COOKIE");
3854         case NFS4ERR_NOTSUPP:
3855                 return ("NFS4ERR_NOTSUPP");
3856         case NFS4ERR_TOOSMALL:
3857                 return ("NFS4ERR_TOOSMALL");
3858         case NFS4ERR_SERVERFAULT:
3859                 return ("NFS4ERR_SERVERFAULT");
3860         case NFS4ERR_BADTYPE:
3861                 return ("NFS4ERR_BADTYPE");
3862         case NFS4ERR_DELAY:
3863                 return ("NFS4ERR_DELAY");
3864         case NFS4ERR_SAME:
3865                 return ("NFS4ERR_SAME");
3866         case NFS4ERR_DENIED:
3867                 return ("NFS4ERR_DENIED");
3868         case NFS4ERR_EXPIRED:
3869                 return ("NFS4ERR_EXPIRED");
3870         case NFS4ERR_LOCKED:
3871                 return ("NFS4ERR_LOCKED");
3872         case NFS4ERR_GRACE:
3873                 return ("NFS4ERR_GRACE");
3874         case NFS4ERR_FHEXPIRED:
3875                 return ("NFS4ERR_FHEXPIRED");
3876         case NFS4ERR_SHARE_DENIED:
3877                 return ("NFS4ERR_SHARE_DENIED");
3878         case NFS4ERR_WRONGSEC:
3879                 return ("NFS4ERR_WRONGSEC");
3880         case NFS4ERR_CLID_INUSE:
3881                 return ("NFS4ERR_CLID_INUSE");
3882         case NFS4ERR_RESOURCE:
3883                 return ("NFS4ERR_RESOURCE");
3884         case NFS4ERR_MOVED:
3885                 return ("NFS4ERR_MOVED");
3886         case NFS4ERR_NOFILEHANDLE:
3887                 return ("NFS4ERR_NOFILEHANDLE");
3888         case NFS4ERR_MINOR_VERS_MISMATCH:
3889                 return ("NFS4ERR_MINOR_VERS_MISMATCH");
3890         case NFS4ERR_STALE_CLIENTID:
3891                 return ("NFS4ERR_STALE_CLIENTID");
3892         case NFS4ERR_STALE_STATEID:
3893                 return ("NFS4ERR_STALE_STATEID");
3894         case NFS4ERR_OLD_STATEID:
3895                 return ("NFS4ERR_OLD_STATEID");
3896         case NFS4ERR_BAD_STATEID:
3897                 return ("NFS4ERR_BAD_STATEID");
3898         case NFS4ERR_BAD_SEQID:
3899                 return ("NFS4ERR_BAD_SEQID");
3900         case NFS4ERR_NOT_SAME:
3901                 return ("NFS4ERR_NOT_SAME");
3902         case NFS4ERR_LOCK_RANGE:
3903                 return ("NFS4ERR_LOCK_RANGE");
3904         case NFS4ERR_SYMLINK:
3905                 return ("NFS4ERR_SYMLINK");
3906         case NFS4ERR_RESTOREFH:
3907                 return ("NFS4ERR_RESTOREFH");
3908         case NFS4ERR_LEASE_MOVED:
3909                 return ("NFS4ERR_LEASE_MOVED");
3910         case NFS4ERR_ATTRNOTSUPP:
3911                 return ("NFS4ERR_ATTRNOTSUPP");
3912         case NFS4ERR_NO_GRACE:
3913                 return ("NFS4ERR_NO_GRACE");
3914         case NFS4ERR_RECLAIM_BAD:
3915                 return ("NFS4ERR_RECLAIM_BAD");
3916         case NFS4ERR_RECLAIM_CONFLICT:
3917                 return ("NFS4ERR_RECLAIM_CONFLICT");
3918         case NFS4ERR_BADXDR:
3919                 return ("NFS4ERR_BADXDR");
3920         case NFS4ERR_LOCKS_HELD:
3921                 return ("NFS4ERR_LOCKS_HELD");
3922         case NFS4ERR_OPENMODE:
3923                 return ("NFS4ERR_OPENMODE");
3924         case NFS4ERR_BADOWNER:
3925                 return ("NFS4ERR_BADOWNER");
3926         case NFS4ERR_BADCHAR:
3927                 return ("NFS4ERR_BADCHAR");
3928         case NFS4ERR_BADNAME:
3929                 return ("NFS4ERR_BADNAME");
3930         case NFS4ERR_BAD_RANGE:
3931                 return ("NFS4ERR_BAD_RANGE");
3932         case NFS4ERR_LOCK_NOTSUPP:
3933                 return ("NFS4ERR_LOCK_NOTSUPP");
3934         case NFS4ERR_OP_ILLEGAL:
3935                 return ("NFS4ERR_OP_ILLEGAL");
3936         case NFS4ERR_DEADLOCK:
3937                 return ("NFS4ERR_DEADLOCK");
3938         case NFS4ERR_FILE_OPEN:
3939                 return ("NFS4ERR_FILE_OPEN");
3940         case NFS4ERR_ADMIN_REVOKED:
3941                 return ("NFS4ERR_ADMIN_REVOKED");
3942         case NFS4ERR_CB_PATH_DOWN:
3943                 return ("NFS4ERR_CB_PATH_DOWN");
3944         case NFS4ERR_BADIOMODE:
3945                 return ("NFS4ERR_BADIOMODE");
3946         case NFS4ERR_BADLAYOUT:
3947                 return ("NFS4ERR_BADLAYOUT");
3948         case NFS4ERR_BAD_SESSION_DIGEST:
3949                 return ("NFS4ERR_BAD_SESSION_DIGEST");
3950         case NFS4ERR_BADSESSION:
3951                 return ("NFS4ERR_BADSESSION");
3952         case NFS4ERR_BADSLOT:
3953                 return ("NFS4ERR_BADSLOT");
3954         case NFS4ERR_COMPLETE_ALREADY:
3955                 return ("NFS4ERR_COMPLETE_ALREADY");
3956         case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
3957                 return ("NFS4ERR_CONN_NOT_BOUND_TO_SESSION");
3958         case NFS4ERR_DELEG_ALREADY_WANTED:
3959                 return ("NFS4ERR_DELEG_ALREADY_WANTED");
3960         case NFS4ERR_BACK_CHAN_BUSY:
3961                 return ("NFS4ERR_BACK_CHAN_BUSY");
3962         case NFS4ERR_LAYOUTTRYLATER:
3963                 return ("NFS4ERR_LAYOUTTRYLATER");
3964         case NFS4ERR_LAYOUTUNAVAILABLE:
3965                 return ("NFS4ERR_LAYOUTUNAVAILABLE");
3966         case NFS4ERR_NOMATCHING_LAYOUT:
3967                 return ("NFS4ERR_NOMATCHING_LAYOUT");
3968         case NFS4ERR_RECALLCONFLICT:
3969                 return ("NFS4ERR_RECALLCONFLICT");
3970         case NFS4ERR_UNKNOWN_LAYOUTTYPE:
3971                 return ("NFS4ERR_UNKNOWN_LAYOUTTYPE");
3972         case NFS4ERR_SEQ_MISORDERED:
3973                 return ("NFS4ERR_SEQ_MISORDERED");
3974         case NFS4ERR_SEQUENCE_POS:
3975                 return ("NFS4ERR_SEQUENCE_POS");
3976         case NFS4ERR_REQ_TOO_BIG:
3977                 return ("NFS4ERR_REQ_TOO_BIG");
3978         case NFS4ERR_REP_TOO_BIG:
3979                 return ("NFS4ERR_REP_TOO_BIG");
3980         case NFS4ERR_REP_TOO_BIG_TO_CACHE:
3981                 return ("NFS4ERR_REP_TOO_BIG_TO_CACHE");
3982         case NFS4ERR_RETRY_UNCACHED_REP:
3983                 return ("NFS4ERR_RETRY_UNCACHED_REP");
3984         case NFS4ERR_UNSAFE_COMPOUND:
3985                 return ("NFS4ERR_UNSAFE_COMPOUND");
3986         case NFS4ERR_TOO_MANY_OPS:
3987                 return ("NFS4ERR_TOO_MANY_OPS");
3988         case NFS4ERR_OP_NOT_IN_SESSION:
3989                 return ("NFS4ERR_OP_NOT_IN_SESSION");
3990         case NFS4ERR_HASH_ALG_UNSUPP:
3991                 return ("NFS4ERR_HASH_ALG_UNSUPP");
3992         case NFS4ERR_CLIENTID_BUSY:
3993                 return ("NFS4ERR_CLIENTID_BUSY");
3994         case NFS4ERR_PNFS_IO_HOLE:
3995                 return ("NFS4ERR_PNFS_IO_HOLE");
3996         case NFS4ERR_SEQ_FALSE_RETRY:
3997                 return ("NFS4ERR_SEQ_FALSE_RETRY");
3998         case NFS4ERR_BAD_HIGH_SLOT:
3999                 return ("NFS4ERR_BAD_HIGH_SLOT");
4000         case NFS4ERR_DEADSESSION:
4001                 return ("NFS4ERR_DEADSESSION");
4002         case NFS4ERR_ENCR_ALG_UNSUPP:
4003                 return ("NFS4ERR_ENCR_ALG_UNSUPP");
4004         case NFS4ERR_PNFS_NO_LAYOUT:
4005                 return ("NFS4ERR_PNFS_NO_LAYOUT");
4006         case NFS4ERR_NOT_ONLY_OP:
4007                 return ("NFS4ERR_NOT_ONLY_OP");
4008         case NFS4ERR_WRONG_CRED:
4009                 return ("NFS4ERR_WRONG_CRED");
4010         case NFS4ERR_WRONG_TYPE:
4011                 return ("NFS4ERR_WRONG_TYPE");
4012         case NFS4ERR_DIRDELEG_UNAVAIL:
4013                 return ("NFS4ERR_DIRDELEG_UNAVAIL");
4014         case NFS4ERR_REJECT_DELEG:
4015                 return ("NFS4ERR_REJECT_DELEG");
4016         case NFS4ERR_RETURNCONFLICT:
4017                 return ("NFS4ERR_RETURNCONFLICT");
4018         default:
4019                 {
4020                         static char      msg[99];
4021                         static char     *ies = "Unknown NFSv4.1 error";
4022 
4023                         (void) snprintf(msg, 99, "%s: %d", ies, (int)err);
4024                         return (msg);
4025                 }
4026         }
4027 }
--- EOF ---