1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T             */
  27 /*        All Rights Reserved   */
  28 
  29 /*
  30  * University Copyright- Copyright (c) 1982, 1986, 1988
  31  * The Regents of the University of California
  32  * All Rights Reserved
  33  *
  34  * University Acknowledgment- Portions of this document are derived from
  35  * software developed by the University of California, Berkeley, and its
  36  * contributors.
  37  */
  38 
  39 /* LINTLIBRARY */
  40 /* PROTOLIB1 */
  41 
  42 /* NFS server */
  43 
  44 #include <sys/param.h>
  45 #include <sys/types.h>
  46 #include <sys/stat.h>
  47 #include <syslog.h>
  48 #include <tiuser.h>
  49 #include <rpc/rpc.h>
  50 #include <errno.h>
  51 #include <thread.h>
  52 #include <sys/resource.h>
  53 #include <sys/time.h>
  54 #include <sys/file.h>
  55 #include <nfs/nfs.h>
  56 #include <nfs/nfs_acl.h>
  57 #include <nfs/nfssys.h>
  58 #include <stdio.h>
  59 #include <stdio_ext.h>
  60 #include <stdlib.h>
  61 #include <signal.h>
  62 #include <netconfig.h>
  63 #include <netdir.h>
  64 #include <string.h>
  65 #include <unistd.h>
  66 #include <stropts.h>
  67 #include <sys/tihdr.h>
  68 #include <sys/wait.h>
  69 #include <poll.h>
  70 #include <priv_utils.h>
  71 #include <sys/tiuser.h>
  72 #include <netinet/tcp.h>
  73 #include <deflt.h>
  74 #include <rpcsvc/daemon_utils.h>
  75 #include <rpcsvc/nfs4_prot.h>
  76 #include <libnvpair.h>
  77 #include "nfs_tbind.h"
  78 #include "thrpool.h"
  79 
  80 /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
  81 #define QUIESCE_VERSMIN 4
  82 /* DSS: distributed stable storage */
  83 #define DSS_VERSMIN     4
  84 
  85 static  int     nfssvc(int, struct netbuf, struct netconfig *);
  86 static  int     nfssvcpool(int maxservers);
  87 static  int     dss_init(uint_t npaths, char **pathnames);
  88 static  void    dss_mkleafdirs(uint_t npaths, char **pathnames);
  89 static  void    dss_mkleafdir(char *dir, char *leaf, char *path);
  90 static  void    usage(void);
  91 int             qstrcmp(const void *s1, const void *s2);
  92 
  93 extern  int     _nfssys(int, void *);
  94 
  95 extern int      daemonize_init(void);
  96 extern void     daemonize_fini(int fd);
  97 
  98 /* signal handlers */
  99 static void sigflush(int);
 100 static void quiesce(int);
 101 
 102 static  char    *MyName;
 103 static  NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
 104                                             "/dev/udp6", NULL };
 105 /* static       NETSELDECL(defaultprotos)[] =   { NC_UDP, NC_TCP, NULL }; */
 106 /*
 107  * The following are all globals used by routines in nfs_tbind.c.
 108  */
 109 size_t  end_listen_fds;         /* used by conn_close_oldest() */
 110 size_t  num_fds = 0;            /* used by multiple routines */
 111 int     listen_backlog = 32;    /* used by bind_to_{provider,proto}() */
 112 int     num_servers;            /* used by cots_listen_event() */
 113 int     (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
 114                                 /* used by cots_listen_event() */
 115 int     max_conns_allowed = -1; /* used by cots_listen_event() */
 116 
 117 /*
 118  * Keep track of min/max versions of NFS protocol to be started.
 119  * Start with the defaults (min == 2, max == 3).  We have the
 120  * capability of starting vers=4 but only if the user requests it.
 121  */
 122 int     nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
 123 int     nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
 124 
 125 /*
 126  * Set the default for server delegation enablement and set per
 127  * /etc/default/nfs configuration (if present).
 128  */
 129 int     nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
 130 
 131 /*
 132  * Default values for TCP send and receive buffer size of NFS server
 133  * connections.
 134  *
 135  * These values can be tuned by user via /etc/default/nfs configuration
 136  * file by setting NFS_SERVER_SNDBUFSZ and NFS_SERVER_RCVBUFSZ.
 137  *
 138  * To force NFS connections to use system-wide default for TCP send and
 139  * receive buffer, set NFS_SERVER_SNDBUFSZ and NFS_SERVER_RCVBUFSZ to 0.
 140  */
 141 int     nfs_server_sndbufsz = 1048576;
 142 int     nfs_server_rcvbufsz = 1048576;
 143 
 144 int
 145 main(int ac, char *av[])
 146 {
 147         char *dir = "/";
 148         int allflag = 0;
 149         int df_allflag = 0;
 150         int opt_cnt = 0;
 151         int maxservers = 1;     /* zero allows inifinte number of threads */
 152         int maxservers_set = 0;
 153         int logmaxservers = 0;
 154         int pid;
 155         int i, bufsz;
 156         char *provider = (char *)NULL;
 157         char *df_provider = (char *)NULL;
 158         struct protob *protobp0, *protobp;
 159         NETSELDECL(proto) = NULL;
 160         NETSELDECL(df_proto) = NULL;
 161         NETSELPDECL(providerp);
 162         char *defval;
 163         boolean_t can_do_mlp;
 164         uint_t dss_npaths = 0;
 165         char **dss_pathnames = NULL;
 166         sigset_t sgset;
 167 
 168         int pipe_fd = -1;
 169 
 170         MyName = *av;
 171 
 172         /*
 173          * Initializations that require more privileges than we need to run.
 174          */
 175         (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
 176         svcsetprio();
 177 
 178         can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
 179         if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
 180             DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
 181             can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
 182                 (void) fprintf(stderr, "%s should be run with"
 183                     " sufficient privileges\n", av[0]);
 184                 exit(1);
 185         }
 186 
 187         (void) enable_extended_FILE_stdio(-1, -1);
 188 
 189         /*
 190          * Read in the values from config file first before we check
 191          * command line options so the options override the file.
 192          */
 193         if ((defopen(NFSADMIN)) == 0) {
 194                 if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) {
 195                         errno = 0;
 196                         max_conns_allowed = strtol(defval, (char **)NULL, 10);
 197                         if (errno != 0) {
 198                                 max_conns_allowed = -1;
 199                         }
 200                 }
 201                 if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) {
 202                         errno = 0;
 203                         listen_backlog = strtol(defval, (char **)NULL, 10);
 204                         if (errno != 0) {
 205                                 listen_backlog = 32;
 206                         }
 207                 }
 208                 if ((defval = defread("NFSD_PROTOCOL=")) != NULL) {
 209                         df_proto = strdup(defval);
 210                         opt_cnt++;
 211                         if (strncasecmp("ALL", defval, 3) == 0) {
 212                                 free(df_proto);
 213                                 df_proto = NULL;
 214                                 df_allflag = 1;
 215                         }
 216                 }
 217                 if ((defval = defread("NFSD_DEVICE=")) != NULL) {
 218                         df_provider = strdup(defval);
 219                         opt_cnt++;
 220                 }
 221                 if ((defval = defread("NFSD_SERVERS=")) != NULL) {
 222                         errno = 0;
 223                         maxservers = strtol(defval, (char **)NULL, 10);
 224                         if (errno != 0) {
 225                                 maxservers = 1;
 226                         } else {
 227                                 maxservers_set = 1;
 228                         }
 229                 }
 230                 if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) {
 231                         errno = 0;
 232                         nfs_server_vers_min =
 233                             strtol(defval, (char **)NULL, 10);
 234                         if (errno != 0) {
 235                                 nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
 236                         }
 237                 }
 238                 if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) {
 239                         errno = 0;
 240                         nfs_server_vers_max =
 241                             strtol(defval, (char **)NULL, 10);
 242                         if (errno != 0) {
 243                                 nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
 244                         }
 245                 }
 246                 if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) {
 247                         if (strcmp(defval, "off") == 0) {
 248                                 nfs_server_delegation = FALSE;
 249                         }
 250                 }
 251                 if ((defval = defread("NFS_SERVER_SNDBUFSZ=")) != NULL) {
 252                         errno = 0;
 253                         bufsz = strtol(defval, (char **)NULL, 10);
 254                         if (errno == 0)
 255                                 nfs_server_sndbufsz = bufsz;
 256                 }
 257                 if ((defval = defread("NFS_SERVER_RCVBUFSZ=")) != NULL) {
 258                         errno = 0;
 259                         bufsz = strtol(defval, (char **)NULL, 10);
 260                         if (errno == 0)
 261                                 nfs_server_rcvbufsz = bufsz;
 262                 }
 263 
 264                 /* close defaults file */
 265                 defopen(NULL);
 266         }
 267 
 268         /*
 269          * Conflict options error messages.
 270          */
 271         if (opt_cnt > 1) {
 272                 (void) fprintf(stderr, "\nConflicting options, only one of "
 273                     "the following options can be specified\n"
 274                     "in " NFSADMIN ":\n"
 275                     "\tNFSD_PROTOCOL=ALL\n"
 276                     "\tNFSD_PROTOCOL=protocol\n"
 277                     "\tNFSD_DEVICE=device\n\n");
 278                 usage();
 279         }
 280         opt_cnt = 0;
 281 
 282         while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
 283                 switch (i) {
 284                 case 'a':
 285                         free(df_proto);
 286                         df_proto = NULL;
 287                         free(df_provider);
 288                         df_provider = NULL;
 289 
 290                         allflag = 1;
 291                         opt_cnt++;
 292                         break;
 293 
 294                 case 'c':
 295                         max_conns_allowed = atoi(optarg);
 296                         break;
 297 
 298                 case 'p':
 299                         proto = optarg;
 300                         df_allflag = 0;
 301                         opt_cnt++;
 302                         break;
 303 
 304                 /*
 305                  * DSS: NFSv4 distributed stable storage.
 306                  *
 307                  * This is a Contracted Project Private interface, for
 308                  * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
 309                  */
 310                 case 's':
 311                         if (strlen(optarg) < MAXPATHLEN) {
 312                                 /* first "-s" option encountered? */
 313                                 if (dss_pathnames == NULL) {
 314                                         /*
 315                                          * Allocate maximum possible space
 316                                          * required given cmdline arg count;
 317                                          * "-s <path>" consumes two args.
 318                                          */
 319                                         size_t sz = (ac / 2) * sizeof (char *);
 320                                         dss_pathnames = (char **)malloc(sz);
 321                                         if (dss_pathnames == NULL) {
 322                                                 (void) fprintf(stderr, "%s: "
 323                                                     "dss paths malloc failed\n",
 324                                                     av[0]);
 325                                                 exit(1);
 326                                         }
 327                                         (void) memset(dss_pathnames, 0, sz);
 328                                 }
 329                                 dss_pathnames[dss_npaths] = optarg;
 330                                 dss_npaths++;
 331                         } else {
 332                                 (void) fprintf(stderr,
 333                                     "%s: -s pathname too long.\n", av[0]);
 334                         }
 335                         break;
 336 
 337                 case 't':
 338                         provider = optarg;
 339                         df_allflag = 0;
 340                         opt_cnt++;
 341                         break;
 342 
 343                 case 'l':
 344                         listen_backlog = atoi(optarg);
 345                         break;
 346 
 347                 case '?':
 348                         usage();
 349                         /* NOTREACHED */
 350                 }
 351         }
 352 
 353         allflag = df_allflag;
 354         if (proto == NULL)
 355                 proto = df_proto;
 356         if (provider == NULL)
 357                 provider = df_provider;
 358 
 359         /*
 360          * Conflict options error messages.
 361          */
 362         if (opt_cnt > 1) {
 363                 (void) fprintf(stderr, "\nConflicting options, only one of "
 364                     "the following options can be specified\n"
 365                     "on the command line:\n"
 366                     "\t-a\n"
 367                     "\t-p protocol\n"
 368                     "\t-t transport\n\n");
 369                 usage();
 370         }
 371 
 372         if (proto != NULL &&
 373             strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
 374                 if (nfs_server_vers_max == NFS_V4) {
 375                         if (nfs_server_vers_min == NFS_V4) {
 376                                 fprintf(stderr,
 377                                     "NFS version 4 is not supported "
 378                                     "with the UDP protocol.  Exiting\n");
 379                                 exit(3);
 380                         } else {
 381                                 fprintf(stderr,
 382                                     "NFS version 4 is not supported "
 383                                     "with the UDP protocol.\n");
 384                         }
 385                 }
 386         }
 387 
 388         /*
 389          * If there is exactly one more argument, it is the number of
 390          * servers.
 391          */
 392         if (optind == ac - 1) {
 393                 maxservers = atoi(av[optind]);
 394                 maxservers_set = 1;
 395         }
 396         /*
 397          * If there are two or more arguments, then this is a usage error.
 398          */
 399         else if (optind < ac - 1)
 400                 usage();
 401         /*
 402          * Check the ranges for min/max version specified
 403          */
 404         else if ((nfs_server_vers_min > nfs_server_vers_max) ||
 405             (nfs_server_vers_min < NFS_VERSMIN) ||
 406             (nfs_server_vers_max > NFS_VERSMAX))
 407                 usage();
 408         /*
 409          * There are no additional arguments, and we haven't set maxservers
 410          * explicitly via the config file, we use a default number of
 411          * servers.  We will log this.
 412          */
 413         else if (maxservers_set == 0)
 414                 logmaxservers = 1;
 415 
 416         /*
 417          * Basic Sanity checks on options
 418          *
 419          * max_conns_allowed must be positive, except for the special
 420          * value of -1 which is used internally to mean unlimited, -1 isn't
 421          * documented but we allow it anyway.
 422          *
 423          * maxservers must be positive
 424          * listen_backlog must be positive or zero
 425          */
 426         if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
 427             (listen_backlog < 0) || (maxservers <= 0)) {
 428                 usage();
 429         }
 430 
 431         /*
 432          * Set current dir to server root
 433          */
 434         if (chdir(dir) < 0) {
 435                 (void) fprintf(stderr, "%s:  ", MyName);
 436                 perror(dir);
 437                 exit(1);
 438         }
 439 
 440 #ifndef DEBUG
 441         pipe_fd = daemonize_init();
 442 #endif
 443 
 444         openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
 445 
 446         /*
 447          * establish our lock on the lock file and write our pid to it.
 448          * exit if some other process holds the lock, or if there's any
 449          * error in writing/locking the file.
 450          */
 451         pid = _enter_daemon_lock(NFSD);
 452         switch (pid) {
 453         case 0:
 454                 break;
 455         case -1:
 456                 fprintf(stderr, "error locking for %s: %s", NFSD,
 457                     strerror(errno));
 458                 exit(2);
 459         default:
 460                 /* daemon was already running */
 461                 exit(0);
 462         }
 463 
 464         /*
 465          * If we've been given a list of paths to be used for distributed
 466          * stable storage, and provided we're going to run a version
 467          * that supports it, setup the DSS paths.
 468          */
 469         if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
 470                 if (dss_init(dss_npaths, dss_pathnames) != 0) {
 471                         fprintf(stderr, "%s", "dss_init failed. Exiting.");
 472                         exit(1);
 473                 }
 474         }
 475 
 476         /*
 477          * Block all signals till we spawn other
 478          * threads.
 479          */
 480         (void) sigfillset(&sgset);
 481         (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
 482 
 483         if (logmaxservers) {
 484                 fprintf(stderr,
 485                     "Number of servers not specified. Using default of %d.",
 486                     maxservers);
 487         }
 488 
 489         /*
 490          * Make sure to unregister any previous versions in case the
 491          * user is reconfiguring the server in interesting ways.
 492          */
 493         svc_unreg(NFS_PROGRAM, NFS_VERSION);
 494         svc_unreg(NFS_PROGRAM, NFS_V3);
 495         svc_unreg(NFS_PROGRAM, NFS_V4);
 496         svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
 497         svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
 498 
 499         /*
 500          * Set up kernel RPC thread pool for the NFS server.
 501          */
 502         if (nfssvcpool(maxservers)) {
 503                 fprintf(stderr, "Can't set up kernel NFS service: %s. Exiting",
 504                     strerror(errno));
 505                 exit(1);
 506         }
 507 
 508         /*
 509          * Set up blocked thread to do LWP creation on behalf of the kernel.
 510          */
 511         if (svcwait(NFS_SVCPOOL_ID)) {
 512                 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting",
 513                     strerror(errno));
 514                 exit(1);
 515         }
 516 
 517         /*
 518          * RDMA start and stop thread.
 519          * Per pool RDMA listener creation and
 520          * destructor thread.
 521          *
 522          * start rdma services and block in the kernel.
 523          * (only if proto or provider is not set to TCP or UDP)
 524          */
 525         if ((proto == NULL) && (provider == NULL)) {
 526                 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
 527                     nfs_server_vers_max, nfs_server_delegation)) {
 528                         fprintf(stderr,
 529                             "Can't set up RDMA creator thread : %s",
 530                             strerror(errno));
 531                 }
 532         }
 533 
 534         /*
 535          * Now open up for signal delivery
 536          */
 537 
 538         (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
 539         sigset(SIGTERM, sigflush);
 540         sigset(SIGUSR1, quiesce);
 541 
 542         /*
 543          * Build a protocol block list for registration.
 544          */
 545         protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
 546         protobp->serv = "NFS";
 547         protobp->versmin = nfs_server_vers_min;
 548         protobp->versmax = nfs_server_vers_max;
 549         protobp->program = NFS_PROGRAM;
 550 
 551         protobp->next = (struct protob *)malloc(sizeof (struct protob));
 552         protobp = protobp->next;
 553         protobp->serv = "NFS_ACL";           /* not used */
 554         protobp->versmin = nfs_server_vers_min;
 555         /* XXX - this needs work to get the version just right */
 556         protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
 557             NFS_ACL_V3 : nfs_server_vers_max;
 558         protobp->program = NFS_ACL_PROGRAM;
 559         protobp->next = (struct protob *)NULL;
 560 
 561         if (allflag) {
 562                 if (do_all_setbuf(protobp0, nfssvc, 0, nfs_server_sndbufsz,
 563                     nfs_server_rcvbufsz) == -1) {
 564                         fprintf(stderr, "setnetconfig failed : %s",
 565                             strerror(errno));
 566                         exit(1);
 567                 }
 568         } else if (proto) {
 569                 /* there's more than one match for the same protocol */
 570                 struct netconfig *nconf;
 571                 NCONF_HANDLE *nc;
 572                 bool_t  protoFound = FALSE;
 573                 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
 574                         fprintf(stderr, "setnetconfig failed : %s",
 575                             strerror(errno));
 576                         goto done;
 577                 }
 578                 while (nconf = getnetconfig(nc)) {
 579                         if (strcmp(nconf->nc_proto, proto) == 0) {
 580                                 protoFound = TRUE;
 581                                 do_one_setbuf(nconf->nc_device, NULL,
 582                                     protobp0, nfssvc, 0,
 583                                     nfs_server_sndbufsz, nfs_server_rcvbufsz);
 584                         }
 585                 }
 586                 (void) endnetconfig(nc);
 587                 if (protoFound == FALSE) {
 588                         fprintf(stderr,
 589                             "couldn't find netconfig entry for protocol %s",
 590                             proto);
 591                 }
 592         } else if (provider)
 593                 do_one_setbuf(provider, proto, protobp0, nfssvc, 0,
 594                     nfs_server_sndbufsz, nfs_server_rcvbufsz);
 595         else {
 596                 for (providerp = defaultproviders;
 597                     *providerp != NULL; providerp++) {
 598                         provider = *providerp;
 599                         do_one_setbuf(provider, NULL, protobp0, nfssvc, 0,
 600                             nfs_server_sndbufsz, nfs_server_rcvbufsz);
 601                 }
 602         }
 603 done:
 604 
 605         free(protobp);
 606         free(protobp0);
 607 
 608         if (num_fds == 0) {
 609                 fprintf(stderr, "Could not start NFS service for any protocol."
 610                     " Exiting");
 611                 exit(1);
 612         }
 613 
 614         end_listen_fds = num_fds;
 615 
 616         /*
 617          * nfsd is up and running as far as we are concerned.
 618          */
 619         daemonize_fini(pipe_fd);
 620 
 621         /*
 622          * Get rid of unneeded privileges.
 623          */
 624         __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
 625             PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
 626 
 627         /*
 628          * Poll for non-data control events on the transport descriptors.
 629          */
 630         poll_for_action();
 631 
 632         /*
 633          * If we get here, something failed in poll_for_action().
 634          */
 635         return (1);
 636 }
 637 
 638 static int
 639 nfssvcpool(int maxservers)
 640 {
 641         struct svcpool_args npa;
 642 
 643         npa.id = NFS_SVCPOOL_ID;
 644         npa.maxthreads = maxservers;
 645         npa.redline = 0;
 646         npa.qsize = 0;
 647         npa.timeout = 0;
 648         npa.stksize = 0;
 649         npa.max_same_xprt = 0;
 650         return (_nfssys(SVCPOOL_CREATE, &npa));
 651 }
 652 
 653 /*
 654  * Establish NFS service thread.
 655  */
 656 static int
 657 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
 658 {
 659         struct nfs_svc_args nsa;
 660 
 661         nsa.fd = fd;
 662         nsa.netid = nconf->nc_netid;
 663         nsa.addrmask = addrmask;
 664         if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
 665                 nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
 666                     NFS_V3 : nfs_server_vers_max;
 667                 nsa.versmin = nfs_server_vers_min;
 668                 /*
 669                  * If no version left, silently do nothing, previous
 670                  * checks will have assured at least TCP is available.
 671                  */
 672                 if (nsa.versmin > nsa.versmax)
 673                         return (0);
 674         } else {
 675                 nsa.versmax = nfs_server_vers_max;
 676                 nsa.versmin = nfs_server_vers_min;
 677         }
 678         nsa.delegation = nfs_server_delegation;
 679         return (_nfssys(NFS_SVC, &nsa));
 680 }
 681 
 682 static void
 683 usage(void)
 684 {
 685         (void) fprintf(stderr,
 686 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
 687         (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
 688         (void) fprintf(stderr,
 689 "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
 690         (void) fprintf(stderr,
 691 "\tmax_conns is the maximum number of concurrent connections allowed,\n");
 692         (void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
 693         (void) fprintf(stderr, "> zero,\n");
 694         (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
 695         (void) fprintf(stderr,
 696             "\ttransport is a transport provider name (i.e. device),\n");
 697         (void) fprintf(stderr,
 698             "\tlisten_backlog is the TCP listen backlog,\n");
 699         (void) fprintf(stderr,
 700             "\tand <nservers> must be a decimal number > zero.\n");
 701         exit(1);
 702 }
 703 
 704 /*
 705  * Issue nfssys system call to flush all logging buffers asynchronously.
 706  *
 707  * NOTICE: It is extremely important to flush NFS logging buffers when
 708  *         nfsd exits. When the system is halted or rebooted nfslogd
 709  *         may not have an opportunity to flush the buffers.
 710  */
 711 static void
 712 nfsl_flush()
 713 {
 714         struct nfsl_flush_args nfa;
 715 
 716         memset((void *)&nfa, 0, sizeof (nfa));
 717         nfa.version = NFSL_FLUSH_ARGS_VERS;
 718         nfa.directive = NFSL_ALL;       /* flush all asynchronously */
 719 
 720         if (_nfssys(LOG_FLUSH, &nfa) < 0)
 721                 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
 722                     strerror(errno));
 723 }
 724 
 725 /*
 726  * SIGTERM handler.
 727  * Flush logging buffers and exit.
 728  */
 729 static void
 730 sigflush(int sig)
 731 {
 732         nfsl_flush();
 733         _exit(0);
 734 }
 735 
 736 /*
 737  * SIGUSR1 handler.
 738  *
 739  * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
 740  *
 741  * This is a Contracted Project Private interface, for the sole use
 742  * of Sun Cluster HA-NFS. See PSARC/2004/497.
 743  *
 744  * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
 745  */
 746 static void
 747 quiesce(int sig)
 748 {
 749         int error;
 750         int id = NFS_SVCPOOL_ID;
 751 
 752         if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
 753                 /* Request server quiesce at next shutdown */
 754                 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
 755 
 756                 /*
 757                  * ENOENT is returned if there is no matching SVC pool
 758                  * for the id. Possibly because the pool is not yet setup.
 759                  * In this case, just exit as if no error. For all other errors,
 760                  * just return and allow caller to retry.
 761                  */
 762                 if (error && errno != ENOENT) {
 763                         syslog(LOG_ERR,
 764                             "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
 765                             strerror(errno));
 766                         return;
 767                 }
 768         }
 769 
 770         /* Flush logging buffers */
 771         nfsl_flush();
 772 
 773         _exit(0);
 774 }
 775 
 776 /*
 777  * DSS: distributed stable storage.
 778  * Create leaf directories as required, keeping an eye on path
 779  * lengths. Calls exit(1) on failure.
 780  * The pathnames passed in must already exist, and must be writeable by nfsd.
 781  * Note: the leaf directories under NFS4_VAR_DIR are not created here;
 782  * they're created at pkg install.
 783  */
 784 static void
 785 dss_mkleafdirs(uint_t npaths, char **pathnames)
 786 {
 787         int i;
 788         char *tmppath = NULL;
 789 
 790         /*
 791          * Create the temporary storage used by dss_mkleafdir() here,
 792          * rather than in that function, so that it only needs to be
 793          * done once, rather than once for each call. Too big to put
 794          * on the function's stack.
 795          */
 796         tmppath = (char *)malloc(MAXPATHLEN);
 797         if (tmppath == NULL) {
 798                 syslog(LOG_ERR, "tmppath malloc failed. Exiting");
 799                 exit(1);
 800         }
 801 
 802         for (i = 0; i < npaths; i++) {
 803                 char *p = pathnames[i];
 804 
 805                 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
 806                 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
 807         }
 808 
 809         free(tmppath);
 810 }
 811 
 812 /*
 813  * Create "leaf" in "dir" (which must already exist).
 814  * leaf: should start with a '/'
 815  */
 816 static void
 817 dss_mkleafdir(char *dir, char *leaf, char *tmppath)
 818 {
 819         /* MAXPATHLEN includes the terminating NUL */
 820         if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
 821                 fprintf(stderr, "stable storage path too long: %s%s. Exiting",
 822                     dir, leaf);
 823                 exit(1);
 824         }
 825 
 826         (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
 827 
 828         /* the directory may already exist: that's OK */
 829         if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
 830                 fprintf(stderr, "error creating stable storage directory: "
 831                     "%s: %s. Exiting", strerror(errno), tmppath);
 832                 exit(1);
 833         }
 834 }
 835 
 836 /*
 837  * Create the storage dirs, and pass the path list to the kernel.
 838  * This requires the nfssrv module to be loaded; the _nfssys() syscall
 839  * will fail ENOTSUP if it is not.
 840  * Use libnvpair(3LIB) to pass the data to the kernel.
 841  */
 842 static int
 843 dss_init(uint_t npaths, char **pathnames)
 844 {
 845         int i, j, nskipped, error;
 846         char *bufp;
 847         uint32_t bufsize;
 848         size_t buflen;
 849         nvlist_t *nvl;
 850 
 851         if (npaths > 1) {
 852                 /*
 853                  * We need to remove duplicate paths; this might be user error
 854                  * in the general case, but HA-NFSv4 can also cause this.
 855                  * Sort the pathnames array, and NULL out duplicates,
 856                  * then write the non-NULL entries to a new array.
 857                  * Sorting will also allow the kernel to optimise its searches.
 858                  */
 859 
 860                 qsort(pathnames, npaths, sizeof (char *), qstrcmp);
 861 
 862                 /* now NULL out any duplicates */
 863                 i = 0; j = 1; nskipped = 0;
 864                 while (j < npaths) {
 865                         if (strcmp(pathnames[i], pathnames[j]) == NULL) {
 866                                 pathnames[j] = NULL;
 867                                 j++;
 868                                 nskipped++;
 869                                 continue;
 870                         }
 871 
 872                         /* skip i over any of its NULLed duplicates */
 873                         i = j++;
 874                 }
 875 
 876                 /* finally, write the non-NULL entries to a new array */
 877                 if (nskipped > 0) {
 878                         int nreal;
 879                         size_t sz;
 880                         char **tmp_pathnames;
 881 
 882                         nreal = npaths - nskipped;
 883 
 884                         sz = nreal * sizeof (char *);
 885                         tmp_pathnames = (char **)malloc(sz);
 886                         if (tmp_pathnames == NULL) {
 887                                 fprintf(stderr, "tmp_pathnames malloc failed");
 888                                 exit(1);
 889                         }
 890 
 891                         for (i = 0, j = 0; i < npaths; i++)
 892                                 if (pathnames[i] != NULL)
 893                                         tmp_pathnames[j++] = pathnames[i];
 894                         free(pathnames);
 895                         pathnames = tmp_pathnames;
 896                         npaths = nreal;
 897                 }
 898 
 899         }
 900 
 901         /* Create directories to store the distributed state files */
 902         dss_mkleafdirs(npaths, pathnames);
 903 
 904         /* Create the name-value pair list */
 905         error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
 906         if (error) {
 907                 fprintf(stderr, "nvlist_alloc failed: %s.", strerror(errno));
 908                 return (1);
 909         }
 910 
 911         /* Add the pathnames array as a single name-value pair */
 912         error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
 913             pathnames, npaths);
 914         if (error) {
 915                 fprintf(stderr, "nvlist_add_string_array failed: %s.",
 916                     strerror(errno));
 917                 nvlist_free(nvl);
 918                 return (1);
 919         }
 920 
 921         /*
 922          * Pack list into contiguous memory, for passing to kernel.
 923          * nvlist_pack() will allocate the memory for the buffer,
 924          * which we should free() when no longer needed.
 925          * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
 926          */
 927         bufp = NULL;
 928         error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
 929         if (error) {
 930                 fprintf(stderr, "nvlist_pack failed: %s.", strerror(errno));
 931                 nvlist_free(nvl);
 932                 return (1);
 933         }
 934 
 935         /* Now we have the packed buffer, we no longer need the list */
 936         nvlist_free(nvl);
 937 
 938         /*
 939          * Let the kernel know in advance how big the buffer is.
 940          * NOTE: we cannot just pass buflen, since size_t is a long, and
 941          * thus a different size between ILP32 userland and LP64 kernel.
 942          * Use an int for the transfer, since that should be big enough;
 943          * this is a no-op at the moment, here, since nfsd is 32-bit, but
 944          * that could change.
 945          */
 946         bufsize = (uint32_t)buflen;
 947         error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
 948         if (error) {
 949                 fprintf(stderr,
 950                     "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
 951                     strerror(errno));
 952                 free(bufp);
 953                 return (1);
 954         }
 955 
 956         /* Pass the packed buffer to the kernel */
 957         error = _nfssys(NFS4_DSS_SETPATHS, bufp);
 958         if (error) {
 959                 fprintf(stderr,
 960                     "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
 961                 free(bufp);
 962                 return (1);
 963         }
 964 
 965         /*
 966          * The kernel has now unpacked the buffer and extracted the
 967          * pathnames array, we no longer need the buffer.
 968          */
 969         free(bufp);
 970 
 971         return (0);
 972 }
 973 
 974 /*
 975  * Quick sort string compare routine, for qsort.
 976  * Needed to make arg types correct.
 977  */
 978 int
 979 qstrcmp(const void *p1, const void *p2)
 980 {
 981         char *s1 = *((char **)p1);
 982         char *s2 = *((char **)p2);
 983 
 984         return (strcmp(s1, s2));
 985 }