1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T             */
  27 /*        All Rights Reserved   */
  28 
  29 /*
  30  * University Copyright- Copyright (c) 1982, 1986, 1988
  31  * The Regents of the University of California
  32  * All Rights Reserved
  33  *
  34  * University Acknowledgment- Portions of this document are derived from
  35  * software developed by the University of California, Berkeley, and its
  36  * contributors.
  37  */
  38 
  39 /* LINTLIBRARY */
  40 /* PROTOLIB1 */
  41 
  42 /* NFS server */
  43 
  44 #include <sys/param.h>
  45 #include <sys/types.h>
  46 #include <sys/stat.h>
  47 #include <syslog.h>
  48 #include <tiuser.h>
  49 #include <rpc/rpc.h>
  50 #include <errno.h>
  51 #include <thread.h>
  52 #include <sys/resource.h>
  53 #include <sys/time.h>
  54 #include <sys/file.h>
  55 #include <nfs/nfs.h>
  56 #include <nfs/nfs_acl.h>
  57 #include <nfs/nfssys.h>
  58 #include <stdio.h>
  59 #include <stdio_ext.h>
  60 #include <stdlib.h>
  61 #include <signal.h>
  62 #include <netconfig.h>
  63 #include <netdir.h>
  64 #include <string.h>
  65 #include <unistd.h>
  66 #include <stropts.h>
  67 #include <sys/tihdr.h>
  68 #include <sys/wait.h>
  69 #include <poll.h>
  70 #include <priv_utils.h>
  71 #include <sys/tiuser.h>
  72 #include <netinet/tcp.h>
  73 #include <deflt.h>
  74 #include <rpcsvc/daemon_utils.h>
  75 #include <rpcsvc/nfs4_prot.h>
  76 #include <libnvpair.h>
  77 #include "nfs_tbind.h"
  78 #include "thrpool.h"
  79 
  80 /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
  81 #define QUIESCE_VERSMIN 4
  82 /* DSS: distributed stable storage */
  83 #define DSS_VERSMIN     4
  84 
  85 static  int     nfssvc(int, struct netbuf, struct netconfig *);
  86 static  int     nfssvcpool(int maxservers);
  87 static  int     dss_init(uint_t npaths, char **pathnames);
  88 static  void    dss_mkleafdirs(uint_t npaths, char **pathnames);
  89 static  void    dss_mkleafdir(char *dir, char *leaf, char *path);
  90 static  void    usage(void);
  91 int             qstrcmp(const void *s1, const void *s2);
  92 
  93 extern  int     _nfssys(int, void *);
  94 
  95 extern int      daemonize_init(void);
  96 extern void     daemonize_fini(int fd);
  97 
  98 /* signal handlers */
  99 static void sigflush(int);
 100 static void quiesce(int);
 101 
 102 static  char    *MyName;
 103 static  NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
 104                                             "/dev/udp6", NULL };
 105 /* static       NETSELDECL(defaultprotos)[] =   { NC_UDP, NC_TCP, NULL }; */
 106 /*
 107  * The following are all globals used by routines in nfs_tbind.c.
 108  */
 109 size_t  end_listen_fds;         /* used by conn_close_oldest() */
 110 size_t  num_fds = 0;            /* used by multiple routines */
 111 int     listen_backlog = 32;    /* used by bind_to_{provider,proto}() */
 112 int     num_servers;            /* used by cots_listen_event() */
 113 int     (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
 114                                 /* used by cots_listen_event() */
 115 int     max_conns_allowed = -1; /* used by cots_listen_event() */
 116 
 117 /*
 118  * Keep track of min/max versions of NFS protocol to be started.
 119  * Start with the defaults (min == 2, max == 3).  We have the
 120  * capability of starting vers=4 but only if the user requests it.
 121  */
 122 int     nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
 123 int     nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
 124 
 125 /*
 126  * Set the default for server delegation enablement and set per
 127  * /etc/default/nfs configuration (if present).
 128  */
 129 int     nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
 130 
 131 int
 132 main(int ac, char *av[])
 133 {
 134         char *dir = "/";
 135         int allflag = 0;
 136         int df_allflag = 0;
 137         int opt_cnt = 0;
 138         int maxservers = 1;     /* zero allows inifinte number of threads */
 139         int maxservers_set = 0;
 140         int logmaxservers = 0;
 141         int pid;
 142         int i;
 143         char *provider = (char *)NULL;
 144         char *df_provider = (char *)NULL;
 145         struct protob *protobp0, *protobp;
 146         NETSELDECL(proto) = NULL;
 147         NETSELDECL(df_proto) = NULL;
 148         NETSELPDECL(providerp);
 149         char *defval;
 150         boolean_t can_do_mlp;
 151         uint_t dss_npaths = 0;
 152         char **dss_pathnames = NULL;
 153         sigset_t sgset;
 154 
 155         int pipe_fd = -1;
 156 
 157         MyName = *av;
 158 
 159         /*
 160          * Initializations that require more privileges than we need to run.
 161          */
 162         (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
 163         svcsetprio();
 164 
 165         can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
 166         if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
 167             DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
 168             can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
 169                 (void) fprintf(stderr, "%s should be run with"
 170                     " sufficient privileges\n", av[0]);
 171                 exit(1);
 172         }
 173 
 174         (void) enable_extended_FILE_stdio(-1, -1);
 175 
 176         /*
 177          * Read in the values from config file first before we check
 178          * command line options so the options override the file.
 179          */
 180         if ((defopen(NFSADMIN)) == 0) {
 181                 if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) {
 182                         errno = 0;
 183                         max_conns_allowed = strtol(defval, (char **)NULL, 10);
 184                         if (errno != 0) {
 185                                 max_conns_allowed = -1;
 186                         }
 187                 }
 188                 if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) {
 189                         errno = 0;
 190                         listen_backlog = strtol(defval, (char **)NULL, 10);
 191                         if (errno != 0) {
 192                                 listen_backlog = 32;
 193                         }
 194                 }
 195                 if ((defval = defread("NFSD_PROTOCOL=")) != NULL) {
 196                         df_proto = strdup(defval);
 197                         opt_cnt++;
 198                         if (strncasecmp("ALL", defval, 3) == 0) {
 199                                 free(df_proto);
 200                                 df_proto = NULL;
 201                                 df_allflag = 1;
 202                         }
 203                 }
 204                 if ((defval = defread("NFSD_DEVICE=")) != NULL) {
 205                         df_provider = strdup(defval);
 206                         opt_cnt++;
 207                 }
 208                 if ((defval = defread("NFSD_SERVERS=")) != NULL) {
 209                         errno = 0;
 210                         maxservers = strtol(defval, (char **)NULL, 10);
 211                         if (errno != 0) {
 212                                 maxservers = 1;
 213                         } else {
 214                                 maxservers_set = 1;
 215                         }
 216                 }
 217                 if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) {
 218                         errno = 0;
 219                         nfs_server_vers_min =
 220                             strtol(defval, (char **)NULL, 10);
 221                         if (errno != 0) {
 222                                 nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
 223                         }
 224                 }
 225                 if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) {
 226                         errno = 0;
 227                         nfs_server_vers_max =
 228                             strtol(defval, (char **)NULL, 10);
 229                         if (errno != 0) {
 230                                 nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
 231                         }
 232                 }
 233                 if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) {
 234                         if (strcmp(defval, "off") == 0) {
 235                                 nfs_server_delegation = FALSE;
 236                         }
 237                 }
 238 
 239                 /* close defaults file */
 240                 defopen(NULL);
 241         }
 242 
 243         /*
 244          * Conflict options error messages.
 245          */
 246         if (opt_cnt > 1) {
 247                 (void) fprintf(stderr, "\nConflicting options, only one of "
 248                     "the following options can be specified\n"
 249                     "in " NFSADMIN ":\n"
 250                     "\tNFSD_PROTOCOL=ALL\n"
 251                     "\tNFSD_PROTOCOL=protocol\n"
 252                     "\tNFSD_DEVICE=device\n\n");
 253                 usage();
 254         }
 255         opt_cnt = 0;
 256 
 257         while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
 258                 switch (i) {
 259                 case 'a':
 260                         free(df_proto);
 261                         df_proto = NULL;
 262                         free(df_provider);
 263                         df_provider = NULL;
 264 
 265                         allflag = 1;
 266                         opt_cnt++;
 267                         break;
 268 
 269                 case 'c':
 270                         max_conns_allowed = atoi(optarg);
 271                         break;
 272 
 273                 case 'p':
 274                         proto = optarg;
 275                         df_allflag = 0;
 276                         opt_cnt++;
 277                         break;
 278 
 279                 /*
 280                  * DSS: NFSv4 distributed stable storage.
 281                  *
 282                  * This is a Contracted Project Private interface, for
 283                  * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
 284                  */
 285                 case 's':
 286                         if (strlen(optarg) < MAXPATHLEN) {
 287                                 /* first "-s" option encountered? */
 288                                 if (dss_pathnames == NULL) {
 289                                         /*
 290                                          * Allocate maximum possible space
 291                                          * required given cmdline arg count;
 292                                          * "-s <path>" consumes two args.
 293                                          */
 294                                         size_t sz = (ac / 2) * sizeof (char *);
 295                                         dss_pathnames = (char **)malloc(sz);
 296                                         if (dss_pathnames == NULL) {
 297                                                 (void) fprintf(stderr, "%s: "
 298                                                     "dss paths malloc failed\n",
 299                                                     av[0]);
 300                                                 exit(1);
 301                                         }
 302                                         (void) memset(dss_pathnames, 0, sz);
 303                                 }
 304                                 dss_pathnames[dss_npaths] = optarg;
 305                                 dss_npaths++;
 306                         } else {
 307                                 (void) fprintf(stderr,
 308                                     "%s: -s pathname too long.\n", av[0]);
 309                         }
 310                         break;
 311 
 312                 case 't':
 313                         provider = optarg;
 314                         df_allflag = 0;
 315                         opt_cnt++;
 316                         break;
 317 
 318                 case 'l':
 319                         listen_backlog = atoi(optarg);
 320                         break;
 321 
 322                 case '?':
 323                         usage();
 324                         /* NOTREACHED */
 325                 }
 326         }
 327 
 328         allflag = df_allflag;
 329         if (proto == NULL)
 330                 proto = df_proto;
 331         if (provider == NULL)
 332                 provider = df_provider;
 333 
 334         /*
 335          * Conflict options error messages.
 336          */
 337         if (opt_cnt > 1) {
 338                 (void) fprintf(stderr, "\nConflicting options, only one of "
 339                     "the following options can be specified\n"
 340                     "on the command line:\n"
 341                     "\t-a\n"
 342                     "\t-p protocol\n"
 343                     "\t-t transport\n\n");
 344                 usage();
 345         }
 346 
 347         if (proto != NULL &&
 348             strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
 349                 if (nfs_server_vers_max == NFS_V4) {
 350                         if (nfs_server_vers_min == NFS_V4) {
 351                                 fprintf(stderr,
 352                                     "NFS version 4 is not supported "
 353                                     "with the UDP protocol.  Exiting\n");
 354                                 exit(3);
 355                         } else {
 356                                 fprintf(stderr,
 357                                     "NFS version 4 is not supported "
 358                                     "with the UDP protocol.\n");
 359                         }
 360                 }
 361         }
 362 
 363         /*
 364          * If there is exactly one more argument, it is the number of
 365          * servers.
 366          */
 367         if (optind == ac - 1) {
 368                 maxservers = atoi(av[optind]);
 369                 maxservers_set = 1;
 370         }
 371         /*
 372          * If there are two or more arguments, then this is a usage error.
 373          */
 374         else if (optind < ac - 1)
 375                 usage();
 376         /*
 377          * Check the ranges for min/max version specified
 378          */
 379         else if ((nfs_server_vers_min > nfs_server_vers_max) ||
 380             (nfs_server_vers_min < NFS_VERSMIN) ||
 381             (nfs_server_vers_max > NFS_VERSMAX))
 382                 usage();
 383         /*
 384          * There are no additional arguments, and we haven't set maxservers
 385          * explicitly via the config file, we use a default number of
 386          * servers.  We will log this.
 387          */
 388         else if (maxservers_set == 0)
 389                 logmaxservers = 1;
 390 
 391         /*
 392          * Basic Sanity checks on options
 393          *
 394          * max_conns_allowed must be positive, except for the special
 395          * value of -1 which is used internally to mean unlimited, -1 isn't
 396          * documented but we allow it anyway.
 397          *
 398          * maxservers must be positive
 399          * listen_backlog must be positive or zero
 400          */
 401         if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
 402             (listen_backlog < 0) || (maxservers <= 0)) {
 403                 usage();
 404         }
 405 
 406         /*
 407          * Set current dir to server root
 408          */
 409         if (chdir(dir) < 0) {
 410                 (void) fprintf(stderr, "%s:  ", MyName);
 411                 perror(dir);
 412                 exit(1);
 413         }
 414 
 415 #ifndef DEBUG
 416         pipe_fd = daemonize_init();
 417 #endif
 418 
 419         openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
 420 
 421         /*
 422          * establish our lock on the lock file and write our pid to it.
 423          * exit if some other process holds the lock, or if there's any
 424          * error in writing/locking the file.
 425          */
 426         pid = _enter_daemon_lock(NFSD);
 427         switch (pid) {
 428         case 0:
 429                 break;
 430         case -1:
 431                 fprintf(stderr, "error locking for %s: %s", NFSD,
 432                     strerror(errno));
 433                 exit(2);
 434         default:
 435                 /* daemon was already running */
 436                 exit(0);
 437         }
 438 
 439         /*
 440          * If we've been given a list of paths to be used for distributed
 441          * stable storage, and provided we're going to run a version
 442          * that supports it, setup the DSS paths.
 443          */
 444         if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
 445                 if (dss_init(dss_npaths, dss_pathnames) != 0) {
 446                         fprintf(stderr, "%s", "dss_init failed. Exiting.");
 447                         exit(1);
 448                 }
 449         }
 450 
 451         /*
 452          * Block all signals till we spawn other
 453          * threads.
 454          */
 455         (void) sigfillset(&sgset);
 456         (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
 457 
 458         if (logmaxservers) {
 459                 fprintf(stderr,
 460                     "Number of servers not specified. Using default of %d.",
 461                     maxservers);
 462         }
 463 
 464         /*
 465          * Make sure to unregister any previous versions in case the
 466          * user is reconfiguring the server in interesting ways.
 467          */
 468         svc_unreg(NFS_PROGRAM, NFS_VERSION);
 469         svc_unreg(NFS_PROGRAM, NFS_V3);
 470         svc_unreg(NFS_PROGRAM, NFS_V4);
 471         svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
 472         svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
 473 
 474         /*
 475          * Set up kernel RPC thread pool for the NFS server.
 476          */
 477         if (nfssvcpool(maxservers)) {
 478                 fprintf(stderr, "Can't set up kernel NFS service: %s. Exiting",
 479                     strerror(errno));
 480                 exit(1);
 481         }
 482 
 483         /*
 484          * Set up blocked thread to do LWP creation on behalf of the kernel.
 485          */
 486         if (svcwait(NFS_SVCPOOL_ID)) {
 487                 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting",
 488                     strerror(errno));
 489                 exit(1);
 490         }
 491 
 492         /*
 493          * RDMA start and stop thread.
 494          * Per pool RDMA listener creation and
 495          * destructor thread.
 496          *
 497          * start rdma services and block in the kernel.
 498          * (only if proto or provider is not set to TCP or UDP)
 499          */
 500         if ((proto == NULL) && (provider == NULL)) {
 501                 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
 502                     nfs_server_vers_max, nfs_server_delegation)) {
 503                         fprintf(stderr,
 504                             "Can't set up RDMA creator thread : %s",
 505                             strerror(errno));
 506                 }
 507         }
 508 
 509         /*
 510          * Now open up for signal delivery
 511          */
 512 
 513         (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
 514         sigset(SIGTERM, sigflush);
 515         sigset(SIGUSR1, quiesce);
 516 
 517         /*
 518          * Build a protocol block list for registration.
 519          */
 520         protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
 521         protobp->serv = "NFS";
 522         protobp->versmin = nfs_server_vers_min;
 523         protobp->versmax = nfs_server_vers_max;
 524         protobp->program = NFS_PROGRAM;
 525 
 526         protobp->next = (struct protob *)malloc(sizeof (struct protob));
 527         protobp = protobp->next;
 528         protobp->serv = "NFS_ACL";           /* not used */
 529         protobp->versmin = nfs_server_vers_min;
 530         /* XXX - this needs work to get the version just right */
 531         protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
 532             NFS_ACL_V3 : nfs_server_vers_max;
 533         protobp->program = NFS_ACL_PROGRAM;
 534         protobp->next = (struct protob *)NULL;
 535 
 536         if (allflag) {
 537                 if (do_all(protobp0, nfssvc, 0) == -1) {
 538                         fprintf(stderr, "setnetconfig failed : %s",
 539                             strerror(errno));
 540                         exit(1);
 541                 }
 542         } else if (proto) {
 543                 /* there's more than one match for the same protocol */
 544                 struct netconfig *nconf;
 545                 NCONF_HANDLE *nc;
 546                 bool_t  protoFound = FALSE;
 547                 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
 548                         fprintf(stderr, "setnetconfig failed : %s",
 549                             strerror(errno));
 550                         goto done;
 551                 }
 552                 while (nconf = getnetconfig(nc)) {
 553                         if (strcmp(nconf->nc_proto, proto) == 0) {
 554                                 protoFound = TRUE;
 555                                 do_one(nconf->nc_device, NULL,
 556                                     protobp0, nfssvc, 0);
 557                         }
 558                 }
 559                 (void) endnetconfig(nc);
 560                 if (protoFound == FALSE) {
 561                         fprintf(stderr,
 562                             "couldn't find netconfig entry for protocol %s",
 563                             proto);
 564                 }
 565         } else if (provider)
 566                 do_one(provider, proto, protobp0, nfssvc, 0);
 567         else {
 568                 for (providerp = defaultproviders;
 569                     *providerp != NULL; providerp++) {
 570                         provider = *providerp;
 571                         do_one(provider, NULL, protobp0, nfssvc, 0);
 572                 }
 573         }
 574 done:
 575 
 576         free(protobp);
 577         free(protobp0);
 578 
 579         if (num_fds == 0) {
 580                 fprintf(stderr, "Could not start NFS service for any protocol."
 581                     " Exiting");
 582                 exit(1);
 583         }
 584 
 585         end_listen_fds = num_fds;
 586 
 587         /*
 588          * nfsd is up and running as far as we are concerned.
 589          */
 590         daemonize_fini(pipe_fd);
 591 
 592         /*
 593          * Get rid of unneeded privileges.
 594          */
 595         __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
 596             PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
 597 
 598         /*
 599          * Poll for non-data control events on the transport descriptors.
 600          */
 601         poll_for_action();
 602 
 603         /*
 604          * If we get here, something failed in poll_for_action().
 605          */
 606         return (1);
 607 }
 608 
 609 static int
 610 nfssvcpool(int maxservers)
 611 {
 612         struct svcpool_args npa;
 613 
 614         npa.id = NFS_SVCPOOL_ID;
 615         npa.maxthreads = maxservers;
 616         npa.redline = 0;
 617         npa.qsize = 0;
 618         npa.timeout = 0;
 619         npa.stksize = 0;
 620         npa.max_same_xprt = 0;
 621         return (_nfssys(SVCPOOL_CREATE, &npa));
 622 }
 623 
 624 /*
 625  * Establish NFS service thread.
 626  */
 627 static int
 628 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
 629 {
 630         struct nfs_svc_args nsa;
 631 
 632         nsa.fd = fd;
 633         nsa.netid = nconf->nc_netid;
 634         nsa.addrmask = addrmask;
 635         if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
 636                 nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
 637                     NFS_V3 : nfs_server_vers_max;
 638                 nsa.versmin = nfs_server_vers_min;
 639                 /*
 640                  * If no version left, silently do nothing, previous
 641                  * checks will have assured at least TCP is available.
 642                  */
 643                 if (nsa.versmin > nsa.versmax)
 644                         return (0);
 645         } else {
 646                 nsa.versmax = nfs_server_vers_max;
 647                 nsa.versmin = nfs_server_vers_min;
 648         }
 649         nsa.delegation = nfs_server_delegation;
 650         return (_nfssys(NFS_SVC, &nsa));
 651 }
 652 
 653 static void
 654 usage(void)
 655 {
 656         (void) fprintf(stderr,
 657 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
 658         (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
 659         (void) fprintf(stderr,
 660 "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
 661         (void) fprintf(stderr,
 662 "\tmax_conns is the maximum number of concurrent connections allowed,\n");
 663         (void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
 664         (void) fprintf(stderr, "> zero,\n");
 665         (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
 666         (void) fprintf(stderr,
 667             "\ttransport is a transport provider name (i.e. device),\n");
 668         (void) fprintf(stderr,
 669             "\tlisten_backlog is the TCP listen backlog,\n");
 670         (void) fprintf(stderr,
 671             "\tand <nservers> must be a decimal number > zero.\n");
 672         exit(1);
 673 }
 674 
 675 /*
 676  * Issue nfssys system call to flush all logging buffers asynchronously.
 677  *
 678  * NOTICE: It is extremely important to flush NFS logging buffers when
 679  *         nfsd exits. When the system is halted or rebooted nfslogd
 680  *         may not have an opportunity to flush the buffers.
 681  */
 682 static void
 683 nfsl_flush()
 684 {
 685         struct nfsl_flush_args nfa;
 686 
 687         memset((void *)&nfa, 0, sizeof (nfa));
 688         nfa.version = NFSL_FLUSH_ARGS_VERS;
 689         nfa.directive = NFSL_ALL;       /* flush all asynchronously */
 690 
 691         if (_nfssys(LOG_FLUSH, &nfa) < 0)
 692                 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
 693                     strerror(errno));
 694 }
 695 
 696 /*
 697  * SIGTERM handler.
 698  * Flush logging buffers and exit.
 699  */
 700 static void
 701 sigflush(int sig)
 702 {
 703         nfsl_flush();
 704         _exit(0);
 705 }
 706 
 707 /*
 708  * SIGUSR1 handler.
 709  *
 710  * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
 711  *
 712  * This is a Contracted Project Private interface, for the sole use
 713  * of Sun Cluster HA-NFS. See PSARC/2004/497.
 714  *
 715  * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
 716  */
 717 static void
 718 quiesce(int sig)
 719 {
 720         int error;
 721         int id = NFS_SVCPOOL_ID;
 722 
 723         if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
 724                 /* Request server quiesce at next shutdown */
 725                 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
 726 
 727                 /*
 728                  * ENOENT is returned if there is no matching SVC pool
 729                  * for the id. Possibly because the pool is not yet setup.
 730                  * In this case, just exit as if no error. For all other errors,
 731                  * just return and allow caller to retry.
 732                  */
 733                 if (error && errno != ENOENT) {
 734                         syslog(LOG_ERR,
 735                             "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
 736                             strerror(errno));
 737                         return;
 738                 }
 739         }
 740 
 741         /* Flush logging buffers */
 742         nfsl_flush();
 743 
 744         _exit(0);
 745 }
 746 
 747 /*
 748  * DSS: distributed stable storage.
 749  * Create leaf directories as required, keeping an eye on path
 750  * lengths. Calls exit(1) on failure.
 751  * The pathnames passed in must already exist, and must be writeable by nfsd.
 752  * Note: the leaf directories under NFS4_VAR_DIR are not created here;
 753  * they're created at pkg install.
 754  */
 755 static void
 756 dss_mkleafdirs(uint_t npaths, char **pathnames)
 757 {
 758         int i;
 759         char *tmppath = NULL;
 760 
 761         /*
 762          * Create the temporary storage used by dss_mkleafdir() here,
 763          * rather than in that function, so that it only needs to be
 764          * done once, rather than once for each call. Too big to put
 765          * on the function's stack.
 766          */
 767         tmppath = (char *)malloc(MAXPATHLEN);
 768         if (tmppath == NULL) {
 769                 syslog(LOG_ERR, "tmppath malloc failed. Exiting");
 770                 exit(1);
 771         }
 772 
 773         for (i = 0; i < npaths; i++) {
 774                 char *p = pathnames[i];
 775 
 776                 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
 777                 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
 778         }
 779 
 780         free(tmppath);
 781 }
 782 
 783 /*
 784  * Create "leaf" in "dir" (which must already exist).
 785  * leaf: should start with a '/'
 786  */
 787 static void
 788 dss_mkleafdir(char *dir, char *leaf, char *tmppath)
 789 {
 790         /* MAXPATHLEN includes the terminating NUL */
 791         if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
 792                 fprintf(stderr, "stable storage path too long: %s%s. Exiting",
 793                     dir, leaf);
 794                 exit(1);
 795         }
 796 
 797         (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
 798 
 799         /* the directory may already exist: that's OK */
 800         if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
 801                 fprintf(stderr, "error creating stable storage directory: "
 802                     "%s: %s. Exiting", strerror(errno), tmppath);
 803                 exit(1);
 804         }
 805 }
 806 
 807 /*
 808  * Create the storage dirs, and pass the path list to the kernel.
 809  * This requires the nfssrv module to be loaded; the _nfssys() syscall
 810  * will fail ENOTSUP if it is not.
 811  * Use libnvpair(3LIB) to pass the data to the kernel.
 812  */
 813 static int
 814 dss_init(uint_t npaths, char **pathnames)
 815 {
 816         int i, j, nskipped, error;
 817         char *bufp;
 818         uint32_t bufsize;
 819         size_t buflen;
 820         nvlist_t *nvl;
 821 
 822         if (npaths > 1) {
 823                 /*
 824                  * We need to remove duplicate paths; this might be user error
 825                  * in the general case, but HA-NFSv4 can also cause this.
 826                  * Sort the pathnames array, and NULL out duplicates,
 827                  * then write the non-NULL entries to a new array.
 828                  * Sorting will also allow the kernel to optimise its searches.
 829                  */
 830 
 831                 qsort(pathnames, npaths, sizeof (char *), qstrcmp);
 832 
 833                 /* now NULL out any duplicates */
 834                 i = 0; j = 1; nskipped = 0;
 835                 while (j < npaths) {
 836                         if (strcmp(pathnames[i], pathnames[j]) == NULL) {
 837                                 pathnames[j] = NULL;
 838                                 j++;
 839                                 nskipped++;
 840                                 continue;
 841                         }
 842 
 843                         /* skip i over any of its NULLed duplicates */
 844                         i = j++;
 845                 }
 846 
 847                 /* finally, write the non-NULL entries to a new array */
 848                 if (nskipped > 0) {
 849                         int nreal;
 850                         size_t sz;
 851                         char **tmp_pathnames;
 852 
 853                         nreal = npaths - nskipped;
 854 
 855                         sz = nreal * sizeof (char *);
 856                         tmp_pathnames = (char **)malloc(sz);
 857                         if (tmp_pathnames == NULL) {
 858                                 fprintf(stderr, "tmp_pathnames malloc failed");
 859                                 exit(1);
 860                         }
 861 
 862                         for (i = 0, j = 0; i < npaths; i++)
 863                                 if (pathnames[i] != NULL)
 864                                         tmp_pathnames[j++] = pathnames[i];
 865                         free(pathnames);
 866                         pathnames = tmp_pathnames;
 867                         npaths = nreal;
 868                 }
 869 
 870         }
 871 
 872         /* Create directories to store the distributed state files */
 873         dss_mkleafdirs(npaths, pathnames);
 874 
 875         /* Create the name-value pair list */
 876         error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
 877         if (error) {
 878                 fprintf(stderr, "nvlist_alloc failed: %s.", strerror(errno));
 879                 return (1);
 880         }
 881 
 882         /* Add the pathnames array as a single name-value pair */
 883         error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
 884             pathnames, npaths);
 885         if (error) {
 886                 fprintf(stderr, "nvlist_add_string_array failed: %s.",
 887                     strerror(errno));
 888                 nvlist_free(nvl);
 889                 return (1);
 890         }
 891 
 892         /*
 893          * Pack list into contiguous memory, for passing to kernel.
 894          * nvlist_pack() will allocate the memory for the buffer,
 895          * which we should free() when no longer needed.
 896          * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
 897          */
 898         bufp = NULL;
 899         error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
 900         if (error) {
 901                 fprintf(stderr, "nvlist_pack failed: %s.", strerror(errno));
 902                 nvlist_free(nvl);
 903                 return (1);
 904         }
 905 
 906         /* Now we have the packed buffer, we no longer need the list */
 907         nvlist_free(nvl);
 908 
 909         /*
 910          * Let the kernel know in advance how big the buffer is.
 911          * NOTE: we cannot just pass buflen, since size_t is a long, and
 912          * thus a different size between ILP32 userland and LP64 kernel.
 913          * Use an int for the transfer, since that should be big enough;
 914          * this is a no-op at the moment, here, since nfsd is 32-bit, but
 915          * that could change.
 916          */
 917         bufsize = (uint32_t)buflen;
 918         error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
 919         if (error) {
 920                 fprintf(stderr,
 921                     "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
 922                     strerror(errno));
 923                 free(bufp);
 924                 return (1);
 925         }
 926 
 927         /* Pass the packed buffer to the kernel */
 928         error = _nfssys(NFS4_DSS_SETPATHS, bufp);
 929         if (error) {
 930                 fprintf(stderr,
 931                     "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
 932                 free(bufp);
 933                 return (1);
 934         }
 935 
 936         /*
 937          * The kernel has now unpacked the buffer and extracted the
 938          * pathnames array, we no longer need the buffer.
 939          */
 940         free(bufp);
 941 
 942         return (0);
 943 }
 944 
 945 /*
 946  * Quick sort string compare routine, for qsort.
 947  * Needed to make arg types correct.
 948  */
 949 int
 950 qstrcmp(const void *p1, const void *p2)
 951 {
 952         char *s1 = *((char **)p1);
 953         char *s2 = *((char **)p2);
 954 
 955         return (strcmp(s1, s2));
 956 }