1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39 /* LINTLIBRARY */
40 /* PROTOLIB1 */
41
42 /* NFS server */
43
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <syslog.h>
48 #include <tiuser.h>
49 #include <rpc/rpc.h>
50 #include <errno.h>
51 #include <thread.h>
52 #include <sys/resource.h>
53 #include <sys/time.h>
54 #include <sys/file.h>
55 #include <nfs/nfs.h>
56 #include <nfs/nfs_acl.h>
57 #include <nfs/nfssys.h>
58 #include <stdio.h>
59 #include <stdio_ext.h>
60 #include <stdlib.h>
61 #include <signal.h>
62 #include <netconfig.h>
63 #include <netdir.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <stropts.h>
67 #include <sys/tihdr.h>
68 #include <sys/wait.h>
69 #include <poll.h>
70 #include <priv_utils.h>
71 #include <sys/tiuser.h>
72 #include <netinet/tcp.h>
73 #include <deflt.h>
74 #include <rpcsvc/daemon_utils.h>
75 #include <rpcsvc/nfs4_prot.h>
76 #include <libnvpair.h>
77 #include "nfs_tbind.h"
78 #include "thrpool.h"
79
80 /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
81 #define QUIESCE_VERSMIN 4
82 /* DSS: distributed stable storage */
83 #define DSS_VERSMIN 4
84
85 static int nfssvc(int, struct netbuf, struct netconfig *);
86 static int nfssvcpool(int maxservers);
87 static int dss_init(uint_t npaths, char **pathnames);
88 static void dss_mkleafdirs(uint_t npaths, char **pathnames);
89 static void dss_mkleafdir(char *dir, char *leaf, char *path);
90 static void usage(void);
91 int qstrcmp(const void *s1, const void *s2);
92
93 extern int _nfssys(int, void *);
94
95 extern int daemonize_init(void);
96 extern void daemonize_fini(int fd);
97
98 /* signal handlers */
99 static void sigflush(int);
100 static void quiesce(int);
101
102 static char *MyName;
103 static NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
104 "/dev/udp6", NULL };
105 /* static NETSELDECL(defaultprotos)[] = { NC_UDP, NC_TCP, NULL }; */
106 /*
107 * The following are all globals used by routines in nfs_tbind.c.
108 */
109 size_t end_listen_fds; /* used by conn_close_oldest() */
110 size_t num_fds = 0; /* used by multiple routines */
111 int listen_backlog = 32; /* used by bind_to_{provider,proto}() */
112 int num_servers; /* used by cots_listen_event() */
113 int (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
114 /* used by cots_listen_event() */
115 int max_conns_allowed = -1; /* used by cots_listen_event() */
116
117 /*
118 * Keep track of min/max versions of NFS protocol to be started.
119 * Start with the defaults (min == 2, max == 3). We have the
120 * capability of starting vers=4 but only if the user requests it.
121 */
122 int nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
123 int nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
124
125 /*
126 * Set the default for server delegation enablement and set per
127 * /etc/default/nfs configuration (if present).
128 */
129 int nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
130
131 /*
132 * Default values for TCP send and receive buffer size of NFS server
133 * connections.
134 *
135 * These values can be tuned by user via /etc/default/nfs configuration
136 * file by setting NFS_SERVER_SNDBUFSZ and NFS_SERVER_RCVBUFSZ.
137 *
138 * To force NFS connections to use system-wide default for TCP send and
139 * receive buffer, set NFS_SERVER_SNDBUFSZ and NFS_SERVER_RCVBUFSZ to 0.
140 */
141 int nfs_server_sndbufsz = 1048576;
142 int nfs_server_rcvbufsz = 1048576;
143
144 int
145 main(int ac, char *av[])
146 {
147 char *dir = "/";
148 int allflag = 0;
149 int df_allflag = 0;
150 int opt_cnt = 0;
151 int maxservers = 1; /* zero allows inifinte number of threads */
152 int maxservers_set = 0;
153 int logmaxservers = 0;
154 int pid;
155 int i, bufsz;
156 char *provider = (char *)NULL;
157 char *df_provider = (char *)NULL;
158 struct protob *protobp0, *protobp;
159 NETSELDECL(proto) = NULL;
160 NETSELDECL(df_proto) = NULL;
161 NETSELPDECL(providerp);
162 char *defval;
163 boolean_t can_do_mlp;
164 uint_t dss_npaths = 0;
165 char **dss_pathnames = NULL;
166 sigset_t sgset;
167
168 int pipe_fd = -1;
169
170 MyName = *av;
171
172 /*
173 * Initializations that require more privileges than we need to run.
174 */
175 (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
176 svcsetprio();
177
178 can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
179 if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
180 DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
181 can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
182 (void) fprintf(stderr, "%s should be run with"
183 " sufficient privileges\n", av[0]);
184 exit(1);
185 }
186
187 (void) enable_extended_FILE_stdio(-1, -1);
188
189 /*
190 * Read in the values from config file first before we check
191 * command line options so the options override the file.
192 */
193 if ((defopen(NFSADMIN)) == 0) {
194 if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) {
195 errno = 0;
196 max_conns_allowed = strtol(defval, (char **)NULL, 10);
197 if (errno != 0) {
198 max_conns_allowed = -1;
199 }
200 }
201 if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) {
202 errno = 0;
203 listen_backlog = strtol(defval, (char **)NULL, 10);
204 if (errno != 0) {
205 listen_backlog = 32;
206 }
207 }
208 if ((defval = defread("NFSD_PROTOCOL=")) != NULL) {
209 df_proto = strdup(defval);
210 opt_cnt++;
211 if (strncasecmp("ALL", defval, 3) == 0) {
212 free(df_proto);
213 df_proto = NULL;
214 df_allflag = 1;
215 }
216 }
217 if ((defval = defread("NFSD_DEVICE=")) != NULL) {
218 df_provider = strdup(defval);
219 opt_cnt++;
220 }
221 if ((defval = defread("NFSD_SERVERS=")) != NULL) {
222 errno = 0;
223 maxservers = strtol(defval, (char **)NULL, 10);
224 if (errno != 0) {
225 maxservers = 1;
226 } else {
227 maxservers_set = 1;
228 }
229 }
230 if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) {
231 errno = 0;
232 nfs_server_vers_min =
233 strtol(defval, (char **)NULL, 10);
234 if (errno != 0) {
235 nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
236 }
237 }
238 if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) {
239 errno = 0;
240 nfs_server_vers_max =
241 strtol(defval, (char **)NULL, 10);
242 if (errno != 0) {
243 nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
244 }
245 }
246 if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) {
247 if (strcmp(defval, "off") == 0) {
248 nfs_server_delegation = FALSE;
249 }
250 }
251 if ((defval = defread("NFS_SERVER_SNDBUFSZ=")) != NULL) {
252 errno = 0;
253 bufsz = strtol(defval, (char **)NULL, 10);
254 if (errno == 0)
255 nfs_server_sndbufsz = bufsz;
256 }
257 if ((defval = defread("NFS_SERVER_RCVBUFSZ=")) != NULL) {
258 errno = 0;
259 bufsz = strtol(defval, (char **)NULL, 10);
260 if (errno == 0)
261 nfs_server_rcvbufsz = bufsz;
262 }
263
264 /* close defaults file */
265 defopen(NULL);
266 }
267
268 /*
269 * Conflict options error messages.
270 */
271 if (opt_cnt > 1) {
272 (void) fprintf(stderr, "\nConflicting options, only one of "
273 "the following options can be specified\n"
274 "in " NFSADMIN ":\n"
275 "\tNFSD_PROTOCOL=ALL\n"
276 "\tNFSD_PROTOCOL=protocol\n"
277 "\tNFSD_DEVICE=device\n\n");
278 usage();
279 }
280 opt_cnt = 0;
281
282 while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
283 switch (i) {
284 case 'a':
285 free(df_proto);
286 df_proto = NULL;
287 free(df_provider);
288 df_provider = NULL;
289
290 allflag = 1;
291 opt_cnt++;
292 break;
293
294 case 'c':
295 max_conns_allowed = atoi(optarg);
296 break;
297
298 case 'p':
299 proto = optarg;
300 df_allflag = 0;
301 opt_cnt++;
302 break;
303
304 /*
305 * DSS: NFSv4 distributed stable storage.
306 *
307 * This is a Contracted Project Private interface, for
308 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
309 */
310 case 's':
311 if (strlen(optarg) < MAXPATHLEN) {
312 /* first "-s" option encountered? */
313 if (dss_pathnames == NULL) {
314 /*
315 * Allocate maximum possible space
316 * required given cmdline arg count;
317 * "-s <path>" consumes two args.
318 */
319 size_t sz = (ac / 2) * sizeof (char *);
320 dss_pathnames = (char **)malloc(sz);
321 if (dss_pathnames == NULL) {
322 (void) fprintf(stderr, "%s: "
323 "dss paths malloc failed\n",
324 av[0]);
325 exit(1);
326 }
327 (void) memset(dss_pathnames, 0, sz);
328 }
329 dss_pathnames[dss_npaths] = optarg;
330 dss_npaths++;
331 } else {
332 (void) fprintf(stderr,
333 "%s: -s pathname too long.\n", av[0]);
334 }
335 break;
336
337 case 't':
338 provider = optarg;
339 df_allflag = 0;
340 opt_cnt++;
341 break;
342
343 case 'l':
344 listen_backlog = atoi(optarg);
345 break;
346
347 case '?':
348 usage();
349 /* NOTREACHED */
350 }
351 }
352
353 allflag = df_allflag;
354 if (proto == NULL)
355 proto = df_proto;
356 if (provider == NULL)
357 provider = df_provider;
358
359 /*
360 * Conflict options error messages.
361 */
362 if (opt_cnt > 1) {
363 (void) fprintf(stderr, "\nConflicting options, only one of "
364 "the following options can be specified\n"
365 "on the command line:\n"
366 "\t-a\n"
367 "\t-p protocol\n"
368 "\t-t transport\n\n");
369 usage();
370 }
371
372 if (proto != NULL &&
373 strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
374 if (nfs_server_vers_max == NFS_V4) {
375 if (nfs_server_vers_min == NFS_V4) {
376 fprintf(stderr,
377 "NFS version 4 is not supported "
378 "with the UDP protocol. Exiting\n");
379 exit(3);
380 } else {
381 fprintf(stderr,
382 "NFS version 4 is not supported "
383 "with the UDP protocol.\n");
384 }
385 }
386 }
387
388 /*
389 * If there is exactly one more argument, it is the number of
390 * servers.
391 */
392 if (optind == ac - 1) {
393 maxservers = atoi(av[optind]);
394 maxservers_set = 1;
395 }
396 /*
397 * If there are two or more arguments, then this is a usage error.
398 */
399 else if (optind < ac - 1)
400 usage();
401 /*
402 * Check the ranges for min/max version specified
403 */
404 else if ((nfs_server_vers_min > nfs_server_vers_max) ||
405 (nfs_server_vers_min < NFS_VERSMIN) ||
406 (nfs_server_vers_max > NFS_VERSMAX))
407 usage();
408 /*
409 * There are no additional arguments, and we haven't set maxservers
410 * explicitly via the config file, we use a default number of
411 * servers. We will log this.
412 */
413 else if (maxservers_set == 0)
414 logmaxservers = 1;
415
416 /*
417 * Basic Sanity checks on options
418 *
419 * max_conns_allowed must be positive, except for the special
420 * value of -1 which is used internally to mean unlimited, -1 isn't
421 * documented but we allow it anyway.
422 *
423 * maxservers must be positive
424 * listen_backlog must be positive or zero
425 */
426 if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
427 (listen_backlog < 0) || (maxservers <= 0)) {
428 usage();
429 }
430
431 /*
432 * Set current dir to server root
433 */
434 if (chdir(dir) < 0) {
435 (void) fprintf(stderr, "%s: ", MyName);
436 perror(dir);
437 exit(1);
438 }
439
440 #ifndef DEBUG
441 pipe_fd = daemonize_init();
442 #endif
443
444 openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
445
446 /*
447 * establish our lock on the lock file and write our pid to it.
448 * exit if some other process holds the lock, or if there's any
449 * error in writing/locking the file.
450 */
451 pid = _enter_daemon_lock(NFSD);
452 switch (pid) {
453 case 0:
454 break;
455 case -1:
456 fprintf(stderr, "error locking for %s: %s", NFSD,
457 strerror(errno));
458 exit(2);
459 default:
460 /* daemon was already running */
461 exit(0);
462 }
463
464 /*
465 * If we've been given a list of paths to be used for distributed
466 * stable storage, and provided we're going to run a version
467 * that supports it, setup the DSS paths.
468 */
469 if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
470 if (dss_init(dss_npaths, dss_pathnames) != 0) {
471 fprintf(stderr, "%s", "dss_init failed. Exiting.");
472 exit(1);
473 }
474 }
475
476 /*
477 * Block all signals till we spawn other
478 * threads.
479 */
480 (void) sigfillset(&sgset);
481 (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
482
483 if (logmaxservers) {
484 fprintf(stderr,
485 "Number of servers not specified. Using default of %d.",
486 maxservers);
487 }
488
489 /*
490 * Make sure to unregister any previous versions in case the
491 * user is reconfiguring the server in interesting ways.
492 */
493 svc_unreg(NFS_PROGRAM, NFS_VERSION);
494 svc_unreg(NFS_PROGRAM, NFS_V3);
495 svc_unreg(NFS_PROGRAM, NFS_V4);
496 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
497 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
498
499 /*
500 * Set up kernel RPC thread pool for the NFS server.
501 */
502 if (nfssvcpool(maxservers)) {
503 fprintf(stderr, "Can't set up kernel NFS service: %s. Exiting",
504 strerror(errno));
505 exit(1);
506 }
507
508 /*
509 * Set up blocked thread to do LWP creation on behalf of the kernel.
510 */
511 if (svcwait(NFS_SVCPOOL_ID)) {
512 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting",
513 strerror(errno));
514 exit(1);
515 }
516
517 /*
518 * RDMA start and stop thread.
519 * Per pool RDMA listener creation and
520 * destructor thread.
521 *
522 * start rdma services and block in the kernel.
523 * (only if proto or provider is not set to TCP or UDP)
524 */
525 if ((proto == NULL) && (provider == NULL)) {
526 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
527 nfs_server_vers_max, nfs_server_delegation)) {
528 fprintf(stderr,
529 "Can't set up RDMA creator thread : %s",
530 strerror(errno));
531 }
532 }
533
534 /*
535 * Now open up for signal delivery
536 */
537
538 (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
539 sigset(SIGTERM, sigflush);
540 sigset(SIGUSR1, quiesce);
541
542 /*
543 * Build a protocol block list for registration.
544 */
545 protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
546 protobp->serv = "NFS";
547 protobp->versmin = nfs_server_vers_min;
548 protobp->versmax = nfs_server_vers_max;
549 protobp->program = NFS_PROGRAM;
550
551 protobp->next = (struct protob *)malloc(sizeof (struct protob));
552 protobp = protobp->next;
553 protobp->serv = "NFS_ACL"; /* not used */
554 protobp->versmin = nfs_server_vers_min;
555 /* XXX - this needs work to get the version just right */
556 protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
557 NFS_ACL_V3 : nfs_server_vers_max;
558 protobp->program = NFS_ACL_PROGRAM;
559 protobp->next = (struct protob *)NULL;
560
561 if (allflag) {
562 if (do_all_setbuf(protobp0, nfssvc, 0, nfs_server_sndbufsz,
563 nfs_server_rcvbufsz) == -1) {
564 fprintf(stderr, "setnetconfig failed : %s",
565 strerror(errno));
566 exit(1);
567 }
568 } else if (proto) {
569 /* there's more than one match for the same protocol */
570 struct netconfig *nconf;
571 NCONF_HANDLE *nc;
572 bool_t protoFound = FALSE;
573 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
574 fprintf(stderr, "setnetconfig failed : %s",
575 strerror(errno));
576 goto done;
577 }
578 while (nconf = getnetconfig(nc)) {
579 if (strcmp(nconf->nc_proto, proto) == 0) {
580 protoFound = TRUE;
581 do_one_setbuf(nconf->nc_device, NULL,
582 protobp0, nfssvc, 0,
583 nfs_server_sndbufsz, nfs_server_rcvbufsz);
584 }
585 }
586 (void) endnetconfig(nc);
587 if (protoFound == FALSE) {
588 fprintf(stderr,
589 "couldn't find netconfig entry for protocol %s",
590 proto);
591 }
592 } else if (provider)
593 do_one_setbuf(provider, proto, protobp0, nfssvc, 0,
594 nfs_server_sndbufsz, nfs_server_rcvbufsz);
595 else {
596 for (providerp = defaultproviders;
597 *providerp != NULL; providerp++) {
598 provider = *providerp;
599 do_one_setbuf(provider, NULL, protobp0, nfssvc, 0,
600 nfs_server_sndbufsz, nfs_server_rcvbufsz);
601 }
602 }
603 done:
604
605 free(protobp);
606 free(protobp0);
607
608 if (num_fds == 0) {
609 fprintf(stderr, "Could not start NFS service for any protocol."
610 " Exiting");
611 exit(1);
612 }
613
614 end_listen_fds = num_fds;
615
616 /*
617 * nfsd is up and running as far as we are concerned.
618 */
619 daemonize_fini(pipe_fd);
620
621 /*
622 * Get rid of unneeded privileges.
623 */
624 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
625 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
626
627 /*
628 * Poll for non-data control events on the transport descriptors.
629 */
630 poll_for_action();
631
632 /*
633 * If we get here, something failed in poll_for_action().
634 */
635 return (1);
636 }
637
638 static int
639 nfssvcpool(int maxservers)
640 {
641 struct svcpool_args npa;
642
643 npa.id = NFS_SVCPOOL_ID;
644 npa.maxthreads = maxservers;
645 npa.redline = 0;
646 npa.qsize = 0;
647 npa.timeout = 0;
648 npa.stksize = 0;
649 npa.max_same_xprt = 0;
650 return (_nfssys(SVCPOOL_CREATE, &npa));
651 }
652
653 /*
654 * Establish NFS service thread.
655 */
656 static int
657 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
658 {
659 struct nfs_svc_args nsa;
660
661 nsa.fd = fd;
662 nsa.netid = nconf->nc_netid;
663 nsa.addrmask = addrmask;
664 if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
665 nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
666 NFS_V3 : nfs_server_vers_max;
667 nsa.versmin = nfs_server_vers_min;
668 /*
669 * If no version left, silently do nothing, previous
670 * checks will have assured at least TCP is available.
671 */
672 if (nsa.versmin > nsa.versmax)
673 return (0);
674 } else {
675 nsa.versmax = nfs_server_vers_max;
676 nsa.versmin = nfs_server_vers_min;
677 }
678 nsa.delegation = nfs_server_delegation;
679 return (_nfssys(NFS_SVC, &nsa));
680 }
681
682 static void
683 usage(void)
684 {
685 (void) fprintf(stderr,
686 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
687 (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
688 (void) fprintf(stderr,
689 "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
690 (void) fprintf(stderr,
691 "\tmax_conns is the maximum number of concurrent connections allowed,\n");
692 (void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
693 (void) fprintf(stderr, "> zero,\n");
694 (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
695 (void) fprintf(stderr,
696 "\ttransport is a transport provider name (i.e. device),\n");
697 (void) fprintf(stderr,
698 "\tlisten_backlog is the TCP listen backlog,\n");
699 (void) fprintf(stderr,
700 "\tand <nservers> must be a decimal number > zero.\n");
701 exit(1);
702 }
703
704 /*
705 * Issue nfssys system call to flush all logging buffers asynchronously.
706 *
707 * NOTICE: It is extremely important to flush NFS logging buffers when
708 * nfsd exits. When the system is halted or rebooted nfslogd
709 * may not have an opportunity to flush the buffers.
710 */
711 static void
712 nfsl_flush()
713 {
714 struct nfsl_flush_args nfa;
715
716 memset((void *)&nfa, 0, sizeof (nfa));
717 nfa.version = NFSL_FLUSH_ARGS_VERS;
718 nfa.directive = NFSL_ALL; /* flush all asynchronously */
719
720 if (_nfssys(LOG_FLUSH, &nfa) < 0)
721 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
722 strerror(errno));
723 }
724
/*
 * Handler installed for SIGTERM: flush the NFS logging buffers, then
 * terminate the process immediately with status 0.  The signal number
 * is not examined.
 */
/* ARGSUSED */
static void
sigflush(int sig)
{
	nfsl_flush();

	_exit(0);
}
735
736 /*
737 * SIGUSR1 handler.
738 *
739 * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
740 *
741 * This is a Contracted Project Private interface, for the sole use
742 * of Sun Cluster HA-NFS. See PSARC/2004/497.
743 *
744 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
745 */
746 static void
747 quiesce(int sig)
748 {
749 int error;
750 int id = NFS_SVCPOOL_ID;
751
752 if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
753 /* Request server quiesce at next shutdown */
754 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
755
756 /*
757 * ENOENT is returned if there is no matching SVC pool
758 * for the id. Possibly because the pool is not yet setup.
759 * In this case, just exit as if no error. For all other errors,
760 * just return and allow caller to retry.
761 */
762 if (error && errno != ENOENT) {
763 syslog(LOG_ERR,
764 "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
765 strerror(errno));
766 return;
767 }
768 }
769
770 /* Flush logging buffers */
771 nfsl_flush();
772
773 _exit(0);
774 }
775
776 /*
777 * DSS: distributed stable storage.
778 * Create leaf directories as required, keeping an eye on path
779 * lengths. Calls exit(1) on failure.
780 * The pathnames passed in must already exist, and must be writeable by nfsd.
781 * Note: the leaf directories under NFS4_VAR_DIR are not created here;
782 * they're created at pkg install.
783 */
784 static void
785 dss_mkleafdirs(uint_t npaths, char **pathnames)
786 {
787 int i;
788 char *tmppath = NULL;
789
790 /*
791 * Create the temporary storage used by dss_mkleafdir() here,
792 * rather than in that function, so that it only needs to be
793 * done once, rather than once for each call. Too big to put
794 * on the function's stack.
795 */
796 tmppath = (char *)malloc(MAXPATHLEN);
797 if (tmppath == NULL) {
798 syslog(LOG_ERR, "tmppath malloc failed. Exiting");
799 exit(1);
800 }
801
802 for (i = 0; i < npaths; i++) {
803 char *p = pathnames[i];
804
805 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
806 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
807 }
808
809 free(tmppath);
810 }
811
812 /*
813 * Create "leaf" in "dir" (which must already exist).
814 * leaf: should start with a '/'
815 */
816 static void
817 dss_mkleafdir(char *dir, char *leaf, char *tmppath)
818 {
819 /* MAXPATHLEN includes the terminating NUL */
820 if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
821 fprintf(stderr, "stable storage path too long: %s%s. Exiting",
822 dir, leaf);
823 exit(1);
824 }
825
826 (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
827
828 /* the directory may already exist: that's OK */
829 if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
830 fprintf(stderr, "error creating stable storage directory: "
831 "%s: %s. Exiting", strerror(errno), tmppath);
832 exit(1);
833 }
834 }
835
836 /*
837 * Create the storage dirs, and pass the path list to the kernel.
838 * This requires the nfssrv module to be loaded; the _nfssys() syscall
839 * will fail ENOTSUP if it is not.
840 * Use libnvpair(3LIB) to pass the data to the kernel.
841 */
842 static int
843 dss_init(uint_t npaths, char **pathnames)
844 {
845 int i, j, nskipped, error;
846 char *bufp;
847 uint32_t bufsize;
848 size_t buflen;
849 nvlist_t *nvl;
850
851 if (npaths > 1) {
852 /*
853 * We need to remove duplicate paths; this might be user error
854 * in the general case, but HA-NFSv4 can also cause this.
855 * Sort the pathnames array, and NULL out duplicates,
856 * then write the non-NULL entries to a new array.
857 * Sorting will also allow the kernel to optimise its searches.
858 */
859
860 qsort(pathnames, npaths, sizeof (char *), qstrcmp);
861
862 /* now NULL out any duplicates */
863 i = 0; j = 1; nskipped = 0;
864 while (j < npaths) {
865 if (strcmp(pathnames[i], pathnames[j]) == NULL) {
866 pathnames[j] = NULL;
867 j++;
868 nskipped++;
869 continue;
870 }
871
872 /* skip i over any of its NULLed duplicates */
873 i = j++;
874 }
875
876 /* finally, write the non-NULL entries to a new array */
877 if (nskipped > 0) {
878 int nreal;
879 size_t sz;
880 char **tmp_pathnames;
881
882 nreal = npaths - nskipped;
883
884 sz = nreal * sizeof (char *);
885 tmp_pathnames = (char **)malloc(sz);
886 if (tmp_pathnames == NULL) {
887 fprintf(stderr, "tmp_pathnames malloc failed");
888 exit(1);
889 }
890
891 for (i = 0, j = 0; i < npaths; i++)
892 if (pathnames[i] != NULL)
893 tmp_pathnames[j++] = pathnames[i];
894 free(pathnames);
895 pathnames = tmp_pathnames;
896 npaths = nreal;
897 }
898
899 }
900
901 /* Create directories to store the distributed state files */
902 dss_mkleafdirs(npaths, pathnames);
903
904 /* Create the name-value pair list */
905 error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
906 if (error) {
907 fprintf(stderr, "nvlist_alloc failed: %s.", strerror(errno));
908 return (1);
909 }
910
911 /* Add the pathnames array as a single name-value pair */
912 error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
913 pathnames, npaths);
914 if (error) {
915 fprintf(stderr, "nvlist_add_string_array failed: %s.",
916 strerror(errno));
917 nvlist_free(nvl);
918 return (1);
919 }
920
921 /*
922 * Pack list into contiguous memory, for passing to kernel.
923 * nvlist_pack() will allocate the memory for the buffer,
924 * which we should free() when no longer needed.
925 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
926 */
927 bufp = NULL;
928 error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
929 if (error) {
930 fprintf(stderr, "nvlist_pack failed: %s.", strerror(errno));
931 nvlist_free(nvl);
932 return (1);
933 }
934
935 /* Now we have the packed buffer, we no longer need the list */
936 nvlist_free(nvl);
937
938 /*
939 * Let the kernel know in advance how big the buffer is.
940 * NOTE: we cannot just pass buflen, since size_t is a long, and
941 * thus a different size between ILP32 userland and LP64 kernel.
942 * Use an int for the transfer, since that should be big enough;
943 * this is a no-op at the moment, here, since nfsd is 32-bit, but
944 * that could change.
945 */
946 bufsize = (uint32_t)buflen;
947 error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
948 if (error) {
949 fprintf(stderr,
950 "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
951 strerror(errno));
952 free(bufp);
953 return (1);
954 }
955
956 /* Pass the packed buffer to the kernel */
957 error = _nfssys(NFS4_DSS_SETPATHS, bufp);
958 if (error) {
959 fprintf(stderr,
960 "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
961 free(bufp);
962 return (1);
963 }
964
965 /*
966 * The kernel has now unpacked the buffer and extracted the
967 * pathnames array, we no longer need the buffer.
968 */
969 free(bufp);
970
971 return (0);
972 }
973
/*
 * qsort() comparison callback for an array of C strings.
 *
 * Each argument points at an array element, i.e. at a "char *"; the two
 * strings they refer to are compared with strcmp() and its result is
 * returned unchanged.
 */
int
qstrcmp(const void *p1, const void *p2)
{
	const char *const *lhs = p1;
	const char *const *rhs = p2;

	return (strcmp(*lhs, *rhs));
}