Print this page
4953763 Need way to configure NFS window sizes without changing system wide defaults
6216670 NFS server needs a bigger transmit buffer
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
+++ new/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /*
30 30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 31 * The Regents of the University of California
32 32 * All Rights Reserved
33 33 *
34 34 * University Acknowledgment- Portions of this document are derived from
35 35 * software developed by the University of California, Berkeley, and its
36 36 * contributors.
37 37 */
38 38
39 39 /* LINTLIBRARY */
40 40 /* PROTOLIB1 */
41 41
42 42 /* NFS server */
43 43
44 44 #include <sys/param.h>
45 45 #include <sys/types.h>
46 46 #include <sys/stat.h>
47 47 #include <syslog.h>
48 48 #include <tiuser.h>
49 49 #include <rpc/rpc.h>
50 50 #include <errno.h>
51 51 #include <thread.h>
52 52 #include <sys/resource.h>
53 53 #include <sys/time.h>
54 54 #include <sys/file.h>
55 55 #include <nfs/nfs.h>
56 56 #include <nfs/nfs_acl.h>
57 57 #include <nfs/nfssys.h>
58 58 #include <stdio.h>
59 59 #include <stdio_ext.h>
60 60 #include <stdlib.h>
61 61 #include <signal.h>
62 62 #include <netconfig.h>
63 63 #include <netdir.h>
64 64 #include <string.h>
65 65 #include <unistd.h>
66 66 #include <stropts.h>
67 67 #include <sys/tihdr.h>
68 68 #include <sys/wait.h>
69 69 #include <poll.h>
70 70 #include <priv_utils.h>
71 71 #include <sys/tiuser.h>
72 72 #include <netinet/tcp.h>
73 73 #include <deflt.h>
74 74 #include <rpcsvc/daemon_utils.h>
75 75 #include <rpcsvc/nfs4_prot.h>
76 76 #include <libnvpair.h>
77 77 #include "nfs_tbind.h"
78 78 #include "thrpool.h"
79 79
80 80 /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
81 81 #define QUIESCE_VERSMIN 4
82 82 /* DSS: distributed stable storage */
83 83 #define DSS_VERSMIN 4
84 84
85 85 static int nfssvc(int, struct netbuf, struct netconfig *);
86 86 static int nfssvcpool(int maxservers);
87 87 static int dss_init(uint_t npaths, char **pathnames);
88 88 static void dss_mkleafdirs(uint_t npaths, char **pathnames);
89 89 static void dss_mkleafdir(char *dir, char *leaf, char *path);
90 90 static void usage(void);
91 91 int qstrcmp(const void *s1, const void *s2);
92 92
93 93 extern int _nfssys(int, void *);
94 94
95 95 extern int daemonize_init(void);
96 96 extern void daemonize_fini(int fd);
97 97
98 98 /* signal handlers */
99 99 static void sigflush(int);
100 100 static void quiesce(int);
101 101
102 102 static char *MyName;
103 103 static NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
104 104 "/dev/udp6", NULL };
105 105 /* static NETSELDECL(defaultprotos)[] = { NC_UDP, NC_TCP, NULL }; */
106 106 /*
107 107 * The following are all globals used by routines in nfs_tbind.c.
108 108 */
109 109 size_t end_listen_fds; /* used by conn_close_oldest() */
110 110 size_t num_fds = 0; /* used by multiple routines */
111 111 int listen_backlog = 32; /* used by bind_to_{provider,proto}() */
112 112 int num_servers; /* used by cots_listen_event() */
113 113 int (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
114 114 /* used by cots_listen_event() */
115 115 int max_conns_allowed = -1; /* used by cots_listen_event() */
116 116
117 117 /*
118 118 * Keep track of min/max versions of NFS protocol to be started.
119 119 * Start with the defaults (min == 2, max == 3). We have the
120 120 * capability of starting vers=4 but only if the user requests it.
|
↓ open down ↓ |
120 lines elided |
↑ open up ↑ |
121 121 */
122 122 int nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
123 123 int nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
124 124
125 125 /*
126 126 * Set the default for server delegation enablement and set per
127 127 * /etc/default/nfs configuration (if present).
128 128 */
129 129 int nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
130 130
131 +/*
132 + * Default values for TCP send and receive buffer size of NFS server
133 + * connections.
134 + *
135 + * These values can be tuned by user via /etc/default/nfs configuration
136 + * file by setting NFS_SERVER_SNDBUFSZ and NFS_SERVER_RCVBUFSZ.
137 + *
138 + * To force NFS connections to use system-wide default for TCP send and
139 + * receive buffer, set NFS_SERVER_SNDBUFSZ and NFS_SERVER_RCVBUFSZ to 0.
140 + */
141 +int nfs_server_sndbufsz = 1048576;
142 +int nfs_server_rcvbufsz = 1048576;
143 +
131 144 int
132 145 main(int ac, char *av[])
133 146 {
134 147 char *dir = "/";
135 148 int allflag = 0;
136 149 int df_allflag = 0;
137 150 int opt_cnt = 0;
138 151 int maxservers = 1; /* zero allows infinite number of threads */
139 152 int maxservers_set = 0;
140 153 int logmaxservers = 0;
141 154 int pid;
142 - int i;
155 + int i, bufsz;
143 156 char *provider = (char *)NULL;
144 157 char *df_provider = (char *)NULL;
145 158 struct protob *protobp0, *protobp;
146 159 NETSELDECL(proto) = NULL;
147 160 NETSELDECL(df_proto) = NULL;
148 161 NETSELPDECL(providerp);
149 162 char *defval;
150 163 boolean_t can_do_mlp;
151 164 uint_t dss_npaths = 0;
152 165 char **dss_pathnames = NULL;
153 166 sigset_t sgset;
154 167
155 168 int pipe_fd = -1;
156 169
157 170 MyName = *av;
158 171
159 172 /*
160 173 * Initializations that require more privileges than we need to run.
161 174 */
162 175 (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
163 176 svcsetprio();
164 177
165 178 can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
166 179 if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
167 180 DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
168 181 can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
169 182 (void) fprintf(stderr, "%s should be run with"
170 183 " sufficient privileges\n", av[0]);
171 184 exit(1);
172 185 }
173 186
174 187 (void) enable_extended_FILE_stdio(-1, -1);
175 188
176 189 /*
177 190 * Read in the values from config file first before we check
178 191 * command line options so the options override the file.
179 192 */
180 193 if ((defopen(NFSADMIN)) == 0) {
181 194 if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) {
182 195 errno = 0;
183 196 max_conns_allowed = strtol(defval, (char **)NULL, 10);
184 197 if (errno != 0) {
185 198 max_conns_allowed = -1;
186 199 }
187 200 }
188 201 if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) {
189 202 errno = 0;
190 203 listen_backlog = strtol(defval, (char **)NULL, 10);
191 204 if (errno != 0) {
192 205 listen_backlog = 32;
193 206 }
194 207 }
195 208 if ((defval = defread("NFSD_PROTOCOL=")) != NULL) {
196 209 df_proto = strdup(defval);
197 210 opt_cnt++;
198 211 if (strncasecmp("ALL", defval, 3) == 0) {
199 212 free(df_proto);
200 213 df_proto = NULL;
201 214 df_allflag = 1;
202 215 }
203 216 }
204 217 if ((defval = defread("NFSD_DEVICE=")) != NULL) {
205 218 df_provider = strdup(defval);
206 219 opt_cnt++;
207 220 }
208 221 if ((defval = defread("NFSD_SERVERS=")) != NULL) {
209 222 errno = 0;
210 223 maxservers = strtol(defval, (char **)NULL, 10);
211 224 if (errno != 0) {
212 225 maxservers = 1;
213 226 } else {
214 227 maxservers_set = 1;
215 228 }
216 229 }
217 230 if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) {
218 231 errno = 0;
219 232 nfs_server_vers_min =
220 233 strtol(defval, (char **)NULL, 10);
221 234 if (errno != 0) {
222 235 nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
223 236 }
224 237 }
225 238 if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) {
226 239 errno = 0;
227 240 nfs_server_vers_max =
|
↓ open down ↓ |
75 lines elided |
↑ open up ↑ |
228 241 strtol(defval, (char **)NULL, 10);
229 242 if (errno != 0) {
230 243 nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
231 244 }
232 245 }
233 246 if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) {
234 247 if (strcmp(defval, "off") == 0) {
235 248 nfs_server_delegation = FALSE;
236 249 }
237 250 }
251 + if ((defval = defread("NFS_SERVER_SNDBUFSZ=")) != NULL) {
252 + errno = 0;
253 + bufsz = strtol(defval, (char **)NULL, 10);
254 + if (errno == 0)
255 + nfs_server_sndbufsz = bufsz;
256 + }
257 + if ((defval = defread("NFS_SERVER_RCVBUFSZ=")) != NULL) {
258 + errno = 0;
259 + bufsz = strtol(defval, (char **)NULL, 10);
260 + if (errno == 0)
261 + nfs_server_rcvbufsz = bufsz;
262 + }
238 263
239 264 /* close defaults file */
240 265 defopen(NULL);
241 266 }
242 267
243 268 /*
244 269 * Conflict options error messages.
245 270 */
246 271 if (opt_cnt > 1) {
247 272 (void) fprintf(stderr, "\nConflicting options, only one of "
248 273 "the following options can be specified\n"
249 274 "in " NFSADMIN ":\n"
250 275 "\tNFSD_PROTOCOL=ALL\n"
251 276 "\tNFSD_PROTOCOL=protocol\n"
252 277 "\tNFSD_DEVICE=device\n\n");
253 278 usage();
254 279 }
255 280 opt_cnt = 0;
256 281
257 282 while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
258 283 switch (i) {
259 284 case 'a':
260 285 free(df_proto);
261 286 df_proto = NULL;
262 287 free(df_provider);
263 288 df_provider = NULL;
264 289
265 290 allflag = 1;
266 291 opt_cnt++;
267 292 break;
268 293
269 294 case 'c':
270 295 max_conns_allowed = atoi(optarg);
271 296 break;
272 297
273 298 case 'p':
274 299 proto = optarg;
275 300 df_allflag = 0;
276 301 opt_cnt++;
277 302 break;
278 303
279 304 /*
280 305 * DSS: NFSv4 distributed stable storage.
281 306 *
282 307 * This is a Contracted Project Private interface, for
283 308 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
284 309 */
285 310 case 's':
286 311 if (strlen(optarg) < MAXPATHLEN) {
287 312 /* first "-s" option encountered? */
288 313 if (dss_pathnames == NULL) {
289 314 /*
290 315 * Allocate maximum possible space
291 316 * required given cmdline arg count;
292 317 * "-s <path>" consumes two args.
293 318 */
294 319 size_t sz = (ac / 2) * sizeof (char *);
295 320 dss_pathnames = (char **)malloc(sz);
296 321 if (dss_pathnames == NULL) {
297 322 (void) fprintf(stderr, "%s: "
298 323 "dss paths malloc failed\n",
299 324 av[0]);
300 325 exit(1);
301 326 }
302 327 (void) memset(dss_pathnames, 0, sz);
303 328 }
304 329 dss_pathnames[dss_npaths] = optarg;
305 330 dss_npaths++;
306 331 } else {
307 332 (void) fprintf(stderr,
308 333 "%s: -s pathname too long.\n", av[0]);
309 334 }
310 335 break;
311 336
312 337 case 't':
313 338 provider = optarg;
314 339 df_allflag = 0;
315 340 opt_cnt++;
316 341 break;
317 342
318 343 case 'l':
319 344 listen_backlog = atoi(optarg);
320 345 break;
321 346
322 347 case '?':
323 348 usage();
324 349 /* NOTREACHED */
325 350 }
326 351 }
327 352
328 353 allflag = df_allflag;
329 354 if (proto == NULL)
330 355 proto = df_proto;
331 356 if (provider == NULL)
332 357 provider = df_provider;
333 358
334 359 /*
335 360 * Conflict options error messages.
336 361 */
337 362 if (opt_cnt > 1) {
338 363 (void) fprintf(stderr, "\nConflicting options, only one of "
339 364 "the following options can be specified\n"
340 365 "on the command line:\n"
341 366 "\t-a\n"
342 367 "\t-p protocol\n"
343 368 "\t-t transport\n\n");
344 369 usage();
345 370 }
346 371
347 372 if (proto != NULL &&
348 373 strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
349 374 if (nfs_server_vers_max == NFS_V4) {
350 375 if (nfs_server_vers_min == NFS_V4) {
351 376 fprintf(stderr,
352 377 "NFS version 4 is not supported "
353 378 "with the UDP protocol. Exiting\n");
354 379 exit(3);
355 380 } else {
356 381 fprintf(stderr,
357 382 "NFS version 4 is not supported "
358 383 "with the UDP protocol.\n");
359 384 }
360 385 }
361 386 }
362 387
363 388 /*
364 389 * If there is exactly one more argument, it is the number of
365 390 * servers.
366 391 */
367 392 if (optind == ac - 1) {
368 393 maxservers = atoi(av[optind]);
369 394 maxservers_set = 1;
370 395 }
371 396 /*
372 397 * If there are two or more arguments, then this is a usage error.
373 398 */
374 399 else if (optind < ac - 1)
375 400 usage();
376 401 /*
377 402 * Check the ranges for min/max version specified
378 403 */
379 404 else if ((nfs_server_vers_min > nfs_server_vers_max) ||
380 405 (nfs_server_vers_min < NFS_VERSMIN) ||
381 406 (nfs_server_vers_max > NFS_VERSMAX))
382 407 usage();
383 408 /*
384 409 * There are no additional arguments, and we haven't set maxservers
385 410 * explicitly via the config file, we use a default number of
386 411 * servers. We will log this.
387 412 */
388 413 else if (maxservers_set == 0)
389 414 logmaxservers = 1;
390 415
391 416 /*
392 417 * Basic Sanity checks on options
393 418 *
394 419 * max_conns_allowed must be positive, except for the special
395 420 * value of -1 which is used internally to mean unlimited, -1 isn't
396 421 * documented but we allow it anyway.
397 422 *
398 423 * maxservers must be positive
399 424 * listen_backlog must be positive or zero
400 425 */
401 426 if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
402 427 (listen_backlog < 0) || (maxservers <= 0)) {
403 428 usage();
404 429 }
405 430
406 431 /*
407 432 * Set current dir to server root
408 433 */
409 434 if (chdir(dir) < 0) {
410 435 (void) fprintf(stderr, "%s: ", MyName);
411 436 perror(dir);
412 437 exit(1);
413 438 }
414 439
415 440 #ifndef DEBUG
416 441 pipe_fd = daemonize_init();
417 442 #endif
418 443
419 444 openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
420 445
421 446 /*
422 447 * establish our lock on the lock file and write our pid to it.
423 448 * exit if some other process holds the lock, or if there's any
424 449 * error in writing/locking the file.
425 450 */
426 451 pid = _enter_daemon_lock(NFSD);
427 452 switch (pid) {
428 453 case 0:
429 454 break;
430 455 case -1:
431 456 fprintf(stderr, "error locking for %s: %s", NFSD,
432 457 strerror(errno));
433 458 exit(2);
434 459 default:
435 460 /* daemon was already running */
436 461 exit(0);
437 462 }
438 463
439 464 /*
440 465 * If we've been given a list of paths to be used for distributed
441 466 * stable storage, and provided we're going to run a version
442 467 * that supports it, setup the DSS paths.
443 468 */
444 469 if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
445 470 if (dss_init(dss_npaths, dss_pathnames) != 0) {
446 471 fprintf(stderr, "%s", "dss_init failed. Exiting.");
447 472 exit(1);
448 473 }
449 474 }
450 475
451 476 /*
452 477 * Block all signals till we spawn other
453 478 * threads.
454 479 */
455 480 (void) sigfillset(&sgset);
456 481 (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
457 482
458 483 if (logmaxservers) {
459 484 fprintf(stderr,
460 485 "Number of servers not specified. Using default of %d.",
461 486 maxservers);
462 487 }
463 488
464 489 /*
465 490 * Make sure to unregister any previous versions in case the
466 491 * user is reconfiguring the server in interesting ways.
467 492 */
468 493 svc_unreg(NFS_PROGRAM, NFS_VERSION);
469 494 svc_unreg(NFS_PROGRAM, NFS_V3);
470 495 svc_unreg(NFS_PROGRAM, NFS_V4);
471 496 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
472 497 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
473 498
474 499 /*
475 500 * Set up kernel RPC thread pool for the NFS server.
476 501 */
477 502 if (nfssvcpool(maxservers)) {
478 503 fprintf(stderr, "Can't set up kernel NFS service: %s. Exiting",
479 504 strerror(errno));
480 505 exit(1);
481 506 }
482 507
483 508 /*
484 509 * Set up blocked thread to do LWP creation on behalf of the kernel.
485 510 */
486 511 if (svcwait(NFS_SVCPOOL_ID)) {
487 512 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting",
488 513 strerror(errno));
489 514 exit(1);
490 515 }
491 516
492 517 /*
493 518 * RDMA start and stop thread.
494 519 * Per pool RDMA listener creation and
495 520 * destructor thread.
496 521 *
497 522 * start rdma services and block in the kernel.
498 523 * (only if proto or provider is not set to TCP or UDP)
499 524 */
500 525 if ((proto == NULL) && (provider == NULL)) {
501 526 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
502 527 nfs_server_vers_max, nfs_server_delegation)) {
503 528 fprintf(stderr,
504 529 "Can't set up RDMA creator thread : %s",
505 530 strerror(errno));
506 531 }
507 532 }
508 533
509 534 /*
510 535 * Now open up for signal delivery
511 536 */
512 537
513 538 (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
514 539 sigset(SIGTERM, sigflush);
515 540 sigset(SIGUSR1, quiesce);
516 541
517 542 /*
518 543 * Build a protocol block list for registration.
519 544 */
520 545 protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
521 546 protobp->serv = "NFS";
522 547 protobp->versmin = nfs_server_vers_min;
523 548 protobp->versmax = nfs_server_vers_max;
524 549 protobp->program = NFS_PROGRAM;
525 550
526 551 protobp->next = (struct protob *)malloc(sizeof (struct protob));
|
↓ open down ↓ |
279 lines elided |
↑ open up ↑ |
527 552 protobp = protobp->next;
528 553 protobp->serv = "NFS_ACL"; /* not used */
529 554 protobp->versmin = nfs_server_vers_min;
530 555 /* XXX - this needs work to get the version just right */
531 556 protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
532 557 NFS_ACL_V3 : nfs_server_vers_max;
533 558 protobp->program = NFS_ACL_PROGRAM;
534 559 protobp->next = (struct protob *)NULL;
535 560
536 561 if (allflag) {
537 - if (do_all(protobp0, nfssvc, 0) == -1) {
562 + if (do_all_setbuf(protobp0, nfssvc, 0, nfs_server_sndbufsz,
563 + nfs_server_rcvbufsz) == -1) {
538 564 fprintf(stderr, "setnetconfig failed : %s",
539 565 strerror(errno));
540 566 exit(1);
541 567 }
542 568 } else if (proto) {
543 569 /* there's more than one match for the same protocol */
544 570 struct netconfig *nconf;
545 571 NCONF_HANDLE *nc;
546 572 bool_t protoFound = FALSE;
547 573 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
548 574 fprintf(stderr, "setnetconfig failed : %s",
549 575 strerror(errno));
550 576 goto done;
551 577 }
552 578 while (nconf = getnetconfig(nc)) {
553 579 if (strcmp(nconf->nc_proto, proto) == 0) {
554 580 protoFound = TRUE;
555 - do_one(nconf->nc_device, NULL,
556 - protobp0, nfssvc, 0);
581 + do_one_setbuf(nconf->nc_device, NULL,
582 + protobp0, nfssvc, 0,
583 + nfs_server_sndbufsz, nfs_server_rcvbufsz);
557 584 }
558 585 }
559 586 (void) endnetconfig(nc);
560 587 if (protoFound == FALSE) {
561 588 fprintf(stderr,
562 589 "couldn't find netconfig entry for protocol %s",
563 590 proto);
564 591 }
565 592 } else if (provider)
566 - do_one(provider, proto, protobp0, nfssvc, 0);
593 + do_one_setbuf(provider, proto, protobp0, nfssvc, 0,
594 + nfs_server_sndbufsz, nfs_server_rcvbufsz);
567 595 else {
568 596 for (providerp = defaultproviders;
569 597 *providerp != NULL; providerp++) {
570 598 provider = *providerp;
571 - do_one(provider, NULL, protobp0, nfssvc, 0);
599 + do_one_setbuf(provider, NULL, protobp0, nfssvc, 0,
600 + nfs_server_sndbufsz, nfs_server_rcvbufsz);
572 601 }
573 602 }
574 603 done:
575 604
576 605 free(protobp);
577 606 free(protobp0);
578 607
579 608 if (num_fds == 0) {
580 609 fprintf(stderr, "Could not start NFS service for any protocol."
581 610 " Exiting");
582 611 exit(1);
583 612 }
584 613
585 614 end_listen_fds = num_fds;
586 615
587 616 /*
588 617 * nfsd is up and running as far as we are concerned.
589 618 */
590 619 daemonize_fini(pipe_fd);
591 620
592 621 /*
593 622 * Get rid of unneeded privileges.
594 623 */
595 624 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
596 625 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
597 626
598 627 /*
599 628 * Poll for non-data control events on the transport descriptors.
600 629 */
601 630 poll_for_action();
602 631
603 632 /*
604 633 * If we get here, something failed in poll_for_action().
605 634 */
606 635 return (1);
607 636 }
608 637
609 638 static int
610 639 nfssvcpool(int maxservers)
611 640 {
612 641 struct svcpool_args npa;
613 642
614 643 npa.id = NFS_SVCPOOL_ID;
615 644 npa.maxthreads = maxservers;
616 645 npa.redline = 0;
617 646 npa.qsize = 0;
618 647 npa.timeout = 0;
619 648 npa.stksize = 0;
620 649 npa.max_same_xprt = 0;
621 650 return (_nfssys(SVCPOOL_CREATE, &npa));
622 651 }
623 652
624 653 /*
625 654 * Establish NFS service thread.
626 655 */
627 656 static int
628 657 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
629 658 {
630 659 struct nfs_svc_args nsa;
631 660
632 661 nsa.fd = fd;
633 662 nsa.netid = nconf->nc_netid;
634 663 nsa.addrmask = addrmask;
635 664 if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
636 665 nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
637 666 NFS_V3 : nfs_server_vers_max;
638 667 nsa.versmin = nfs_server_vers_min;
639 668 /*
640 669 * If no version left, silently do nothing, previous
641 670 * checks will have assured at least TCP is available.
642 671 */
643 672 if (nsa.versmin > nsa.versmax)
644 673 return (0);
645 674 } else {
646 675 nsa.versmax = nfs_server_vers_max;
647 676 nsa.versmin = nfs_server_vers_min;
648 677 }
649 678 nsa.delegation = nfs_server_delegation;
650 679 return (_nfssys(NFS_SVC, &nsa));
651 680 }
652 681
653 682 static void
654 683 usage(void)
655 684 {
656 685 (void) fprintf(stderr,
657 686 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
658 687 (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
659 688 (void) fprintf(stderr,
660 689 "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
661 690 (void) fprintf(stderr,
662 691 "\tmax_conns is the maximum number of concurrent connections allowed,\n");
663 692 (void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
664 693 (void) fprintf(stderr, "> zero,\n");
665 694 (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
666 695 (void) fprintf(stderr,
667 696 "\ttransport is a transport provider name (i.e. device),\n");
668 697 (void) fprintf(stderr,
669 698 "\tlisten_backlog is the TCP listen backlog,\n");
670 699 (void) fprintf(stderr,
671 700 "\tand <nservers> must be a decimal number > zero.\n");
672 701 exit(1);
673 702 }
674 703
675 704 /*
676 705 * Issue nfssys system call to flush all logging buffers asynchronously.
677 706 *
678 707 * NOTICE: It is extremely important to flush NFS logging buffers when
679 708 * nfsd exits. When the system is halted or rebooted nfslogd
680 709 * may not have an opportunity to flush the buffers.
681 710 */
682 711 static void
683 712 nfsl_flush()
684 713 {
685 714 struct nfsl_flush_args nfa;
686 715
687 716 memset((void *)&nfa, 0, sizeof (nfa));
688 717 nfa.version = NFSL_FLUSH_ARGS_VERS;
689 718 nfa.directive = NFSL_ALL; /* flush all asynchronously */
690 719
691 720 if (_nfssys(LOG_FLUSH, &nfa) < 0)
692 721 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
693 722 strerror(errno));
694 723 }
695 724
/*
 * Handler installed for SIGTERM.
 *
 * Pushes out the NFS logging buffers via nfsl_flush() and then ends the
 * process immediately with status 0.  The signal number is not used.
 */
static void
sigflush(int sig)
{
	/* flush first: once we are gone nobody else will do it for us */
	nfsl_flush();

	_exit(0);
}
706 735
707 736 /*
708 737 * SIGUSR1 handler.
709 738 *
710 739 * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
711 740 *
712 741 * This is a Contracted Project Private interface, for the sole use
713 742 * of Sun Cluster HA-NFS. See PSARC/2004/497.
714 743 *
715 744 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
716 745 */
717 746 static void
718 747 quiesce(int sig)
719 748 {
720 749 int error;
721 750 int id = NFS_SVCPOOL_ID;
722 751
723 752 if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
724 753 /* Request server quiesce at next shutdown */
725 754 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
726 755
727 756 /*
728 757 * ENOENT is returned if there is no matching SVC pool
729 758 * for the id. Possibly because the pool is not yet setup.
730 759 * In this case, just exit as if no error. For all other errors,
731 760 * just return and allow caller to retry.
732 761 */
733 762 if (error && errno != ENOENT) {
734 763 syslog(LOG_ERR,
735 764 "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
736 765 strerror(errno));
737 766 return;
738 767 }
739 768 }
740 769
741 770 /* Flush logging buffers */
742 771 nfsl_flush();
743 772
744 773 _exit(0);
745 774 }
746 775
747 776 /*
748 777 * DSS: distributed stable storage.
749 778 * Create leaf directories as required, keeping an eye on path
750 779 * lengths. Calls exit(1) on failure.
751 780 * The pathnames passed in must already exist, and must be writeable by nfsd.
752 781 * Note: the leaf directories under NFS4_VAR_DIR are not created here;
753 782 * they're created at pkg install.
754 783 */
755 784 static void
756 785 dss_mkleafdirs(uint_t npaths, char **pathnames)
757 786 {
758 787 int i;
759 788 char *tmppath = NULL;
760 789
761 790 /*
762 791 * Create the temporary storage used by dss_mkleafdir() here,
763 792 * rather than in that function, so that it only needs to be
764 793 * done once, rather than once for each call. Too big to put
765 794 * on the function's stack.
766 795 */
767 796 tmppath = (char *)malloc(MAXPATHLEN);
768 797 if (tmppath == NULL) {
769 798 syslog(LOG_ERR, "tmppath malloc failed. Exiting");
770 799 exit(1);
771 800 }
772 801
773 802 for (i = 0; i < npaths; i++) {
774 803 char *p = pathnames[i];
775 804
776 805 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
777 806 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
778 807 }
779 808
780 809 free(tmppath);
781 810 }
782 811
783 812 /*
784 813 * Create "leaf" in "dir" (which must already exist).
785 814 * leaf: should start with a '/'
786 815 */
787 816 static void
788 817 dss_mkleafdir(char *dir, char *leaf, char *tmppath)
789 818 {
790 819 /* MAXPATHLEN includes the terminating NUL */
791 820 if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
792 821 fprintf(stderr, "stable storage path too long: %s%s. Exiting",
793 822 dir, leaf);
794 823 exit(1);
795 824 }
796 825
797 826 (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
798 827
799 828 /* the directory may already exist: that's OK */
800 829 if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
801 830 fprintf(stderr, "error creating stable storage directory: "
802 831 "%s: %s. Exiting", strerror(errno), tmppath);
803 832 exit(1);
804 833 }
805 834 }
806 835
807 836 /*
808 837 * Create the storage dirs, and pass the path list to the kernel.
809 838 * This requires the nfssrv module to be loaded; the _nfssys() syscall
810 839 * will fail ENOTSUP if it is not.
811 840 * Use libnvpair(3LIB) to pass the data to the kernel.
812 841 */
813 842 static int
814 843 dss_init(uint_t npaths, char **pathnames)
815 844 {
816 845 int i, j, nskipped, error;
817 846 char *bufp;
818 847 uint32_t bufsize;
819 848 size_t buflen;
820 849 nvlist_t *nvl;
821 850
822 851 if (npaths > 1) {
823 852 /*
824 853 * We need to remove duplicate paths; this might be user error
825 854 * in the general case, but HA-NFSv4 can also cause this.
826 855 * Sort the pathnames array, and NULL out duplicates,
827 856 * then write the non-NULL entries to a new array.
828 857 * Sorting will also allow the kernel to optimise its searches.
829 858 */
830 859
831 860 qsort(pathnames, npaths, sizeof (char *), qstrcmp);
832 861
833 862 /* now NULL out any duplicates */
834 863 i = 0; j = 1; nskipped = 0;
835 864 while (j < npaths) {
836 865 if (strcmp(pathnames[i], pathnames[j]) == NULL) {
837 866 pathnames[j] = NULL;
838 867 j++;
839 868 nskipped++;
840 869 continue;
841 870 }
842 871
843 872 /* skip i over any of its NULLed duplicates */
844 873 i = j++;
845 874 }
846 875
847 876 /* finally, write the non-NULL entries to a new array */
848 877 if (nskipped > 0) {
849 878 int nreal;
850 879 size_t sz;
851 880 char **tmp_pathnames;
852 881
853 882 nreal = npaths - nskipped;
854 883
855 884 sz = nreal * sizeof (char *);
856 885 tmp_pathnames = (char **)malloc(sz);
857 886 if (tmp_pathnames == NULL) {
858 887 fprintf(stderr, "tmp_pathnames malloc failed");
859 888 exit(1);
860 889 }
861 890
862 891 for (i = 0, j = 0; i < npaths; i++)
863 892 if (pathnames[i] != NULL)
864 893 tmp_pathnames[j++] = pathnames[i];
865 894 free(pathnames);
866 895 pathnames = tmp_pathnames;
867 896 npaths = nreal;
868 897 }
869 898
870 899 }
871 900
872 901 /* Create directories to store the distributed state files */
873 902 dss_mkleafdirs(npaths, pathnames);
874 903
875 904 /* Create the name-value pair list */
876 905 error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
877 906 if (error) {
878 907 fprintf(stderr, "nvlist_alloc failed: %s.", strerror(errno));
879 908 return (1);
880 909 }
881 910
882 911 /* Add the pathnames array as a single name-value pair */
883 912 error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
884 913 pathnames, npaths);
885 914 if (error) {
886 915 fprintf(stderr, "nvlist_add_string_array failed: %s.",
887 916 strerror(errno));
888 917 nvlist_free(nvl);
889 918 return (1);
890 919 }
891 920
892 921 /*
893 922 * Pack list into contiguous memory, for passing to kernel.
894 923 * nvlist_pack() will allocate the memory for the buffer,
895 924 * which we should free() when no longer needed.
896 925 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
897 926 */
898 927 bufp = NULL;
899 928 error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
900 929 if (error) {
901 930 fprintf(stderr, "nvlist_pack failed: %s.", strerror(errno));
902 931 nvlist_free(nvl);
903 932 return (1);
904 933 }
905 934
906 935 /* Now we have the packed buffer, we no longer need the list */
907 936 nvlist_free(nvl);
908 937
909 938 /*
910 939 * Let the kernel know in advance how big the buffer is.
911 940 * NOTE: we cannot just pass buflen, since size_t is a long, and
912 941 * thus a different size between ILP32 userland and LP64 kernel.
913 942 * Use an int for the transfer, since that should be big enough;
914 943 * this is a no-op at the moment, here, since nfsd is 32-bit, but
915 944 * that could change.
916 945 */
917 946 bufsize = (uint32_t)buflen;
918 947 error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
919 948 if (error) {
920 949 fprintf(stderr,
921 950 "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
922 951 strerror(errno));
923 952 free(bufp);
924 953 return (1);
925 954 }
926 955
927 956 /* Pass the packed buffer to the kernel */
928 957 error = _nfssys(NFS4_DSS_SETPATHS, bufp);
929 958 if (error) {
930 959 fprintf(stderr,
931 960 "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
932 961 free(bufp);
933 962 return (1);
934 963 }
935 964
936 965 /*
937 966 * The kernel has now unpacked the buffer and extracted the
938 967 * pathnames array, we no longer need the buffer.
939 968 */
940 969 free(bufp);
941 970
942 971 return (0);
943 972 }
944 973
/*
 * qsort(3C) comparison callback for an array of C strings.
 *
 * qsort hands each callback a pointer to an array element, i.e. a
 * pointer to a "char *"; this wrapper dereferences both arguments and
 * returns the strcmp() ordering of the two strings they point at.
 */
int
qstrcmp(const void *p1, const void *p2)
{
	const char *const *lhs = p1;
	const char *const *rhs = p2;

	return (strcmp(*lhs, *rhs));
}
|
↓ open down ↓ |
375 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX