1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * nfs_tbind.c, common part for nfsd and lockd.
  28  */
  29 
  30 #define PORTMAP
  31 
  32 #include <tiuser.h>
  33 #include <fcntl.h>
  34 #include <netconfig.h>
  35 #include <stropts.h>
  36 #include <errno.h>
  37 #include <syslog.h>
  38 #include <rpc/rpc.h>
  39 #include <rpc/pmap_prot.h>
  40 #include <sys/time.h>
  41 #include <sys/resource.h>
  42 #include <signal.h>
  43 #include <netdir.h>
  44 #include <unistd.h>
  45 #include <string.h>
  46 #include <netinet/tcp.h>
  47 #include <malloc.h>
  48 #include <stdlib.h>
  49 #include "nfs_tbind.h"
  50 #include <nfs/nfs.h>
  51 #include <nfs/nfs_acl.h>
  52 #include <nfs/nfssys.h>
  53 #include <nfs/nfs4.h>
  54 #include <zone.h>
  55 #include <sys/socket.h>
  56 #include <tsol/label.h>
  57 
  58 /*
  59  * Determine valid semantics for most applications.
  60  */
  61 #define OK_TPI_TYPE(_nconf) \
  62         (_nconf->nc_semantics == NC_TPI_CLTS || \
  63         _nconf->nc_semantics == NC_TPI_COTS || \
  64         _nconf->nc_semantics == NC_TPI_COTS_ORD)
  65 
  66 #define BE32_TO_U32(a) \
  67         ((((ulong_t)((uchar_t *)a)[0] & 0xFF) << (ulong_t)24) | \
  68         (((ulong_t)((uchar_t *)a)[1] & 0xFF) << (ulong_t)16) | \
  69         (((ulong_t)((uchar_t *)a)[2] & 0xFF) << (ulong_t)8)  | \
  70         ((ulong_t)((uchar_t *)a)[3] & 0xFF))
  71 
  72 /*
  73  * Number of elements to add to the poll array on each allocation.
  74  */
  75 #define POLL_ARRAY_INC_SIZE     64
  76 
  77 /*
  78  * Number of file descriptors by which the process soft limit may be
  79  * increased on each call to nofile_increase(0).
  80  */
  81 #define NOFILE_INC_SIZE 64
  82 
/*
 * One list node carrying a struct t_call pointer together with next and
 * previous links.  Pointers to these nodes are what conn_get() and
 * discon_get() take as their last argument.
 * NOTE(review): presumably a doubly linked queue of pending connection
 * indications; the code that manipulates it is outside this view -- confirm.
 */
struct conn_ind {
        struct conn_ind *conn_next;
        struct conn_ind *conn_prev;
        struct t_call   *conn_call;
};
  88 
/*
 * Per-descriptor transport data.  conn_polled[] holds one of these for
 * each slot of poll_array[], at the same index.  add_to_poll_list()
 * stores a copy of the descriptor's netconfig in nc and clears closing.
 */
struct conn_entry {
        bool_t                  closing;
        struct netconfig        nc;
};
  93 
  94 /*
  95  * this file contains transport routines common to nfsd and lockd
  96  */
  97 static  int     nofile_increase(int);
  98 static  int     reuseaddr(int);
  99 static  int     recvucred(int);
 100 static  int     anonmlp(int);
 101 static  void    add_to_poll_list(int, struct netconfig *);
 102 static  char    *serv_name_to_port_name(char *);
 103 static  int     bind_to_proto(char *, char *, struct netbuf **,
 104                                 struct netconfig **, int, int);
 105 static  int     bind_to_provider(char *, char *, struct netbuf **,
 106                                 struct netconfig **, int, int);
 107 static  void    conn_close_oldest(void);
 108 static  boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
 109 static  void    cots_listen_event(int, int);
 110 static  int     discon_get(int, struct netconfig *, struct conn_ind **);
 111 static  int     do_poll_clts_action(int, int);
 112 static  int     do_poll_cots_action(int, int);
 113 static  void    remove_from_poll_list(int);
 114 static  int     set_addrmask(int, struct netconfig *, struct netbuf *);
 115 static  int     is_listen_fd_index(int);
 116 
 117 static  struct pollfd *poll_array;
 118 static  struct conn_entry *conn_polled;
 119 static  int     num_conns;              /* Current number of connections */
 120 int             (*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
 121                 struct netbuf *);
 122 static int      setopt(int fd, int level, int name, int value);
 123 
 124 extern bool_t __pmap_set(const rpcprog_t program, const rpcvers_t version,
 125     const struct netconfig *nconf, const struct netbuf *address);
 126 
 127 /*
 128  * Called to create and prepare a transport descriptor for in-kernel
 129  * RPC service.
 130  * Returns -1 on failure and a valid descriptor on success.
 131  */
 132 int
 133 nfslib_transport_open(struct netconfig *nconf)
 134 {
 135         int fd;
 136         struct strioctl strioc;
 137 
 138         if ((nconf == (struct netconfig *)NULL) ||
 139             (nconf->nc_device == (char *)NULL)) {
 140                 syslog(LOG_ERR, "no netconfig device");
 141                 return (-1);
 142         }
 143 
 144         /*
 145          * Open the transport device.
 146          */
 147         fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
 148         if (fd == -1) {
 149                 if (t_errno == TSYSERR && errno == EMFILE &&
 150                     (nofile_increase(0) == 0)) {
 151                         /* Try again with a higher NOFILE limit. */
 152                         fd = t_open(nconf->nc_device, O_RDWR,
 153                             (struct t_info *)NULL);
 154                 }
 155                 if (fd == -1) {
 156                         syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
 157                             nconf->nc_device, t_errno);
 158                         return (-1);
 159                 }
 160         }
 161 
 162         /*
 163          * Pop timod because the RPC module must be as close as possible
 164          * to the transport.
 165          */
 166         if (ioctl(fd, I_POP, 0) < 0) {
 167                 syslog(LOG_ERR, "I_POP of timod failed: %m");
 168                 (void) t_close(fd);
 169                 return (-1);
 170         }
 171 
 172         /*
 173          * Common code for CLTS and COTS transports
 174          */
 175         if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
 176                 syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
 177                 (void) t_close(fd);
 178                 return (-1);
 179         }
 180 
 181         strioc.ic_cmd = RPC_SERVER;
 182         strioc.ic_dp = (char *)0;
 183         strioc.ic_len = 0;
 184         strioc.ic_timout = -1;
 185 
 186         /* Tell rpcmod to act like a server stream. */
 187         if (ioctl(fd, I_STR, &strioc) < 0) {
 188                 syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
 189                 (void) t_close(fd);
 190                 return (-1);
 191         }
 192 
 193         /*
 194          * Re-push timod so that we will still be doing TLI
 195          * operations on the descriptor.
 196          */
 197         if (ioctl(fd, I_PUSH, "timod") < 0) {
 198                 syslog(LOG_ERR, "I_PUSH of timod failed: %m");
 199                 (void) t_close(fd);
 200                 return (-1);
 201         }
 202 
 203         /*
 204          * Enable options of returning the ip's for udp.
 205          */
 206         if (strcmp(nconf->nc_netid, "udp6") == 0)
 207                 __rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
 208         else if (strcmp(nconf->nc_netid, "udp") == 0)
 209                 __rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
 210 
 211         return (fd);
 212 }
 213 
 214 static int
 215 nofile_increase(int limit)
 216 {
 217         struct rlimit rl;
 218 
 219         if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
 220                 syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
 221                 return (-1);
 222         }
 223 
 224         if (limit > 0)
 225                 rl.rlim_cur = limit;
 226         else
 227                 rl.rlim_cur += NOFILE_INC_SIZE;
 228 
 229         if (rl.rlim_cur > rl.rlim_max &&
 230             rl.rlim_max != RLIM_INFINITY)
 231                 rl.rlim_max = rl.rlim_cur;
 232 
 233         if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
 234                 syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
 235                     rl.rlim_cur);
 236                 return (-1);
 237         }
 238 
 239         return (0);
 240 }
 241 
 242 static int
 243 nfslib_set_sockbuf(int fd, int which, int val)
 244 {
 245         if ((which != SO_RCVBUF) && (which != SO_SNDBUF))
 246                 return (-1);
 247 
 248         syslog(LOG_DEBUG, "Set %s option to %d",
 249             ((which == SO_RCVBUF) ? "SO_RCVBUF" : "SO_SNDBUF"), val);
 250 
 251         if (setopt(fd, SOL_SOCKET, which, val) < 0) {
 252                 syslog(LOG_ERR, "couldn't set %s to %d - t_errno = %d",
 253                     ((which == SO_RCVBUF) ? "SO_RCVBUF" : "SO_SNDBUF"),
 254                     val, t_errno);
 255                 syslog(LOG_ERR, "Check and increase system-wide tcp_max_buf");
 256                 return (-1);
 257         }
 258         return (0);
 259 }
 260 
 261 int
 262 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
 263         struct nd_hostserv *hs, int backlog, int sndbufsz, int rcvbufsz)
 264 {
 265         int fd;
 266         struct t_bind  *ntb;
 267         struct t_bind tb;
 268         struct nd_addrlist *addrlist;
 269         struct t_optmgmt req, resp;
 270         struct opthdr *opt;
 271         char reqbuf[128];
 272         bool_t use_any = FALSE;
 273         bool_t gzone = TRUE;
 274 
 275         if ((fd = nfslib_transport_open(nconf)) == -1) {
 276                 syslog(LOG_ERR, "cannot establish transport service over %s",
 277                     nconf->nc_device);
 278                 return (-1);
 279         }
 280 
 281         addrlist = (struct nd_addrlist *)NULL;
 282 
 283         /* nfs4_callback service does not used a fieed port number */
 284 
 285         if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
 286                 tb.addr.maxlen = 0;
 287                 tb.addr.len = 0;
 288                 tb.addr.buf = 0;
 289                 use_any = TRUE;
 290                 gzone = (getzoneid() == GLOBAL_ZONEID);
 291         } else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
 292 
 293                 syslog(LOG_ERR,
 294                 "Cannot get address for transport %s host %s service %s",
 295                     nconf->nc_netid, hs->h_host, hs->h_serv);
 296                 (void) t_close(fd);
 297                 return (-1);
 298         }
 299 
 300         if (strcmp(nconf->nc_proto, "tcp") == 0) {
 301                 /*
 302                  * If we're running over TCP, then set the
 303                  * SO_REUSEADDR option so that we can bind
 304                  * to our preferred address even if previously
 305                  * left connections exist in FIN_WAIT states.
 306                  * This is somewhat bogus, but otherwise you have
 307                  * to wait 2 minutes to restart after killing it.
 308                  */
 309                 if (reuseaddr(fd) == -1) {
 310                         syslog(LOG_WARNING,
 311                         "couldn't set SO_REUSEADDR option on transport");
 312                 }
 313         } else if (strcmp(nconf->nc_proto, "udp") == 0) {
 314                 /*
 315                  * In order to run MLP on UDP, we need to handle creds.
 316                  */
 317                 if (recvucred(fd) == -1) {
 318                         syslog(LOG_WARNING,
 319                             "couldn't set SO_RECVUCRED option on transport");
 320                 }
 321         }
 322 
 323         /*
 324          * Make non global zone nfs4_callback port MLP
 325          */
 326         if (use_any && is_system_labeled() && !gzone) {
 327                 if (anonmlp(fd) == -1) {
 328                         /*
 329                          * failing to set this option means nfs4_callback
 330                          * could fail silently later. So fail it with
 331                          * with an error message now.
 332                          */
 333                         syslog(LOG_ERR,
 334                             "couldn't set SO_ANON_MLP option on transport");
 335                         (void) t_close(fd);
 336                         return (-1);
 337                 }
 338         }
 339 
 340         if (nconf->nc_semantics == NC_TPI_CLTS)
 341                 tb.qlen = 0;
 342         else
 343                 tb.qlen = backlog;
 344 
 345         /* LINTED pointer alignment */
 346         ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
 347         if (ntb == (struct t_bind *)NULL) {
 348                 syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
 349                 (void) t_close(fd);
 350                 netdir_free((void *)addrlist, ND_ADDRLIST);
 351                 return (-1);
 352         }
 353 
 354         /*
 355          * XXX - what about the space tb->addr.buf points to? This should
 356          * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
 357          * should't be called with T_ALL.
 358          */
 359         if (addrlist)
 360                 tb.addr = *(addrlist->n_addrs);              /* structure copy */
 361 
 362         if (t_bind(fd, &tb, ntb) == -1) {
 363                 syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
 364                 (void) t_free((char *)ntb, T_BIND);
 365                 netdir_free((void *)addrlist, ND_ADDRLIST);
 366                 (void) t_close(fd);
 367                 return (-1);
 368         }
 369 
 370         /* make sure we bound to the right address */
 371         if (use_any == FALSE &&
 372             (tb.addr.len != ntb->addr.len ||
 373             memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
 374                 syslog(LOG_ERR, "t_bind to wrong address");
 375                 (void) t_free((char *)ntb, T_BIND);
 376                 netdir_free((void *)addrlist, ND_ADDRLIST);
 377                 (void) t_close(fd);
 378                 return (-1);
 379         }
 380 
 381         /*
 382          * Call nfs4svc_setport so that the kernel can be
 383          * informed what port number the daemon is listing
 384          * for incoming connection requests.
 385          */
 386 
 387         if ((nconf->nc_semantics == NC_TPI_COTS ||
 388             nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
 389                 (*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
 390 
 391         *addr = &ntb->addr;
 392         netdir_free((void *)addrlist, ND_ADDRLIST);
 393 
 394         if (strcmp(nconf->nc_proto, "tcp") == 0) {
 395                 /*
 396                  * Disable the Nagle algorithm on TCP connections.
 397                  * Connections accepted from this listener will
 398                  * inherit the listener options.
 399                  */
 400 
 401                 /* LINTED pointer alignment */
 402                 opt = (struct opthdr *)reqbuf;
 403                 opt->level = IPPROTO_TCP;
 404                 opt->name = TCP_NODELAY;
 405                 opt->len = sizeof (int);
 406 
 407                 /* LINTED pointer alignment */
 408                 *(int *)((char *)opt + sizeof (*opt)) = 1;
 409 
 410                 req.flags = T_NEGOTIATE;
 411                 req.opt.len = sizeof (*opt) + opt->len;
 412                 req.opt.buf = (char *)opt;
 413                 resp.flags = 0;
 414                 resp.opt.buf = reqbuf;
 415                 resp.opt.maxlen = sizeof (reqbuf);
 416 
 417                 if (t_optmgmt(fd, &req, &resp) < 0 ||
 418                     resp.flags != T_SUCCESS) {
 419                         syslog(LOG_ERR,
 420         "couldn't set NODELAY option for proto %s: t_errno = %d, %m",
 421                             nconf->nc_proto, t_errno);
 422                 }
 423 
 424                 if (sndbufsz > 0)
 425                         (void) nfslib_set_sockbuf(fd, SO_SNDBUF, sndbufsz);
 426                 if (rcvbufsz > 0)
 427                         (void) nfslib_set_sockbuf(fd, SO_RCVBUF, rcvbufsz);
 428         }
 429 
 430         return (fd);
 431 }
 432 
 433 static int
 434 setopt(int fd, int level, int name, int value)
 435 {
 436         struct t_optmgmt req, resp;
 437         struct {
 438                 struct opthdr opt;
 439                 int value;
 440         } reqbuf;
 441 
 442         reqbuf.opt.level = level;
 443         reqbuf.opt.name = name;
 444         reqbuf.opt.len = sizeof (int);
 445 
 446         reqbuf.value = value;
 447 
 448         req.flags = T_NEGOTIATE;
 449         req.opt.len = sizeof (reqbuf);
 450         req.opt.buf = (char *)&reqbuf;
 451 
 452         resp.flags = 0;
 453         resp.opt.buf = (char *)&reqbuf;
 454         resp.opt.maxlen = sizeof (reqbuf);
 455 
 456         if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
 457                 t_error("t_optmgmt");
 458                 return (-1);
 459         }
 460         return (0);
 461 }
 462 
 463 static int
 464 reuseaddr(int fd)
 465 {
 466         return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, 1));
 467 }
 468 
 469 static int
 470 recvucred(int fd)
 471 {
 472         return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
 473 }
 474 
 475 static int
 476 anonmlp(int fd)
 477 {
 478         return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
 479 }
 480 
 481 void
 482 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
 483 {
 484         int error;
 485 
 486         /*
 487          * Save the error code across syslog(), just in case syslog()
 488          * gets its own error and, therefore, overwrites errno.
 489          */
 490         error = errno;
 491         if (t_errno == TSYSERR) {
 492                 syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
 493                     tli_name, fd, nconf->nc_proto);
 494         } else {
 495                 syslog(LOG_ERR,
 496                     "%s(file descriptor %d/transport %s) TLI error %d",
 497                     tli_name, fd, nconf->nc_proto, t_errno);
 498         }
 499         errno = error;
 500 }
 501 
 502 /*
 503  * Called to set up service over a particular transport also
 504  * set send and receive buffer size for transport connection.
 505  */
 506 void
 507 do_one_setbuf(char *provider, NETSELDECL(proto), struct protob *protobp0,
 508         int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap,
 509         int sndbufsz, int rcvbufsz)
 510 {
 511         register int sock;
 512         struct protob *protobp;
 513         struct netbuf *retaddr;
 514         struct netconfig *retnconf;
 515         struct netbuf addrmask;
 516         int vers;
 517         int err;
 518         int l;
 519 
 520         if (provider)
 521                 sock = bind_to_provider(provider, protobp0->serv, &retaddr,
 522                     &retnconf, sndbufsz, rcvbufsz);
 523         else
 524                 sock = bind_to_proto(proto, protobp0->serv, &retaddr,
 525                     &retnconf, sndbufsz, rcvbufsz);
 526 
 527         if (sock == -1) {
 528                 (void) syslog(LOG_ERR,
 529         "Cannot establish %s service over %s: transport setup problem.",
 530                     protobp0->serv, provider ? provider : proto);
 531                 return;
 532         }
 533 
 534         if (set_addrmask(sock, retnconf, &addrmask) < 0) {
 535                 (void) syslog(LOG_ERR,
 536                     "Cannot set address mask for %s", retnconf->nc_netid);
 537                 return;
 538         }
 539 
 540         /*
 541          * Register all versions of the programs in the protocol block list.
 542          */
 543         l = strlen(NC_UDP);
 544         for (protobp = protobp0; protobp; protobp = protobp->next) {
 545                 for (vers = protobp->versmin; vers <= protobp->versmax;
 546                     vers++) {
 547                         if ((protobp->program == NFS_PROGRAM ||
 548                             protobp->program == NFS_ACL_PROGRAM) &&
 549                             vers == NFS_V4 &&
 550                             strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
 551                                 continue;
 552 
 553                         if (use_pmap) {
 554                                 /*
 555                                  * Note that if we're using a portmapper
 556                                  * instead of rpcbind then we can't do an
 557                                  * unregister operation here.
 558                                  *
 559                                  * The reason is that the portmapper unset
 560                                  * operation removes all the entries for a
 561                                  * given program/version regardelss of
 562                                  * transport protocol.
 563                                  *
 564                                  * The caller of this routine needs to ensure
 565                                  * that __pmap_unset() has been called for all
 566                                  * program/version service pairs they plan
 567                                  * to support before they start registering
 568                                  * each program/version/protocol triplet.
 569                                  */
 570                                 (void) __pmap_set(protobp->program, vers,
 571                                     retnconf, retaddr);
 572                         } else {
 573                                 (void) rpcb_unset(protobp->program, vers,
 574                                     retnconf);
 575                                 (void) rpcb_set(protobp->program, vers,
 576                                     retnconf, retaddr);
 577                         }
 578                 }
 579         }
 580 
 581         if (retnconf->nc_semantics == NC_TPI_CLTS) {
 582                 /* Don't drop core if supporting module(s) aren't loaded. */
 583                 (void) signal(SIGSYS, SIG_IGN);
 584 
 585                 /*
 586                  * svc() doesn't block, it returns success or failure.
 587                  */
 588 
 589                 if (svc == NULL && Mysvc4 != NULL)
 590                         err = (*Mysvc4)(sock, &addrmask, retnconf,
 591                             NFS4_SETPORT|NFS4_KRPC_START, retaddr);
 592                 else
 593                         err = (*svc)(sock, addrmask, retnconf);
 594 
 595                 if (err < 0) {
 596                         (void) syslog(LOG_ERR,
 597                             "Cannot establish %s service over <file desc."
 598                             " %d, protocol %s> : %m. Exiting",
 599                             protobp0->serv, sock, retnconf->nc_proto);
 600                         exit(1);
 601                 }
 602         }
 603 
 604         /*
 605          * We successfully set up the server over this transport.
 606          * Add this descriptor to the one being polled on.
 607          */
 608         add_to_poll_list(sock, retnconf);
 609 }
 610 
 611 /*
 612  * Set up the NFS service over all the available transports and
 613  * also set send and receive buffer size for transport connection.
 614  * Returns -1 for failure, 0 for success.
 615  */
 616 int
 617 do_all_setbuf(struct protob *protobp,
 618         int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap,
 619         int sndbufsz, int rcvbufsz)
 620 {
 621         struct netconfig *nconf;
 622         NCONF_HANDLE *nc;
 623         int l;
 624 
 625         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
 626                 syslog(LOG_ERR, "setnetconfig failed: %m");
 627                 return (-1);
 628         }
 629         l = strlen(NC_UDP);
 630         while (nconf = getnetconfig(nc)) {
 631                 if ((nconf->nc_flag & NC_VISIBLE) &&
 632                     strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
 633                     OK_TPI_TYPE(nconf) &&
 634                     (protobp->program != NFS4_CALLBACK ||
 635                     strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
 636                         do_one_setbuf(nconf->nc_device, nconf->nc_proto,
 637                             protobp, svc, use_pmap, sndbufsz, rcvbufsz);
 638         }
 639         (void) endnetconfig(nc);
 640         return (0);
 641 }
 642 
 643 /*
 644  * Called to set up service over a particular transport.
 645  */
 646 void
 647 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
 648         int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap)
 649 {
 650         do_one_setbuf(provider, proto, protobp0, svc, use_pmap, 0, 0);
 651 }
 652 
 653 /*
 654  * Set up the NFS service over all the available transports.
 655  * Returns -1 for failure, 0 for success.
 656  */
 657 int
 658 do_all(struct protob *protobp,
 659         int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap)
 660 {
 661         return (do_all_setbuf(protobp, svc, use_pmap, 0, 0));
 662 }
 663 
 664 /*
 665  * poll on the open transport descriptors for events and errors.
 666  */
 667 void
 668 poll_for_action(void)
 669 {
 670         int nfds;
 671         int i;
 672 
 673         /*
 674          * Keep polling until all transports have been closed. When this
 675          * happens, we return.
 676          */
 677         while ((int)num_fds > 0) {
 678                 nfds = poll(poll_array, num_fds, INFTIM);
 679                 switch (nfds) {
 680                 case 0:
 681                         continue;
 682 
 683                 case -1:
 684                         /*
 685                          * Some errors from poll could be
 686                          * due to temporary conditions, and we try to
 687                          * be robust in the face of them. Other
 688                          * errors (should never happen in theory)
 689                          * are fatal (eg. EINVAL, EFAULT).
 690                          */
 691                         switch (errno) {
 692                         case EINTR:
 693                                 continue;
 694 
 695                         case EAGAIN:
 696                         case ENOMEM:
 697                                 (void) sleep(10);
 698                                 continue;
 699 
 700                         default:
 701                                 (void) syslog(LOG_ERR,
 702                                     "poll failed: %m. Exiting");
 703                                 exit(1);
 704                         }
 705                 default:
 706                         break;
 707                 }
 708 
 709                 /*
 710                  * Go through the poll list looking for events.
 711                  */
 712                 for (i = 0; i < num_fds && nfds > 0; i++) {
 713                         if (poll_array[i].revents) {
 714                                 nfds--;
 715                                 /*
 716                                  * We have a message, so try to read it.
 717                                  * Record the error return in errno,
 718                                  * so that syslog(LOG_ERR, "...%m")
 719                                  * dumps the corresponding error string.
 720                                  */
 721                                 if (conn_polled[i].nc.nc_semantics ==
 722                                     NC_TPI_CLTS) {
 723                                         errno = do_poll_clts_action(
 724                                             poll_array[i].fd, i);
 725                                 } else {
 726                                         errno = do_poll_cots_action(
 727                                             poll_array[i].fd, i);
 728                                 }
 729 
 730                                 if (errno == 0)
 731                                         continue;
 732                                 /*
 733                                  * Most returned error codes mean that there is
 734                                  * fatal condition which we can only deal with
 735                                  * by closing the transport.
 736                                  */
 737                                 if (errno != EAGAIN && errno != ENOMEM) {
 738                                         (void) syslog(LOG_ERR,
 739                 "Error (%m) reading descriptor %d/transport %s. Closing it.",
 740                                             poll_array[i].fd,
 741                                             conn_polled[i].nc.nc_proto);
 742                                         (void) t_close(poll_array[i].fd);
 743                                         remove_from_poll_list(poll_array[i].fd);
 744 
 745                                 } else if (errno == ENOMEM)
 746                                         (void) sleep(5);
 747                         }
 748                 }
 749         }
 750 
 751         (void) syslog(LOG_ERR,
 752             "All transports have been closed with errors. Exiting.");
 753 }
 754 
 755 /*
 756  * Allocate poll/transport array entries for this descriptor.
 757  */
 758 static void
 759 add_to_poll_list(int fd, struct netconfig *nconf)
 760 {
 761         static int poll_array_size = 0;
 762 
 763         /*
 764          * If the arrays are full, allocate new ones.
 765          */
 766         if (num_fds == poll_array_size) {
 767                 struct pollfd *tpa;
 768                 struct conn_entry *tnp;
 769 
 770                 if (poll_array_size != 0) {
 771                         tpa = poll_array;
 772                         tnp = conn_polled;
 773                 } else
 774                         tpa = (struct pollfd *)0;
 775 
 776                 poll_array_size += POLL_ARRAY_INC_SIZE;
 777                 /*
 778                  * Allocate new arrays.
 779                  */
 780                 poll_array = (struct pollfd *)
 781                     malloc(poll_array_size * sizeof (struct pollfd) + 256);
 782                 conn_polled = (struct conn_entry *)
 783                     malloc(poll_array_size * sizeof (struct conn_entry) + 256);
 784                 if (poll_array == (struct pollfd *)NULL ||
 785                     conn_polled == (struct conn_entry *)NULL) {
 786                         syslog(LOG_ERR, "malloc failed for poll array");
 787                         exit(1);
 788                 }
 789 
 790                 /*
 791                  * Copy the data of the old ones into new arrays, and
 792                  * free the old ones.
 793                  */
 794                 if (tpa) {
 795                         (void) memcpy((void *)poll_array, (void *)tpa,
 796                             num_fds * sizeof (struct pollfd));
 797                         (void) memcpy((void *)conn_polled, (void *)tnp,
 798                             num_fds * sizeof (struct conn_entry));
 799                         free((void *)tpa);
 800                         free((void *)tnp);
 801                 }
 802         }
 803 
 804         /*
 805          * Set the descriptor and event list. All possible events are
 806          * polled for.
 807          */
 808         poll_array[num_fds].fd = fd;
 809         poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
 810 
 811         /*
 812          * Copy the transport data over too.
 813          */
 814         conn_polled[num_fds].nc = *nconf;
 815         conn_polled[num_fds].closing = 0;
 816 
 817         /*
 818          * Set the descriptor to non-blocking. Avoids a race
 819          * between data arriving on the stream and then having it
 820          * flushed before we can read it.
 821          */
 822         if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
 823                 (void) syslog(LOG_ERR,
 824         "fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
 825                     num_fds, nconf->nc_proto);
 826                 exit(1);
 827         }
 828 
 829         /*
 830          * Count this descriptor.
 831          */
 832         ++num_fds;
 833 }
 834 
 835 static void
 836 remove_from_poll_list(int fd)
 837 {
 838         int i;
 839         int num_to_copy;
 840 
 841         for (i = 0; i < num_fds; i++) {
 842                 if (poll_array[i].fd == fd) {
 843                         --num_fds;
 844                         num_to_copy = num_fds - i;
 845                         (void) memcpy((void *)&poll_array[i],
 846                             (void *)&poll_array[i+1],
 847                             num_to_copy * sizeof (struct pollfd));
 848                         (void) memset((void *)&poll_array[num_fds], 0,
 849                             sizeof (struct pollfd));
 850                         (void) memcpy((void *)&conn_polled[i],
 851                             (void *)&conn_polled[i+1],
 852                             num_to_copy * sizeof (struct conn_entry));
 853                         (void) memset((void *)&conn_polled[num_fds], 0,
 854                             sizeof (struct conn_entry));
 855                         return;
 856                 }
 857         }
 858         syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
 859 
 860 }
 861 
 862 /*
 863  * Called to read and interpret the event on a connectionless descriptor.
 864  * Returns 0 if successful, or a UNIX error code if failure.
 865  */
 866 static int
 867 do_poll_clts_action(int fd, int conn_index)
 868 {
 869         int error;
 870         int ret;
 871         int flags;
 872         struct netconfig *nconf = &conn_polled[conn_index].nc;
 873         static struct t_unitdata *unitdata = NULL;
 874         static struct t_uderr *uderr = NULL;
 875         static int oldfd = -1;
 876         struct nd_hostservlist *host = NULL;
 877         struct strbuf ctl[1], data[1];
 878         /*
 879          * We just need to have some space to consume the
 880          * message in the event we can't use the TLI interface to do the
 881          * job.
 882          *
 883          * We flush the message using getmsg(). For the control part
 884          * we allocate enough for any TPI header plus 32 bytes for address
 885          * and options. For the data part, there is nothing magic about
 886          * the size of the array, but 256 bytes is probably better than
 887          * 1 byte, and we don't expect any data portion anyway.
 888          *
 889          * If the array sizes are too small, we handle this because getmsg()
 890          * (called to consume the message) will return MOREDATA|MORECTL.
 891          * Thus we just call getmsg() until it's read the message.
 892          */
 893         char ctlbuf[sizeof (union T_primitives) + 32];
 894         char databuf[256];
 895 
 896         /*
 897          * If this is the same descriptor as the last time
 898          * do_poll_clts_action was called, we can save some
 899          * de-allocation and allocation.
 900          */
 901         if (oldfd != fd) {
 902                 oldfd = fd;
 903 
 904                 if (unitdata) {
 905                         (void) t_free((char *)unitdata, T_UNITDATA);
 906                         unitdata = NULL;
 907                 }
 908                 if (uderr) {
 909                         (void) t_free((char *)uderr, T_UDERROR);
 910                         uderr = NULL;
 911                 }
 912         }
 913 
 914         /*
 915          * Allocate a unitdata structure for receiving the event.
 916          */
 917         if (unitdata == NULL) {
 918                 /* LINTED pointer alignment */
 919                 unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
 920                 if (unitdata == NULL) {
 921                         if (t_errno == TSYSERR) {
 922                                 /*
 923                                  * Save the error code across
 924                                  * syslog(), just in case
 925                                  * syslog() gets its own error
 926                                  * and therefore overwrites errno.
 927                                  */
 928                                 error = errno;
 929                                 (void) syslog(LOG_ERR,
 930         "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
 931                                     fd, nconf->nc_proto);
 932                                 return (error);
 933                         }
 934                         (void) syslog(LOG_ERR,
 935 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
 936                             fd, nconf->nc_proto, t_errno);
 937                         goto flush_it;
 938                 }
 939         }
 940 
 941 try_again:
 942         flags = 0;
 943 
 944         /*
 945          * The idea is we wait for T_UNITDATA_IND's. Of course,
 946          * we don't get any, because rpcmod filters them out.
 947          * However, we need to call t_rcvudata() to let TLI
 948          * tell us we have a T_UDERROR_IND.
 949          *
 950          * algorithm is:
 951          *      t_rcvudata(), expecting TLOOK.
 952          *      t_look(), expecting T_UDERR.
 953          *      t_rcvuderr(), expecting success (0).
 954          *      expand destination address into ASCII,
 955          *      and dump it.
 956          */
 957 
 958         ret = t_rcvudata(fd, unitdata, &flags);
 959         if (ret == 0 || t_errno == TBUFOVFLW) {
 960                 (void) syslog(LOG_WARNING,
 961 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
 962                     fd, nconf->nc_proto, unitdata->udata.len);
 963 
 964                 /*
 965                  * Even though we don't expect any data, in case we do,
 966                  * keep reading until there is no more.
 967                  */
 968                 if (flags & T_MORE)
 969                         goto try_again;
 970 
 971                 return (0);
 972         }
 973 
 974         switch (t_errno) {
 975         case TNODATA:
 976                 return (0);
 977         case TSYSERR:
 978                 /*
 979                  * System errors are returned to caller.
 980                  * Save the error code across
 981                  * syslog(), just in case
 982                  * syslog() gets its own error
 983                  * and therefore overwrites errno.
 984                  */
 985                 error = errno;
 986                 (void) syslog(LOG_ERR,
 987                     "t_rcvudata(file descriptor %d/transport %s) %m",
 988                     fd, nconf->nc_proto);
 989                 return (error);
 990         case TLOOK:
 991                 break;
 992         default:
 993                 (void) syslog(LOG_ERR,
 994                 "t_rcvudata(file descriptor %d/transport %s) TLI error %d",
 995                     fd, nconf->nc_proto, t_errno);
 996                 goto flush_it;
 997         }
 998 
 999         ret = t_look(fd);
1000         switch (ret) {
1001         case 0:
1002                 return (0);
1003         case -1:
1004                 /*
1005                  * System errors are returned to caller.
1006                  */
1007                 if (t_errno == TSYSERR) {
1008                         /*
1009                          * Save the error code across
1010                          * syslog(), just in case
1011                          * syslog() gets its own error
1012                          * and therefore overwrites errno.
1013                          */
1014                         error = errno;
1015                         (void) syslog(LOG_ERR,
1016                             "t_look(file descriptor %d/transport %s) %m",
1017                             fd, nconf->nc_proto);
1018                         return (error);
1019                 }
1020                 (void) syslog(LOG_ERR,
1021                     "t_look(file descriptor %d/transport %s) TLI error %d",
1022                     fd, nconf->nc_proto, t_errno);
1023                 goto flush_it;
1024         case T_UDERR:
1025                 break;
1026         default:
1027                 (void) syslog(LOG_WARNING,
1028         "t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1029                     fd, nconf->nc_proto, ret, T_UDERR);
1030         }
1031 
1032         if (uderr == NULL) {
1033                 /* LINTED pointer alignment */
1034                 uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1035                 if (uderr == NULL) {
1036                         if (t_errno == TSYSERR) {
1037                                 /*
1038                                  * Save the error code across
1039                                  * syslog(), just in case
1040                                  * syslog() gets its own error
1041                                  * and therefore overwrites errno.
1042                                  */
1043                                 error = errno;
1044                                 (void) syslog(LOG_ERR,
1045         "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1046                                     fd, nconf->nc_proto);
1047                                 return (error);
1048                         }
1049                         (void) syslog(LOG_ERR,
1050 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1051                             fd, nconf->nc_proto, t_errno);
1052                         goto flush_it;
1053                 }
1054         }
1055 
1056         ret = t_rcvuderr(fd, uderr);
1057         if (ret == 0) {
1058 
1059                 /*
1060                  * Save the datagram error in errno, so that the
1061                  * %m argument to syslog picks up the error string.
1062                  */
1063                 errno = uderr->error;
1064 
1065                 /*
1066                  * Log the datagram error, then log the host that
1067                  * probably triggerred. Cannot log both in the
1068                  * same transaction because of packet size limitations
1069                  * in /dev/log.
1070                  */
1071                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1072 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1073                     fd, nconf->nc_proto);
1074 
1075                 /*
1076                  * Try to map the client's address back to a
1077                  * name.
1078                  */
1079                 ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1080                 if (ret != -1 && host && host->h_cnt > 0 &&
1081                     host->h_hostservs) {
1082                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1083 "Bad NFS response was sent to client with host name: %s; service port: %s",
1084                     host->h_hostservs->h_host,
1085                     host->h_hostservs->h_serv);
1086                 } else {
1087                         int i, j;
1088                         char *buf;
1089                         char *hex = "0123456789abcdef";
1090 
1091                         /*
1092                          * Mapping failed, print the whole thing
1093                          * in ASCII hex.
1094                          */
1095                         buf = (char *)malloc(uderr->addr.len * 2 + 1);
1096                         for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1097                                 buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1098                                 buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1099                         }
1100                         buf[j] = '\0';
1101                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1102         "Bad NFS response was sent to client with transport address: 0x%s",
1103                     buf);
1104                         free((void *)buf);
1105                 }
1106 
1107                 if (ret == 0 && host != NULL)
1108                         netdir_free((void *)host, ND_HOSTSERVLIST);
1109                 return (0);
1110         }
1111 
1112         switch (t_errno) {
1113         case TNOUDERR:
1114                 goto flush_it;
1115         case TSYSERR:
1116                 /*
1117                  * System errors are returned to caller.
1118                  * Save the error code across
1119                  * syslog(), just in case
1120                  * syslog() gets its own error
1121                  * and therefore overwrites errno.
1122                  */
1123                 error = errno;
1124                 (void) syslog(LOG_ERR,
1125                     "t_rcvuderr(file descriptor %d/transport %s) %m",
1126                     fd, nconf->nc_proto);
1127                 return (error);
1128         default:
1129                 (void) syslog(LOG_ERR,
1130                 "t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1131                     fd, nconf->nc_proto, t_errno);
1132                 goto flush_it;
1133         }
1134 
1135 flush_it:
1136         /*
1137          * If we get here, then we could not cope with whatever message
1138          * we attempted to read, so flush it. If we did read a message,
1139          * and one isn't present, that is all right, because fd is in
1140          * nonblocking mode.
1141          */
1142         (void) syslog(LOG_ERR,
1143         "Flushing one input message from <file descriptor %d/transport %s>",
1144             fd, nconf->nc_proto);
1145 
1146         /*
1147          * Read and discard the message. Do this this until there is
1148          * no more control/data in the message or until we get an error.
1149          */
1150         do {
1151                 ctl->maxlen = sizeof (ctlbuf);
1152                 ctl->buf = ctlbuf;
1153                 data->maxlen = sizeof (databuf);
1154                 data->buf = databuf;
1155                 flags = 0;
1156                 ret = getmsg(fd, ctl, data, &flags);
1157                 if (ret == -1)
1158                         return (errno);
1159         } while (ret != 0);
1160 
1161         return (0);
1162 }
1163 
1164 static void
1165 conn_close_oldest(void)
1166 {
1167         int fd;
1168         int i1;
1169 
1170         /*
1171          * Find the oldest connection that is not already in the
1172          * process of shutting down.
1173          */
1174         for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1175                 if (i1 >= num_fds)
1176                         return;
1177                 if (conn_polled[i1].closing == 0)
1178                         break;
1179         }
1180 #ifdef DEBUG
1181         printf("too many connections (%d), releasing oldest (%d)\n",
1182             num_conns, poll_array[i1].fd);
1183 #else
1184         syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1185             num_conns, poll_array[i1].fd);
1186 #endif
1187         fd = poll_array[i1].fd;
1188         if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1189                 /*
1190                  * For politeness, send a T_DISCON_REQ to the transport
1191                  * provider.  We close the stream anyway.
1192                  */
1193                 (void) t_snddis(fd, (struct t_call *)0);
1194                 num_conns--;
1195                 remove_from_poll_list(fd);
1196                 (void) t_close(fd);
1197         } else {
1198                 /*
1199                  * For orderly release, we do not close the stream
1200                  * until the T_ORDREL_IND arrives to complete
1201                  * the handshake.
1202                  */
1203                 if (t_sndrel(fd) == 0)
1204                         conn_polled[i1].closing = 1;
1205         }
1206 }
1207 
1208 static boolean_t
1209 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1210 {
1211         struct conn_ind *conn;
1212         struct conn_ind *next_conn;
1213 
1214         conn = (struct conn_ind *)malloc(sizeof (*conn));
1215         if (conn == NULL) {
1216                 syslog(LOG_ERR, "malloc for listen indication failed");
1217                 return (FALSE);
1218         }
1219 
1220         /* LINTED pointer alignment */
1221         conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1222         if (conn->conn_call == NULL) {
1223                 free((char *)conn);
1224                 nfslib_log_tli_error("t_alloc", fd, nconf);
1225                 return (FALSE);
1226         }
1227 
1228         if (t_listen(fd, conn->conn_call) == -1) {
1229                 nfslib_log_tli_error("t_listen", fd, nconf);
1230                 (void) t_free((char *)conn->conn_call, T_CALL);
1231                 free((char *)conn);
1232                 return (FALSE);
1233         }
1234 
1235         if (conn->conn_call->udata.len > 0) {
1236                 syslog(LOG_WARNING,
1237         "rejecting inbound connection(%s) with %d bytes of connect data",
1238                     nconf->nc_proto, conn->conn_call->udata.len);
1239 
1240                 conn->conn_call->udata.len = 0;
1241                 (void) t_snddis(fd, conn->conn_call);
1242                 (void) t_free((char *)conn->conn_call, T_CALL);
1243                 free((char *)conn);
1244                 return (FALSE);
1245         }
1246 
1247         if ((next_conn = *connp) != NULL) {
1248                 next_conn->conn_prev->conn_next = conn;
1249                 conn->conn_next = next_conn;
1250                 conn->conn_prev = next_conn->conn_prev;
1251                 next_conn->conn_prev = conn;
1252         } else {
1253                 conn->conn_next = conn;
1254                 conn->conn_prev = conn;
1255                 *connp = conn;
1256         }
1257         return (TRUE);
1258 }
1259 
1260 static int
1261 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1262 {
1263         struct conn_ind *conn;
1264         struct t_discon discon;
1265 
1266         discon.udata.buf = (char *)0;
1267         discon.udata.maxlen = 0;
1268         if (t_rcvdis(fd, &discon) == -1) {
1269                 nfslib_log_tli_error("t_rcvdis", fd, nconf);
1270                 return (-1);
1271         }
1272 
1273         conn = *connp;
1274         if (conn == NULL)
1275                 return (0);
1276 
1277         do {
1278                 if (conn->conn_call->sequence == discon.sequence) {
1279                         if (conn->conn_next == conn)
1280                                 *connp = (struct conn_ind *)0;
1281                         else {
1282                                 if (conn == *connp) {
1283                                         *connp = conn->conn_next;
1284                                 }
1285                                 conn->conn_next->conn_prev = conn->conn_prev;
1286                                 conn->conn_prev->conn_next = conn->conn_next;
1287                         }
1288                         free((char *)conn);
1289                         break;
1290                 }
1291                 conn = conn->conn_next;
1292         } while (conn != *connp);
1293 
1294         return (0);
1295 }
1296 
1297 static void
1298 cots_listen_event(int fd, int conn_index)
1299 {
1300         struct t_call *call;
1301         struct conn_ind *conn;
1302         struct conn_ind *conn_head;
1303         int event;
1304         struct netconfig *nconf = &conn_polled[conn_index].nc;
1305         int new_fd;
1306         struct netbuf addrmask;
1307         int ret = 0;
1308         char *clnt;
1309         char *clnt_uaddr = NULL;
1310         struct nd_hostservlist *clnt_serv = NULL;
1311 
1312         conn_head = (struct conn_ind *)0;
1313         (void) conn_get(fd, nconf, &conn_head);
1314 
1315         while ((conn = conn_head) != NULL) {
1316                 conn_head = conn->conn_next;
1317                 if (conn_head == conn)
1318                         conn_head = (struct conn_ind *)0;
1319                 else {
1320                         conn_head->conn_prev = conn->conn_prev;
1321                         conn->conn_prev->conn_next = conn_head;
1322                 }
1323                 call = conn->conn_call;
1324                 free((char *)conn);
1325 
1326                 /*
1327                  * If we have already accepted the maximum number of
1328                  * connections allowed on the command line, then drop
1329                  * the oldest connection (for any protocol) before
1330                  * accepting the new connection.  Unless explicitly
1331                  * set on the command line, max_conns_allowed is -1.
1332                  */
1333                 if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1334                         conn_close_oldest();
1335 
1336                 /*
1337                  * Create a new transport endpoint for the same proto as
1338                  * the listener.
1339                  */
1340                 new_fd = nfslib_transport_open(nconf);
1341                 if (new_fd == -1) {
1342                         call->udata.len = 0;
1343                         (void) t_snddis(fd, call);
1344                         (void) t_free((char *)call, T_CALL);
1345                         syslog(LOG_ERR, "Cannot establish transport over %s",
1346                             nconf->nc_device);
1347                         continue;
1348                 }
1349 
1350                 /* Bind to a generic address/port for the accepting stream. */
1351                 if (t_bind(new_fd, (struct t_bind *)NULL,
1352                     (struct t_bind *)NULL) == -1) {
1353                         nfslib_log_tli_error("t_bind", new_fd, nconf);
1354                         call->udata.len = 0;
1355                         (void) t_snddis(fd, call);
1356                         (void) t_free((char *)call, T_CALL);
1357                         (void) t_close(new_fd);
1358                         continue;
1359                 }
1360 
1361                 while (t_accept(fd, new_fd, call) == -1) {
1362                         if (t_errno != TLOOK) {
1363 #ifdef DEBUG
1364                                 nfslib_log_tli_error("t_accept", fd, nconf);
1365 #endif
1366                                 call->udata.len = 0;
1367                                 (void) t_snddis(fd, call);
1368                                 (void) t_free((char *)call, T_CALL);
1369                                 (void) t_close(new_fd);
1370                                 goto do_next_conn;
1371                         }
1372                         while (event = t_look(fd)) {
1373                                 switch (event) {
1374                                 case T_LISTEN:
1375 #ifdef DEBUG
1376                                         printf(
1377 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1378 #endif
1379                                         (void) conn_get(fd, nconf, &conn_head);
1380                                         continue;
1381                                 case T_DISCONNECT:
1382 #ifdef DEBUG
1383                                         printf(
1384         "cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1385                                             nconf->nc_proto);
1386 #endif
1387                                         (void) discon_get(fd, nconf,
1388                                             &conn_head);
1389                                         continue;
1390                                 default:
1391                                         syslog(LOG_ERR,
1392                         "unexpected event 0x%x during accept processing (%s)",
1393                                             event, nconf->nc_proto);
1394                                         call->udata.len = 0;
1395                                         (void) t_snddis(fd, call);
1396                                         (void) t_free((char *)call, T_CALL);
1397                                         (void) t_close(new_fd);
1398                                         goto do_next_conn;
1399                                 }
1400                         }
1401                 }
1402 
1403                 if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1404                         (void) syslog(LOG_ERR,
1405                             "Cannot set address mask for %s",
1406                             nconf->nc_netid);
1407                         return;
1408                 }
1409 
1410                 /* Tell KRPC about the new stream. */
1411                 if (Mysvc4 != NULL)
1412                         ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1413                             NFS4_KRPC_START, &call->addr);
1414                 else
1415                         ret = (*Mysvc)(new_fd, addrmask, nconf);
1416 
1417                 if (ret < 0) {
1418                         if (errno != ENOTCONN) {
1419                                 syslog(LOG_ERR,
1420                                     "unable to register new connection: %m");
1421                         } else {
1422                                 /*
1423                                  * This is the only error that could be
1424                                  * caused by the client, so who was it?
1425                                  */
1426                                 if (netdir_getbyaddr(nconf, &clnt_serv,
1427                                     &(call->addr)) == ND_OK &&
1428                                     clnt_serv->h_cnt > 0)
1429                                         clnt = clnt_serv->h_hostservs->h_host;
1430                                 else
1431                                         clnt = clnt_uaddr = taddr2uaddr(nconf,
1432                                             &(call->addr));
1433                                 /*
1434                                  * If we don't know who the client was,
1435                                  * remain silent.
1436                                  */
1437                                 if (clnt)
1438                                         syslog(LOG_ERR,
1439 "unable to register new connection: client %s has dropped connection", clnt);
1440                                 if (clnt_serv)
1441                                         netdir_free(clnt_serv, ND_HOSTSERVLIST);
1442                                 if (clnt_uaddr)
1443                                         free(clnt_uaddr);
1444                         }
1445                         free(addrmask.buf);
1446                         (void) t_snddis(new_fd, (struct t_call *)0);
1447                         (void) t_free((char *)call, T_CALL);
1448                         (void) t_close(new_fd);
1449                         goto do_next_conn;
1450                 }
1451 
1452                 free(addrmask.buf);
1453                 (void) t_free((char *)call, T_CALL);
1454 
1455                 /*
1456                  * Poll on the new descriptor so that we get disconnect
1457                  * and orderly release indications.
1458                  */
1459                 num_conns++;
1460                 add_to_poll_list(new_fd, nconf);
1461 
1462                 /* Reset nconf in case it has been moved. */
1463                 nconf = &conn_polled[conn_index].nc;
1464 do_next_conn:;
1465         }
1466 }
1467 
1468 static int
1469 do_poll_cots_action(int fd, int conn_index)
1470 {
1471         char buf[256];
1472         int event;
1473         int i1;
1474         int flags;
1475         struct conn_entry *connent = &conn_polled[conn_index];
1476         struct netconfig *nconf = &(connent->nc);
1477         const char *errorstr;
1478 
1479         while (event = t_look(fd)) {
1480                 switch (event) {
1481                 case T_LISTEN:
1482 #ifdef DEBUG
1483 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1484 #endif
1485                         cots_listen_event(fd, conn_index);
1486                         break;
1487 
1488                 case T_DATA:
1489 #ifdef DEBUG
1490 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1491 #endif
1492                         /*
1493                          * Receive a private notification from CONS rpcmod.
1494                          */
1495                         i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1496                         if (i1 == -1) {
1497                                 syslog(LOG_ERR, "t_rcv failed");
1498                                 break;
1499                         }
1500                         if (i1 < sizeof (int))
1501                                 break;
1502                         i1 = BE32_TO_U32(buf);
1503                         if (i1 == 1 || i1 == 2) {
1504                                 /*
1505                                  * This connection has been idle for too long,
1506                                  * so release it as politely as we can.  If we
1507                                  * have already initiated an orderly release
1508                                  * and we get notified that the stream is
1509                                  * still idle, pull the plug.  This prevents
1510                                  * hung connections from continuing to consume
1511                                  * resources.
1512                                  */
1513 #ifdef DEBUG
1514 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1515 printf("initiating orderly release of idle connection\n");
1516 #endif
1517                                 if (nconf->nc_semantics == NC_TPI_COTS ||
1518                                     connent->closing != 0) {
1519                                         (void) t_snddis(fd, (struct t_call *)0);
1520                                         goto fdclose;
1521                                 }
1522                                 /*
1523                                  * For NC_TPI_COTS_ORD, the stream is closed
1524                                  * and removed from the poll list when the
1525                                  * T_ORDREL is received from the provider.  We
1526                                  * don't wait for it here because it may take
1527                                  * a while for the transport to shut down.
1528                                  */
1529                                 if (t_sndrel(fd) == -1) {
1530                                         syslog(LOG_ERR,
1531                                         "unable to send orderly release %m");
1532                                 }
1533                                 connent->closing = 1;
1534                         } else
1535                                 syslog(LOG_ERR,
1536                                 "unexpected event from CONS rpcmod %d", i1);
1537                         break;
1538 
1539                 case T_ORDREL:
1540 #ifdef DEBUG
1541 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1542 #endif
1543                         /* Perform an orderly release. */
1544                         if (t_rcvrel(fd) == 0) {
1545                                 /* T_ORDREL on listen fd's should be ignored */
1546                                 if (!is_listen_fd_index(conn_index)) {
1547                                         (void) t_sndrel(fd);
1548                                         goto fdclose;
1549                                 }
1550                                 break;
1551 
1552                         } else if (t_errno == TLOOK) {
1553                                 break;
1554                         } else {
1555                                 nfslib_log_tli_error("t_rcvrel", fd, nconf);
1556 
1557                                 /*
1558                                  * check to make sure we do not close
1559                                  * listen fd
1560                                  */
1561                                 if (is_listen_fd_index(conn_index))
1562                                         break;
1563                                 else
1564                                         goto fdclose;
1565                         }
1566 
1567                 case T_DISCONNECT:
1568 #ifdef DEBUG
1569 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1570 #endif
1571                         if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1572                                 nfslib_log_tli_error("t_rcvdis", fd, nconf);
1573 
1574                         /*
1575                          * T_DISCONNECT on listen fd's should be ignored.
1576                          */
1577                         if (is_listen_fd_index(conn_index))
1578                                 break;
1579                         else
1580                                 goto fdclose;
1581 
1582                 case T_ERROR:
1583                 default:
1584                         if (event == T_ERROR || t_errno == TSYSERR) {
1585                                 if ((errorstr = strerror(errno)) == NULL) {
1586                                         (void) sprintf(buf,
1587                                             "Unknown error num %d", errno);
1588                                         errorstr = (const char *) buf;
1589                                 }
1590                         } else if (event == -1)
1591                                 errorstr = t_strerror(t_errno);
1592                         else
1593                                 errorstr = "";
1594                         syslog(LOG_ERR,
1595                             "unexpected TLI event (0x%x) on "
1596                             "connection-oriented transport(%s,%d):%s",
1597                             event, nconf->nc_proto, fd, errorstr);
1598 fdclose:
1599                         num_conns--;
1600                         remove_from_poll_list(fd);
1601                         (void) t_close(fd);
1602                         return (0);
1603                 }
1604         }
1605 
1606         return (0);
1607 }
1608 
/*
 * Translate a service name (used primarily in logging) into the RPC
 * port name expected by the netdir_*() routines.
 *
 * The comparison is an exact, case-sensitive strcmp().  A name that is
 * not in the table yields the string "unrecognized".  The returned
 * pointer refers to a string literal and must not be modified or freed.
 */
static char *
serv_name_to_port_name(char *name)
{
        static const struct {
                const char *service;    /* name as passed by the caller */
                char *port;             /* corresponding RPC port name */
        } port_names[] = {
                { "NFS",                "nfs" },
                { "NLM",                "lockd" },
                { "NFS4_CALLBACK",      "nfs4_callback" }
        };
        size_t slot;

        for (slot = 0; slot < sizeof (port_names) / sizeof (port_names[0]);
            slot++) {
                if (strcmp(name, port_names[slot].service) == 0)
                        return (port_names[slot].port);
        }

        return ("unrecognized");
}
1626 
1627 static int
1628 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1629                 struct netconfig **retnconf, int sndbufsz, int rcvbufsz)
1630 {
1631         struct netconfig *nconf;
1632         NCONF_HANDLE *nc;
1633         struct nd_hostserv hs;
1634 
1635         hs.h_host = HOST_SELF;
1636         hs.h_serv = serv_name_to_port_name(serv);
1637 
1638         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1639                 syslog(LOG_ERR, "setnetconfig failed: %m");
1640                 return (-1);
1641         }
1642         while (nconf = getnetconfig(nc)) {
1643                 if (OK_TPI_TYPE(nconf) &&
1644                     strcmp(nconf->nc_device, provider) == 0) {
1645                         *retnconf = nconf;
1646                         return (nfslib_bindit(nconf, addr, &hs,
1647                             listen_backlog, sndbufsz, rcvbufsz));
1648                 }
1649         }
1650         (void) endnetconfig(nc);
1651 
1652         syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1653             provider);
1654         return (-1);
1655 }
1656 
1657 static int
1658 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1659                 struct netconfig **retnconf, int sndbufsz, int rcvbufsz)
1660 {
1661         struct netconfig *nconf;
1662         NCONF_HANDLE *nc = NULL;
1663         struct nd_hostserv hs;
1664 
1665         hs.h_host = HOST_SELF;
1666         hs.h_serv = serv_name_to_port_name(serv);
1667 
1668         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1669                 syslog(LOG_ERR, "setnetconfig failed: %m");
1670                 return (-1);
1671         }
1672         while (nconf = getnetconfig(nc)) {
1673                 if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1674                         *retnconf = nconf;
1675                         return (nfslib_bindit(nconf, addr, &hs,
1676                             listen_backlog, sndbufsz, rcvbufsz));
1677                 }
1678         }
1679         (void) endnetconfig(nc);
1680 
1681         syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1682             proto);
1683         return (-1);
1684 }
1685 
1686 #include <netinet/in.h>
1687 
1688 /*
1689  * Create an address mask appropriate for the transport.
1690  * The mask is used to obtain the host-specific part of
1691  * a network address when comparing addresses.
1692  * For an internet address the host-specific part is just
1693  * the 32 bit IP address and this part of the mask is set
1694  * to all-ones. The port number part of the mask is zeroes.
1695  */
1696 static int
1697 set_addrmask(fd, nconf, mask)
1698         struct netconfig *nconf;
1699         struct netbuf *mask;
1700 {
1701         struct t_info info;
1702 
1703         /*
1704          * Find the size of the address we need to mask.
1705          */
1706         if (t_getinfo(fd, &info) < 0) {
1707                 t_error("t_getinfo");
1708                 return (-1);
1709         }
1710         mask->len = mask->maxlen = info.addr;
1711         if (info.addr <= 0) {
1712                 syslog(LOG_ERR, "set_addrmask: address size: %ld",
1713                         info.addr);
1714                 return (-1);
1715         }
1716 
1717         mask->buf = (char *)malloc(mask->len);
1718         if (mask->buf == NULL) {
1719                 syslog(LOG_ERR, "set_addrmask: no memory");
1720                 return (-1);
1721         }
1722         (void) memset(mask->buf, 0, mask->len);   /* reset all mask bits */
1723 
1724         if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1725                 /*
1726                  * Set the mask so that the port is ignored.
1727                  */
1728                 /* LINTED pointer alignment */
1729                 ((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1730                                                                 (ulong_t)~0;
1731                 /* LINTED pointer alignment */
1732                 ((struct sockaddr_in *)mask->buf)->sin_family =
1733                                                                 (ushort_t)~0;
1734         } else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1735                 /* LINTED pointer alignment */
1736                 (void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1737                         (uchar_t)~0, sizeof (struct in6_addr));
1738                 /* LINTED pointer alignment */
1739                 ((struct sockaddr_in6 *)mask->buf)->sin6_family =
1740                                                                 (ushort_t)~0;
1741         } else {
1742 
1743                 /*
1744                  * Set all mask bits.
1745                  */
1746                 (void) memset(mask->buf, 0xFF, mask->len);
1747         }
1748         return (0);
1749 }
1750 
1751 /*
1752  * For listen fd's index is always less than end_listen_fds.
1753  * end_listen_fds is defined externally in the daemon that uses this library.
1754  * It's value is equal to the number of open file descriptors after the
1755  * last listen end point was opened but before any connection was accepted.
1756  */
1757 static int
1758 is_listen_fd_index(int index)
1759 {
1760         return (index < end_listen_fds);
1761 }