Print this page
4953763 Need way to configure NFS window sizes without changing system wide defaults
6216670 NFS server needs a bigger transmit buffer

*** 188,197 **** --- 188,198 ---- #include <rpc/types.h> #include <rpc/xdr.h> #include <rpc/auth.h> #include <rpc/clnt.h> #include <rpc/rpc_msg.h> + #include <nfs/nfs.h> #define COTS_DEFAULT_ALLOCSIZE 2048 #define WIRE_HDR_SIZE 20 /* serialized call header, sans proc number */ #define MSG_OFFSET 128 /* offset of call into the mblk */
*** 379,388 **** --- 380,393 ---- static bool_t connmgr_connect(struct cm_xprt *, queue_t *, struct netbuf *, int, calllist_t *, int *, bool_t reconnect, const struct timeval *, bool_t, cred_t *); + static bool_t connmgr_getopt_int(queue_t *wq, int level, int name, int *val, + calllist_t *e, cred_t *cr); + static bool_t connmgr_setopt_int(queue_t *, int, int, int, + calllist_t *, cred_t *cr); static bool_t connmgr_setopt(queue_t *, int, int, calllist_t *, cred_t *cr); static void connmgr_sndrel(struct cm_xprt *); static void connmgr_snddis(struct cm_xprt *); static void connmgr_close(struct cm_xprt *); static void connmgr_release(struct cm_xprt *);
*** 500,510 ****
--- 505,531 ----
   */
  int clnt_cots_do_bindresvport = 1;

  static zone_key_t zone_cots_key;

+ #define	TWO_GIGB	0x80000000	/* 2GB physical-memory threshold */
+ int nfsd_port = NFS_PORT;	/* destination port that marks an NFS conn */
  /*
+  * Default TCP send and receive buffer sizes for NFS connections.
+  * connmgr_nfs_setbufsz() halves these on systems with 2GB or less of
+  * physical memory and applies them only when they are larger than the
+  * buffer size currently set on the stream.
+  * These values can be tuned by /etc/default.
+  * NOTE(review): the comment below refers to /etc/system instead;
+  * confirm which mechanism is intended for these two tunables.
+  */
+ int nfs_send_bufsz = 1024*1024;
+ int nfs_recv_bufsz = 1024*1024;
+ /*
+  * To use system-wide default for TCP send and receive buffer size,
+  * use /etc/system to set nfs_default_bufsz to 1:
+  *
+  *    set rpcmod:nfs_default_bufsz=1
+  *
+  * A non-zero value makes connmgr_nfs_setbufsz() return without
+  * touching the stream.
+  */
+ int nfs_default_bufsz = 0;
+ 
+ /*
   * We need to do this after all kernel threads in the zone have exited.
   */
  /* ARGSUSED */
  static void
  clnt_zone_destroy(zoneid_t zoneid, void *unused)
*** 2556,2565 ****
--- 2577,2640 ----
  	cv_signal(&cm_entry->x_cv);
  	mutex_exit(&cm_entry->x_lock);
  }

  /*
+  * Raise the TCP send and receive buffer sizes of a new NFS connection.
+  *
+  * Nothing is changed, and FALSE is returned, when nfs_default_bufsz is
+  * set, when the address family is neither AF_INET nor AF_INET6, when
+  * the address is too short to carry a port, or when the destination
+  * port is not nfsd_port.
+  *
+  * Otherwise the current SO_RCVBUF and SO_SNDBUF values are queried and
+  * each one is raised to the matching tunable (nfs_recv_bufsz and
+  * nfs_send_bufsz respectively) if the tunable is larger.  TRUE is then
+  * returned regardless of whether the individual option requests
+  * succeeded; their outcome is only visible through the DTrace probes.
+  */
+ static bool_t
+ connmgr_nfs_setbufsz(calllist_t *e, int addrfmly, struct netbuf *addr,
+     queue_t *wq, cred_t *cr)
+ {
+ 	struct sockaddr_in *sa;
+ 	bool_t ok;
+ 	int cursz;
+ 	int sbufsz, rbufsz;
+ 
+ 	if (nfs_default_bufsz ||
+ 	    (addrfmly != AF_INET && addrfmly != AF_INET6))
+ 		return (FALSE);
+ 
+ 	/* Do not read a port out of an address that cannot hold one. */
+ 	if (addr == NULL || addr->buf == NULL ||
+ 	    addr->len < sizeof (struct sockaddr_in))
+ 		return (FALSE);
+ 
+ 	/*
+ 	 * NOTE(review): the port is read through struct sockaddr_in for
+ 	 * AF_INET6 too, which assumes sin_port and sin6_port live at the
+ 	 * same offset -- confirm for this platform.
+ 	 */
+ 	sa = (struct sockaddr_in *)addr->buf;
+ 	if (ntohs(sa->sin_port) != nfsd_port)
+ 		return (FALSE);
+ 
+ 	/*
+ 	 * For system with 2GB, or less, of physical memory set send
+ 	 * and receive buffer size to half of nfs_send_bufsz and
+ 	 * nfs_recv_bufsz respectively.  The sizes are kept as int, like
+ 	 * the tunables and the queried value, so that the comparisons
+ 	 * below are signed and a negative tunable never wins.
+ 	 */
+ 	if (ptob(physmem) <= TWO_GIGB) {
+ 		sbufsz = nfs_send_bufsz / 2;
+ 		rbufsz = nfs_recv_bufsz / 2;
+ 	} else {
+ 		sbufsz = nfs_send_bufsz;
+ 		rbufsz = nfs_recv_bufsz;
+ 	}
+ 
+ 	/*
+ 	 * Only set new buffer size if it's larger than the system
+ 	 * default buffer size.  If smaller buffer size is needed
+ 	 * then use /etc/system to set nfs_default_bufsz to 1.
+ 	 */
+ 	ok = connmgr_getopt_int(wq, SOL_SOCKET, SO_RCVBUF, &cursz, e, cr);
+ 	if (ok == TRUE && cursz < rbufsz) {
+ 		ok = connmgr_setopt_int(wq, SOL_SOCKET, SO_RCVBUF,
+ 		    rbufsz, e, cr);
+ 		DTRACE_PROBE2(connmgr_nfs_rcvbufsz__setopt,
+ 		    int, ok, calllist_t *, e);
+ 	}
+ 
+ 	ok = connmgr_getopt_int(wq, SOL_SOCKET, SO_SNDBUF, &cursz, e, cr);
+ 	if (ok == TRUE && cursz < sbufsz) {
+ 		ok = connmgr_setopt_int(wq, SOL_SOCKET, SO_SNDBUF,
+ 		    sbufsz, e, cr);
+ 		DTRACE_PROBE2(connmgr_nfs_sndbufsz__setopt,
+ 		    int, ok, calllist_t *, e);
+ 	}
+ 	return (TRUE);
+ }
+ 
+ /*
   * Given an open stream, connect to the remote.  Returns true if connected,
   * false otherwise.
   */
  static bool_t
  connmgr_connect(
*** 2607,2616 **** --- 2682,2694 ---- e->call_status = RPC_SYSTEMERROR; e->call_reason = ENOSR; return (FALSE); } + /* Set TCP buffer size for NFS connections if needed */ + (void) connmgr_nfs_setbufsz(e, addrfmly, addr, wq, cr); + mp->b_datap->db_type = M_PROTO; tcr = (struct T_conn_req *)mp->b_rptr; bzero(tcr, sizeof (*tcr)); tcr->PRIM_type = T_CONN_REQ; tcr->DEST_length = addr->len;
*** 2762,2775 ****
  	kstat_install(cm_entry->x_ksp);
  	return (TRUE);
  }

  /*
   * Called by connmgr_connect to set an option on the new stream.
   */
  static bool_t
! connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e, cred_t *cr)
  {
  	mblk_t *mp;
  	struct opthdr *opt;
  	struct T_optmgmt_req *tor;
  	struct timeval waitp;
--- 2840,2968 ----
  	kstat_install(cm_entry->x_ksp);
  	return (TRUE);
  }

  /*
+  * Verify that the specified offset falls within the mblk and
+  * that the resulting pointer is aligned.
+  * Returns NULL if not.
+  *
+  * The range [offset, offset + length) must lie between b_rptr and
+  * b_wptr; address arithmetic that wraps around is rejected as well.
+  * align_size is used as a mask, so it is assumed to be a power of two.
+  *
+  * code from fs/sockfs/socksubr.c
+  */
+ static void *
+ connmgr_opt_getoff(mblk_t *mp, t_uscalar_t offset,
+     t_uscalar_t length, uint_t align_size)
+ {
+ 	uintptr_t base, start, end;
+ 
+ 	ASSERT(mp && mp->b_wptr >= mp->b_rptr);
+ 	base = (uintptr_t)mp->b_rptr;
+ 	start = base + offset;
+ 	end = start + length;
+ 	if (start < base || end < start || end > (uintptr_t)mp->b_wptr) {
+ 		return (NULL);
+ 	}
+ 	if ((start & (align_size - 1)) != 0) {
+ 		return (NULL);
+ 	}
+ 	return ((void *)start);
+ }
+ 
+ /*
+  * Query the current integer value of an option on the stream.
+  *
+  * A T_SVR4_OPTMGMT_REQ with T_CURRENT is sent down wq for the option
+  * identified by level/name, and the T_OPTMGMT_ACK is awaited for up to
+  * clnt_cots_min_conntout seconds using call entry e.
+  *
+  * Returns TRUE with the value stored in *val on success.  Returns
+  * FALSE, leaving *val untouched, if the request cannot be allocated or
+  * sent, if no successful reply arrives, or if the reply is too short
+  * or misaligned to contain an option header followed by an int.
+  */
+ static bool_t
+ connmgr_getopt_int(queue_t *wq, int level, int name, int *val,
+     calllist_t *e, cred_t *cr)
+ {
+ 	mblk_t *mp;
+ 	struct opthdr *opt, *opt_res;
+ 	struct T_optmgmt_req *tor;
+ 	struct T_optmgmt_ack *opt_ack;
+ 	struct timeval waitp;
+ 	const size_t optlen = sizeof (struct opthdr) + sizeof (int);
+ 	int error;
+ 
+ 	mp = allocb_cred(sizeof (struct T_optmgmt_req) + optlen, cr, NOPID);
+ 	if (mp == NULL) {
+ 		RPCLOG0(1, "connmgr_getopt: cannot alloc mp for option "
+ 		    "request\n");
+ 		return (FALSE);
+ 	}
+ 
+ 	mp->b_datap->db_type = M_PROTO;
+ 	tor = (struct T_optmgmt_req *)(mp->b_rptr);
+ 	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
+ 	tor->MGMT_flags = T_CURRENT;
+ 	tor->OPT_length = optlen;
+ 	tor->OPT_offset = sizeof (struct T_optmgmt_req);
+ 
+ 	opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
+ 	opt->level = level;
+ 	opt->name = name;
+ 	opt->len = sizeof (int);
+ 	/*
+ 	 * The value slot is only a placeholder in a T_CURRENT request;
+ 	 * zero it so that no uninitialized bytes are sent downstream.
+ 	 */
+ 	*(int *)((char *)opt + sizeof (*opt)) = 0;
+ 	mp->b_wptr += sizeof (struct T_optmgmt_req) + optlen;
+ 
+ 	/*
+ 	 * We will use this connection regardless
+ 	 * of whether or not the option is readable.
+ 	 */
+ 	if (clnt_dispatch_send(wq, mp, e, 0, 0) != RPC_SUCCESS) {
+ 		DTRACE_PROBE(krpc__e__connmgr__getopt__cantsend);
+ 		freemsg(mp);
+ 		return (FALSE);
+ 	}
+ 
+ 	mutex_enter(&clnt_pending_lock);
+ 
+ 	waitp.tv_sec = clnt_cots_min_conntout;
+ 	waitp.tv_usec = 0;
+ 	error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);
+ 
+ 	/* Unlink the call entry from the pending list. */
+ 	if (e->call_prev)
+ 		e->call_prev->call_next = e->call_next;
+ 	else
+ 		clnt_pending = e->call_next;
+ 	if (e->call_next)
+ 		e->call_next->call_prev = e->call_prev;
+ 	mutex_exit(&clnt_pending_lock);
+ 
+ 	/* get reply message */
+ 	mp = e->call_reply;
+ 	e->call_reply = NULL;
+ 
+ 	if ((!mp) || (e->call_status != RPC_SUCCESS) || (error != 0)) {
+ 
+ 		DTRACE_PROBE4(connmgr_getopt__failed, int, name,
+ 		    int, e->call_status, int, error, mblk_t *, mp);
+ 
+ 		if (mp)
+ 			freemsg(mp);
+ 		return (FALSE);
+ 	}
+ 
+ 	/* The ack header itself must be present before it is examined. */
+ 	if ((size_t)(mp->b_wptr - mp->b_rptr) < sizeof (*opt_ack)) {
+ 		DTRACE_PROBE2(connmgr_getopt__shortack, mblk_t *, mp,
+ 		    int, name);
+ 		freemsg(mp);
+ 		return (FALSE);
+ 	}
+ 
+ 	/*
+ 	 * The option area has to hold an opthdr followed by an int, and
+ 	 * the returned option must actually carry an int-sized value,
+ 	 * before the value is read from just past the header.
+ 	 */
+ 	opt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
+ 	opt_res = NULL;
+ 	if ((t_uscalar_t)opt_ack->OPT_length >= optlen) {
+ 		opt_res = (struct opthdr *)connmgr_opt_getoff(mp,
+ 		    opt_ack->OPT_offset, opt_ack->OPT_length,
+ 		    __TPI_ALIGN_SIZE);
+ 	}
+ 
+ 	if (opt_res == NULL || opt_res->len < sizeof (int)) {
+ 		DTRACE_PROBE4(connmgr_getopt__optres, mblk_t *, mp, int, name,
+ 		    int, opt_ack->OPT_offset, int, opt_ack->OPT_length);
+ 		freemsg(mp);
+ 		return (FALSE);
+ 	}
+ 	*val = *(int *)&opt_res[1];
+ 
+ 	DTRACE_PROBE2(connmgr_getopt__ok, int, name, int, *val);
+ 
+ 	freemsg(mp);
+ 	return (TRUE);
+ }
+ 
+ /*
   * Called by connmgr_connect to set an option on the new stream.
   */
  static bool_t
! connmgr_setopt_int(queue_t *wq, int level, int name, int val,
!     calllist_t *e, cred_t *cr)
  {
  	mblk_t *mp;
  	struct opthdr *opt;
  	struct T_optmgmt_req *tor;
  	struct timeval waitp;
*** 2792,2802 **** opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req)); opt->level = level; opt->name = name; opt->len = sizeof (int); ! *(int *)((char *)opt + sizeof (*opt)) = 1; mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) + sizeof (int); /* * We will use this connection regardless --- 2985,2995 ---- opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req)); opt->level = level; opt->name = name; opt->len = sizeof (int); ! *(int *)((char *)opt + sizeof (*opt)) = val; mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) + sizeof (int); /* * We will use this connection regardless
*** 2833,2842 ****
--- 3026,3041 ----
  	}
  	RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name);
  	return (TRUE);
  }

+ /*
+  * Set the option identified by level/name to the integer value 1.
+  * Thin wrapper that keeps the original connmgr_setopt() signature and
+  * forwards to connmgr_setopt_int(); the return value is passed through
+  * unchanged.
+  */
+ static bool_t
+ connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e, cred_t *cr)
+ {
+ 	return (connmgr_setopt_int(wq, level, name, 1, e, cr));
+ }
+ 
  #ifdef DEBUG

  /*
   * This is a knob to let us force code coverage in allocation failure
   * case.