Print this page
4953763 Need way to configure NFS window sizes without changing system wide defaults
6216670 NFS server needs a bigger transmit buffer
*** 188,197 ****
--- 188,198 ----
#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
+ #include <nfs/nfs.h>
#define COTS_DEFAULT_ALLOCSIZE 2048
#define WIRE_HDR_SIZE 20 /* serialized call header, sans proc number */
#define MSG_OFFSET 128 /* offset of call into the mblk */
*** 379,388 ****
--- 380,393 ----
static bool_t connmgr_connect(struct cm_xprt *, queue_t *, struct netbuf *,
int, calllist_t *, int *, bool_t reconnect,
const struct timeval *, bool_t, cred_t *);
+ static bool_t connmgr_getopt_int(queue_t *wq, int level, int name, int *val,
+ calllist_t *e, cred_t *cr);
+ static bool_t connmgr_setopt_int(queue_t *, int, int, int,
+ calllist_t *, cred_t *cr);
static bool_t connmgr_setopt(queue_t *, int, int, calllist_t *, cred_t *cr);
static void connmgr_sndrel(struct cm_xprt *);
static void connmgr_snddis(struct cm_xprt *);
static void connmgr_close(struct cm_xprt *);
static void connmgr_release(struct cm_xprt *);
*** 500,510 ****
--- 505,531 ----
*/
int clnt_cots_do_bindresvport = 1;
static zone_key_t zone_cots_key;
+ #define TWO_GIGB 0x80000000 /* 2^31 bytes; compared against ptob(physmem) */
+ int nfsd_port = NFS_PORT; /* destination port compared against when sizing buffers */
/*
+ * Default TCP send and receive buffer sizes (in bytes) for NFS connections.
+ * These values can be tuned by /etc/default.
+ * NOTE(review): the nfs_default_bufsz tunable below is documented as being
+ * set through /etc/system; confirm that /etc/default is really intended here.
+ */
+ int nfs_send_bufsz = 1024*1024;
+ int nfs_recv_bufsz = 1024*1024;
+ /*
+ * To use system-wide default for TCP send and receive buffer size,
+ * use /etc/system to set nfs_default_bufsz to 1:
+ *
+ * set rpcmod:nfs_default_bufsz=1
+ */
+ int nfs_default_bufsz = 0;
+
+ /*
* We need to do this after all kernel threads in the zone have exited.
*/
/* ARGSUSED */
static void
clnt_zone_destroy(zoneid_t zoneid, void *unused)
*** 2556,2565 ****
--- 2577,2640 ----
cv_signal(&cm_entry->x_cv);
mutex_exit(&cm_entry->x_lock);
}
/*
+  * Set TCP receive and transmit buffer sizes for NFS connections.
+  *
+  * Nothing is changed (and FALSE is returned) when nfs_default_bufsz is
+  * non-zero, when addrfmly is neither AF_INET nor AF_INET6, or when the
+  * destination port in addr is not nfsd_port.
+  *
+  * Otherwise the receive buffer is raised to the size derived from
+  * nfs_recv_bufsz and the send buffer to the size derived from
+  * nfs_send_bufsz, each only if the current value on the stream is
+  * smaller.  This is best effort: TRUE is returned even if reading or
+  * setting an individual option fails; the outcome of each set is
+  * reported through a DTrace probe.
+  */
+ static bool_t
+ connmgr_nfs_setbufsz(calllist_t *e, int addrfmly, struct netbuf *addr,
+     queue_t *wq, cred_t *cr)
+ {
+ 	struct sockaddr_in *sa;
+ 	int ok;
+ 	int val;
+ 	uint32_t sbufsz, rbufsz;
+
+ 	if (nfs_default_bufsz ||
+ 	    (addrfmly != AF_INET && addrfmly != AF_INET6))
+ 		return (FALSE);
+
+ 	/*
+ 	 * The buffer is read as a sockaddr_in for both address families.
+ 	 * NOTE(review): this relies on sin_port and sin6_port sitting at
+ 	 * the same offset -- confirm against the socket headers.
+ 	 */
+ 	sa = (struct sockaddr_in *)addr->buf;
+ 	if (ntohs(sa->sin_port) != nfsd_port)
+ 		return (FALSE);
+
+ 	/*
+ 	 * For system with 2GB, or less, of physical memory set send
+ 	 * and receive buffer size to half of nfs_send_bufsz and
+ 	 * nfs_recv_bufsz respectively.
+ 	 */
+ 	if (ptob(physmem) <= TWO_GIGB) {
+ 		sbufsz = nfs_send_bufsz >> 1;
+ 		rbufsz = nfs_recv_bufsz >> 1;
+ 	} else {
+ 		sbufsz = nfs_send_bufsz;
+ 		rbufsz = nfs_recv_bufsz;
+ 	}
+
+ 	/*
+ 	 * Only set new buffer size if it's larger than the system
+ 	 * default buffer size. If smaller buffer size is needed
+ 	 * then use /etc/system to set nfs_default_bufsz to 1.
+ 	 */
+
+ 	/* Receive side: governed by rbufsz (from nfs_recv_bufsz). */
+ 	ok = connmgr_getopt_int(wq, SOL_SOCKET, SO_RCVBUF, &val, e, cr);
+ 	if ((ok == TRUE) && (val < rbufsz)) {
+ 		ok = connmgr_setopt_int(wq, SOL_SOCKET, SO_RCVBUF,
+ 		    rbufsz, e, cr);
+ 		DTRACE_PROBE2(connmgr_nfs_rcvbufsz__setopt,
+ 		    int, ok, calllist_t *, e);
+ 	}
+
+ 	/* Send side: governed by sbufsz (from nfs_send_bufsz). */
+ 	ok = connmgr_getopt_int(wq, SOL_SOCKET, SO_SNDBUF, &val, e, cr);
+ 	if ((ok == TRUE) && (val < sbufsz)) {
+ 		ok = connmgr_setopt_int(wq, SOL_SOCKET, SO_SNDBUF,
+ 		    sbufsz, e, cr);
+ 		DTRACE_PROBE2(connmgr_nfs_sndbufsz__setopt,
+ 		    int, ok, calllist_t *, e);
+ 	}
+ 	return (TRUE);
+ }
+
+ /*
* Given an open stream, connect to the remote. Returns true if connected,
* false otherwise.
*/
static bool_t
connmgr_connect(
*** 2607,2616 ****
--- 2682,2694 ----
e->call_status = RPC_SYSTEMERROR;
e->call_reason = ENOSR;
return (FALSE);
}
+ /* Set TCP buffer size for NFS connections if needed */
+ (void) connmgr_nfs_setbufsz(e, addrfmly, addr, wq, cr);
+
mp->b_datap->db_type = M_PROTO;
tcr = (struct T_conn_req *)mp->b_rptr;
bzero(tcr, sizeof (*tcr));
tcr->PRIM_type = T_CONN_REQ;
tcr->DEST_length = addr->len;
*** 2762,2775 ****
kstat_install(cm_entry->x_ksp);
return (TRUE);
}
/*
* Called by connmgr_connect to set an option on the new stream.
*/
static bool_t
! connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e, cred_t *cr)
{
mblk_t *mp;
struct opthdr *opt;
struct T_optmgmt_req *tor;
struct timeval waitp;
--- 2840,2968 ----
kstat_install(cm_entry->x_ksp);
return (TRUE);
}
/*
+  * Verify that the byte range [offset, offset + length) measured from
+  * mp->b_rptr lies entirely between mp->b_rptr and mp->b_wptr, and that
+  * the resulting start pointer is aligned to align_size.
+  *
+  * Returns a pointer to the start of the range, or NULL if the range
+  * wraps around the address space, falls outside the mblk data, or is
+  * misaligned.  The alignment test is a mask with (align_size - 1), so
+  * align_size is expected to be a power of two.
+  *
+  * code from fs/sockfs/socksubr.c
+  */
+ static void *
+ connmgr_opt_getoff(mblk_t *mp, t_uscalar_t offset,
+     t_uscalar_t length, uint_t align_size)
+ {
+ 	uintptr_t start, end;
+
+ 	ASSERT(mp && mp->b_wptr >= mp->b_rptr);
+ 	start = (uintptr_t)mp->b_rptr + offset;
+ 	end = start + length;
+
+ 	/*
+ 	 * Both additions may wrap; a wrapped end pointer would otherwise
+ 	 * compare below b_wptr and slip through the bounds test.
+ 	 */
+ 	if (start < (uintptr_t)mp->b_rptr || end < start ||
+ 	    end > (uintptr_t)mp->b_wptr) {
+ 		return (NULL);
+ 	}
+ 	if ((start & (align_size - 1)) != 0) {
+ 		return (NULL);
+ 	}
+ 	return ((void *)start);
+ }
+
+
+ /*
+  * Read the current value of an int-sized option from the stream.
+  *
+  * Builds a T_SVR4_OPTMGMT_REQ with MGMT_flags set to T_CURRENT for the
+  * given (level, name), hands it to clnt_dispatch_send() on wq, and waits
+  * in waitforack() for a T_OPTMGMT_ACK using a timeout of
+  * clnt_cots_min_conntout in tv_sec.  On success the int that follows the
+  * option header in the reply is stored in *val.
+  *
+  * Returns TRUE if *val was filled in.  Returns FALSE, leaving *val
+  * untouched, if the request cannot be allocated or sent, if the wait
+  * fails, or if the reply does not carry a complete, properly placed
+  * option header plus int value.
+  */
+ static bool_t
+ connmgr_getopt_int(queue_t *wq, int level, int name, int *val,
+     calllist_t *e, cred_t *cr)
+ {
+ 	mblk_t *mp;
+ 	struct opthdr *opt, *opt_res;
+ 	struct T_optmgmt_req *tor;
+ 	struct T_optmgmt_ack *opt_ack;
+ 	struct timeval waitp;
+ 	int error;
+ 	/* Bytes occupied by one option header followed by its int value. */
+ 	const size_t optlen = sizeof (struct opthdr) + sizeof (int);
+
+ 	mp = allocb_cred(sizeof (struct T_optmgmt_req) + optlen, cr, NOPID);
+ 	if (mp == NULL) {
+ 		RPCLOG0(1, "connmgr_getopt: cannot alloc mp for option "
+ 		    "request\n");
+ 		return (FALSE);
+ 	}
+
+ 	mp->b_datap->db_type = M_PROTO;
+ 	tor = (struct T_optmgmt_req *)(mp->b_rptr);
+ 	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
+ 	tor->MGMT_flags = T_CURRENT;
+ 	tor->OPT_length = optlen;
+ 	tor->OPT_offset = sizeof (struct T_optmgmt_req);
+
+ 	opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
+ 	opt->level = level;
+ 	opt->name = name;
+ 	opt->len = sizeof (int);
+ 	/*
+ 	 * The value slot is part of the message that is sent; clear it
+ 	 * so no uninitialized buffer contents travel downstream.
+ 	 */
+ 	*(int *)&opt[1] = 0;
+ 	mp->b_wptr += sizeof (struct T_optmgmt_req) + optlen;
+
+ 	/*
+ 	 * We will use this connection regardless
+ 	 * of whether or not the option is readable.
+ 	 */
+ 	if (clnt_dispatch_send(wq, mp, e, 0, 0) != RPC_SUCCESS) {
+ 		DTRACE_PROBE(krpc__e__connmgr__getopt__cantsend);
+ 		freemsg(mp);
+ 		return (FALSE);
+ 	}
+
+ 	mutex_enter(&clnt_pending_lock);
+
+ 	waitp.tv_sec = clnt_cots_min_conntout;
+ 	waitp.tv_usec = 0;
+ 	error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);
+
+ 	/* Unlink e from the clnt_pending list while the lock is held. */
+ 	if (e->call_prev)
+ 		e->call_prev->call_next = e->call_next;
+ 	else
+ 		clnt_pending = e->call_next;
+ 	if (e->call_next)
+ 		e->call_next->call_prev = e->call_prev;
+ 	mutex_exit(&clnt_pending_lock);
+
+ 	/* get reply message */
+ 	mp = e->call_reply;
+ 	e->call_reply = NULL;
+
+ 	if ((!mp) || (e->call_status != RPC_SUCCESS) || (error != 0)) {
+
+ 		DTRACE_PROBE4(connmgr_getopt__failed, int, name,
+ 		    int, e->call_status, int, error, mblk_t *, mp);
+
+ 		if (mp)
+ 			freemsg(mp);
+ 		return (FALSE);
+ 	}
+
+ 	/*
+ 	 * NOTE(review): assumes waitforack() only reports success for a
+ 	 * reply that holds a complete struct T_optmgmt_ack -- confirm.
+ 	 */
+ 	opt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
+
+ 	/*
+ 	 * connmgr_opt_getoff() only vouches for OPT_length bytes, so the
+ 	 * reply must advertise at least a header plus an int before the
+ 	 * value behind the header may be read.
+ 	 */
+ 	opt_res = NULL;
+ 	if ((t_uscalar_t)opt_ack->OPT_length >= optlen) {
+ 		opt_res = (struct opthdr *)connmgr_opt_getoff(mp,
+ 		    opt_ack->OPT_offset, opt_ack->OPT_length,
+ 		    __TPI_ALIGN_SIZE);
+ 	}
+
+ 	if (!opt_res) {
+ 		DTRACE_PROBE4(connmgr_getopt__optres, mblk_t *, mp, int, name,
+ 		    int, opt_ack->OPT_offset, int, opt_ack->OPT_length);
+ 		freemsg(mp);
+ 		return (FALSE);
+ 	}
+ 	*val = *(int *)&opt_res[1];
+
+ 	DTRACE_PROBE2(connmgr_getopt__ok, int, name, int, *val);
+
+ 	freemsg(mp);
+ 	return (TRUE);
+ }
+
+ /*
* Called by connmgr_connect to set an option on the new stream.
*/
static bool_t
! connmgr_setopt_int(queue_t *wq, int level, int name, int val,
! calllist_t *e, cred_t *cr)
{
mblk_t *mp;
struct opthdr *opt;
struct T_optmgmt_req *tor;
struct timeval waitp;
*** 2792,2802 ****
opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
opt->level = level;
opt->name = name;
opt->len = sizeof (int);
! *(int *)((char *)opt + sizeof (*opt)) = 1;
mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
sizeof (int);
/*
* We will use this connection regardless
--- 2985,2995 ----
opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
opt->level = level;
opt->name = name;
opt->len = sizeof (int);
! *(int *)((char *)opt + sizeof (*opt)) = val;
mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
sizeof (int);
/*
* We will use this connection regardless
*** 2833,2842 ****
--- 3026,3041 ----
}
RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name);
return (TRUE);
}
+ /*
+  * Set the option (level, name) on the stream to the integer value 1 by
+  * delegating to connmgr_setopt_int(); that call's result is returned
+  * unchanged.
+  */
+ static bool_t
+ connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e, cred_t *cr)
+ {
+ 	const int enable = 1;
+
+ 	return (connmgr_setopt_int(wq, level, name, enable, e, cr));
+ }
+
#ifdef DEBUG
/*
* This is a knob to let us force code coverage in allocation failure
* case.