Print this page
4953763 Need way to configure NFS window sizes without changing system wide defaults
6216670 NFS server needs a bigger transmit buffer

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/rpc/clnt_cots.c
          +++ new/usr/src/uts/common/rpc/clnt_cots.c
↓ open down ↓ 182 lines elided ↑ open up ↑
 183  183  #include <sys/sdt.h>
 184  184  
 185  185  #include <netinet/in.h>
 186  186  #include <netinet/tcp.h>
 187  187  
 188  188  #include <rpc/types.h>
 189  189  #include <rpc/xdr.h>
 190  190  #include <rpc/auth.h>
 191  191  #include <rpc/clnt.h>
 192  192  #include <rpc/rpc_msg.h>
      193 +#include <nfs/nfs.h>
 193  194  
 194  195  #define COTS_DEFAULT_ALLOCSIZE  2048
 195  196  
 196  197  #define WIRE_HDR_SIZE   20      /* serialized call header, sans proc number */
 197  198  #define MSG_OFFSET      128     /* offset of call into the mblk */
 198  199  
 199  200  const char *kinet_ntop6(uchar_t *, char *, size_t);
 200  201  
 201  202  static int      clnt_cots_ksettimers(CLIENT *, struct rpc_timers *,
 202  203      struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
↓ open down ↓ 171 lines elided ↑ open up ↑
 374  375  } cku_private_t;
 375  376  
 376  377  static struct cm_xprt *connmgr_wrapconnect(struct cm_xprt *,
 377  378          const struct timeval *, struct netbuf *, int, struct netbuf *,
 378  379          struct rpc_err *, bool_t, bool_t, cred_t *);
 379  380  
 380  381  static bool_t   connmgr_connect(struct cm_xprt *, queue_t *, struct netbuf *,
 381  382                                  int, calllist_t *, int *, bool_t reconnect,
 382  383                                  const struct timeval *, bool_t, cred_t *);
 383  384  
      385 +static bool_t  connmgr_getopt_int(queue_t *wq, int level, int name, int *val,
      386 +                                calllist_t *e, cred_t *cr);
      387 +static bool_t   connmgr_setopt_int(queue_t *, int, int, int,
      388 +                                calllist_t *, cred_t *cr);
 384  389  static bool_t   connmgr_setopt(queue_t *, int, int, calllist_t *, cred_t *cr);
 385  390  static void     connmgr_sndrel(struct cm_xprt *);
 386  391  static void     connmgr_snddis(struct cm_xprt *);
 387  392  static void     connmgr_close(struct cm_xprt *);
 388  393  static void     connmgr_release(struct cm_xprt *);
 389  394  static struct cm_xprt *connmgr_wrapget(struct netbuf *, const struct timeval *,
 390  395          cku_private_t *);
 391  396  
 392  397  static struct cm_xprt *connmgr_get(struct netbuf *, const struct timeval *,
 393  398          struct netbuf *, int, struct netbuf *, struct rpc_err *, dev_t,
↓ open down ↓ 101 lines elided ↑ open up ↑
 495  500   *
 496  501   * If the value is set to 1 the default will be to select a reserved
 497  502   * (aka privileged) port, if the value is zero the default will be to
 498  503   * use non-reserved ports.  Users of kRPC may override this by using
 499  504   * CLNT_CONTROL() and CLSET_BINDRESVPORT.
 500  505   */
 501  506  int clnt_cots_do_bindresvport = 1;
 502  507  
 503  508  static zone_key_t zone_cots_key;
 504  509  
      510 +#define TWO_GIGB        0x80000000
      511 +int nfsd_port = NFS_PORT;
 505  512  /*
      513 + * Default TCP send and receive buffer sizes for NFS connections.
      514 + * These values can be tuned by /etc/default.
      515 + */
      516 +int nfs_send_bufsz = 1024*1024;
      517 +int nfs_recv_bufsz = 1024*1024;
      518 +/*
      519 + * To use system-wide default for TCP send and receive buffer size,
      520 + * use /etc/system to set nfs_default_bufsz to 1:
      521 + *
      522 + * set rpcmod:nfs_default_bufsz=1
      523 + */
      524 +int nfs_default_bufsz = 0;
      525 +
      526 +/*
 506  527   * We need to do this after all kernel threads in the zone have exited.
 507  528   */
 508  529  /* ARGSUSED */
 509  530  static void
 510  531  clnt_zone_destroy(zoneid_t zoneid, void *unused)
 511  532  {
 512  533          struct cm_xprt **cmp;
 513  534          struct cm_xprt *cm_entry;
 514  535          struct cm_xprt *freelist = NULL;
 515  536  
↓ open down ↓ 2035 lines elided ↑ open up ↑
2551 2572  connmgr_release(struct cm_xprt *cm_entry)
2552 2573  {
2553 2574          mutex_enter(&cm_entry->x_lock);
2554 2575          cm_entry->x_ref--;
2555 2576          if (cm_entry->x_ref == 0)
2556 2577                  cv_signal(&cm_entry->x_cv);
2557 2578          mutex_exit(&cm_entry->x_lock);
2558 2579  }
2559 2580  
2560 2581  /*
     2582 + * Set TCP receive and xmit buffer size for NFS connections.
     2583 + */
     2584 +static bool_t
     2585 +connmgr_nfs_setbufsz(calllist_t *e, int addrfmly, struct netbuf *addr,
     2586 +    queue_t *wq, cred_t *cr)
     2587 +{
     2588 +        struct sockaddr_in *sa;
     2589 +        int ok = FALSE;
     2590 +        int val;
     2591 +        uint32_t sbufsz, rbufsz;
     2592 +
     2593 +        if (nfs_default_bufsz ||
     2594 +            (addrfmly != AF_INET && addrfmly != AF_INET6))
     2595 +                return (FALSE);
     2596 +
     2597 +        sa = (struct sockaddr_in *)addr->buf;
     2598 +        if (ntohs(sa->sin_port) != nfsd_port)
     2599 +                return (FALSE);
     2600 +        /*
     2601 +         * For system with 2GB, or less, of physical memory set send
     2602 +         * and receive buffer size to half of nfs_send_bufsz and
     2603 +         * nfs_recv_bufsz respectively.
     2604 +         */
     2605 +        if (ptob(physmem) <= TWO_GIGB) {
     2606 +                sbufsz = nfs_send_bufsz >> 1;
     2607 +                rbufsz = nfs_recv_bufsz >> 1;
     2608 +        } else {
     2609 +                sbufsz = nfs_send_bufsz;
     2610 +                rbufsz = nfs_recv_bufsz;
     2611 +        }
     2612 +        /*
     2613 +         * Only set new buffer size if it's larger than the system
     2614 +         * default buffer size. If smaller buffer size is needed
     2615 +         * then use /etc/system to set nfs_default_bufsz to 1.
     2616 +         */
     2617 +        ok = connmgr_getopt_int(wq, SOL_SOCKET, SO_RCVBUF, &val, e, cr);
     2618 +        if ((ok == TRUE) && (val < sbufsz)) {
     2619 +                ok = connmgr_setopt_int(wq, SOL_SOCKET, SO_RCVBUF,
     2620 +                    sbufsz, e, cr);
     2621 +                DTRACE_PROBE2(connmgr_nfs_rcvbufsz__setopt,
     2622 +                    int, ok, calllist_t *, e);
     2623 +        }
     2624 +
     2625 +        ok = connmgr_getopt_int(wq, SOL_SOCKET, SO_SNDBUF, &val, e, cr);
     2626 +        if ((ok == TRUE) && (val < rbufsz)) {
     2627 +                ok = connmgr_setopt_int(wq, SOL_SOCKET, SO_SNDBUF,
     2628 +                    rbufsz, e, cr);
     2629 +                DTRACE_PROBE2(connmgr_nfs_sndbufsz__setopt,
     2630 +                    int, ok, calllist_t *, e);
     2631 +        }
     2632 +        return (TRUE);
     2633 +}
     2634 +
     2635 +/*
2561 2636   * Given an open stream, connect to the remote.  Returns true if connected,
2562 2637   * false otherwise.
2563 2638   */
2564 2639  static bool_t
2565 2640  connmgr_connect(
2566 2641          struct cm_xprt          *cm_entry,
2567 2642          queue_t                 *wq,
2568 2643          struct netbuf           *addr,
2569 2644          int                     addrfmly,
2570 2645          calllist_t              *e,
↓ open down ↓ 31 lines elided ↑ open up ↑
2602 2677                  ASSERT(rpcstat != NULL);
2603 2678  
2604 2679                  RPCLOG0(1, "connmgr_connect: cannot alloc mp for "
2605 2680                      "sending conn request\n");
2606 2681                  COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcnomem);
2607 2682                  e->call_status = RPC_SYSTEMERROR;
2608 2683                  e->call_reason = ENOSR;
2609 2684                  return (FALSE);
2610 2685          }
2611 2686  
     2687 +        /* Set TCP buffer size for NFS connections if needed */
     2688 +        (void) connmgr_nfs_setbufsz(e, addrfmly, addr, wq, cr);
     2689 +
2612 2690          mp->b_datap->db_type = M_PROTO;
2613 2691          tcr = (struct T_conn_req *)mp->b_rptr;
2614 2692          bzero(tcr, sizeof (*tcr));
2615 2693          tcr->PRIM_type = T_CONN_REQ;
2616 2694          tcr->DEST_length = addr->len;
2617 2695          tcr->DEST_offset = sizeof (struct T_conn_req);
2618 2696          mp->b_wptr = mp->b_rptr + sizeof (*tcr);
2619 2697  
2620 2698          bcopy(addr->buf, mp->b_wptr, tcr->DEST_length);
2621 2699          mp->b_wptr += tcr->DEST_length;
↓ open down ↓ 135 lines elided ↑ open up ↑
2757 2835          ((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
2758 2836              x_server.value.str.addr.ptr =
2759 2837              kmem_alloc(INET6_ADDRSTRLEN, KM_SLEEP);
2760 2838  
2761 2839          cm_entry->x_ksp->ks_update = conn_kstat_update;
2762 2840          kstat_install(cm_entry->x_ksp);
2763 2841          return (TRUE);
2764 2842  }
2765 2843  
2766 2844  /*
     2845 + * Verify that the specified offset falls within the mblk and
     2846 + * that the resulting pointer is aligned.
     2847 + * Returns NULL if not.
     2848 + *
     2849 + * code from fs/sockfs/socksubr.c
     2850 + */
     2851 +static void *
     2852 +connmgr_opt_getoff(mblk_t *mp, t_uscalar_t offset,
     2853 +    t_uscalar_t length, uint_t align_size)
     2854 +{
     2855 +        uintptr_t ptr1, ptr2;
     2856 +
     2857 +        ASSERT(mp && mp->b_wptr >= mp->b_rptr);
     2858 +        ptr1 = (uintptr_t)mp->b_rptr + offset;
     2859 +        ptr2 = (uintptr_t)ptr1 + length;
     2860 +        if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) {
     2861 +                return (NULL);
     2862 +        }
     2863 +        if ((ptr1 & (align_size - 1)) != 0) {
     2864 +                return (NULL);
     2865 +        }
     2866 +        return ((void *)ptr1);
     2867 +}
     2868 +
     2869 +static bool_t
     2870 +connmgr_getopt_int(queue_t *wq, int level, int name, int *val,
     2871 +    calllist_t *e, cred_t *cr)
     2872 +{
     2873 +        mblk_t *mp;
     2874 +        struct opthdr *opt, *opt_res;
     2875 +        struct T_optmgmt_req *tor;
     2876 +        struct T_optmgmt_ack *opt_ack;
     2877 +        struct timeval waitp;
     2878 +        int error;
     2879 +
     2880 +        mp = allocb_cred(sizeof (struct T_optmgmt_req) +
     2881 +            sizeof (struct opthdr) + sizeof (int), cr, NOPID);
     2882 +        if (mp == NULL) {
     2883 +                RPCLOG0(1, "connmgr_getopt: cannot alloc mp for option "
     2884 +                    "request\n");
     2885 +                return (FALSE);
     2886 +        }
     2887 +
     2888 +        mp->b_datap->db_type = M_PROTO;
     2889 +        tor = (struct T_optmgmt_req *)(mp->b_rptr);
     2890 +        tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
     2891 +        tor->MGMT_flags = T_CURRENT;
     2892 +        tor->OPT_length = sizeof (struct opthdr) + sizeof (int);
     2893 +        tor->OPT_offset = sizeof (struct T_optmgmt_req);
     2894 +
     2895 +        opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
     2896 +        opt->level = level;
     2897 +        opt->name = name;
     2898 +        opt->len = sizeof (int);
     2899 +        mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
     2900 +            sizeof (int);
     2901 +
     2902 +        /*
     2903 +         * We will use this connection regardless
     2904 +         * of whether or not the option is readable.
     2905 +         */
     2906 +        if (clnt_dispatch_send(wq, mp, e, 0, 0) != RPC_SUCCESS) {
     2907 +                DTRACE_PROBE(krpc__e__connmgr__getopt__cantsend);
     2908 +                freemsg(mp);
     2909 +                return (FALSE);
     2910 +        }
     2911 +
     2912 +        mutex_enter(&clnt_pending_lock);
     2913 +
     2914 +        waitp.tv_sec = clnt_cots_min_conntout;
     2915 +        waitp.tv_usec = 0;
     2916 +        error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);
     2917 +
     2918 +        if (e->call_prev)
     2919 +                e->call_prev->call_next = e->call_next;
     2920 +        else
     2921 +                clnt_pending = e->call_next;
     2922 +        if (e->call_next)
     2923 +                e->call_next->call_prev = e->call_prev;
     2924 +        mutex_exit(&clnt_pending_lock);
     2925 +
     2926 +        /* get reply message */
     2927 +        mp = e->call_reply;
     2928 +        e->call_reply = NULL;
     2929 +
     2930 +        if ((!mp) || (e->call_status != RPC_SUCCESS) || (error != 0)) {
     2931 +
     2932 +                DTRACE_PROBE4(connmgr_getopt__failed, int, name,
     2933 +                    int, e->call_status, int, error, mblk_t *, mp);
     2934 +
     2935 +                if (mp)
     2936 +                        freemsg(mp);
     2937 +                return (FALSE);
     2938 +        }
     2939 +
     2940 +        opt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
     2941 +        opt_res = (struct opthdr *)connmgr_opt_getoff(mp, opt_ack->OPT_offset,
     2942 +            opt_ack->OPT_length, __TPI_ALIGN_SIZE);
     2943 +
     2944 +        if (!opt_res) {
     2945 +                DTRACE_PROBE4(connmgr_getopt__optres, mblk_t *, mp, int, name,
     2946 +                    int, opt_ack->OPT_offset, int, opt_ack->OPT_length);
     2947 +                freemsg(mp);
     2948 +                return (FALSE);
     2949 +        }
     2950 +        *val = *(int *)&opt_res[1];
     2951 +
     2952 +        DTRACE_PROBE2(connmgr_getopt__ok, int, name, int, *val);
     2953 +
     2954 +        freemsg(mp);
     2955 +        return (TRUE);
     2956 +}
     2957 +
     2958 +/*
2767 2959   * Called by connmgr_connect to set an option on the new stream.
2768 2960   */
2769 2961  static bool_t
2770      -connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e, cred_t *cr)
     2962 +connmgr_setopt_int(queue_t *wq, int level, int name, int val,
     2963 +    calllist_t *e, cred_t *cr)
2771 2964  {
2772 2965          mblk_t *mp;
2773 2966          struct opthdr *opt;
2774 2967          struct T_optmgmt_req *tor;
2775 2968          struct timeval waitp;
2776 2969          int error;
2777 2970  
2778 2971          mp = allocb_cred(sizeof (struct T_optmgmt_req) +
2779 2972              sizeof (struct opthdr) + sizeof (int), cr, NOPID);
2780 2973          if (mp == NULL) {
↓ open down ↓ 6 lines elided ↑ open up ↑
2787 2980          tor = (struct T_optmgmt_req *)(mp->b_rptr);
2788 2981          tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
2789 2982          tor->MGMT_flags = T_NEGOTIATE;
2790 2983          tor->OPT_length = sizeof (struct opthdr) + sizeof (int);
2791 2984          tor->OPT_offset = sizeof (struct T_optmgmt_req);
2792 2985  
2793 2986          opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
2794 2987          opt->level = level;
2795 2988          opt->name = name;
2796 2989          opt->len = sizeof (int);
2797      -        *(int *)((char *)opt + sizeof (*opt)) = 1;
     2990 +        *(int *)((char *)opt + sizeof (*opt)) = val;
2798 2991          mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
2799 2992              sizeof (int);
2800 2993  
2801 2994          /*
2802 2995           * We will use this connection regardless
2803 2996           * of whether or not the option is settable.
2804 2997           */
2805 2998          if (clnt_dispatch_send(wq, mp, e, 0, 0) != RPC_SUCCESS) {
2806 2999                  DTRACE_PROBE(krpc__e__connmgr__setopt__cantsend);
2807 3000                  freemsg(mp);
↓ open down ↓ 20 lines elided ↑ open up ↑
2828 3021          }
2829 3022  
2830 3023          if (e->call_status != RPC_SUCCESS || error != 0) {
2831 3024                  RPCLOG(1, "connmgr_setopt: can't set option: %d\n", name);
2832 3025                  return (FALSE);
2833 3026          }
2834 3027          RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name);
2835 3028          return (TRUE);
2836 3029  }
2837 3030  
     3031 +static bool_t
     3032 +connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e, cred_t *cr)
     3033 +{
     3034 +        return (connmgr_setopt_int(wq, level, name, 1, e, cr));
     3035 +}
     3036 +
2838 3037  #ifdef  DEBUG
2839 3038  
2840 3039  /*
2841 3040   * This is a knob to let us force code coverage in allocation failure
2842 3041   * case.
2843 3042   */
2844 3043  static int      connmgr_failsnd;
2845 3044  #define CONN_SND_ALLOC(Size, Pri)       \
2846 3045          ((connmgr_failsnd-- > 0) ? NULL : allocb(Size, Pri))
2847 3046  
↓ open down ↓ 727 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX