Print this page
*** NO COMMENTS ***

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/tcp/tcp.c
          +++ new/usr/src/uts/common/inet/tcp/tcp.c
↓ open down ↓ 3469 lines elided ↑ open up ↑
3470 3470                          tcp->tcp_conn_req_max = tcps->tcps_conn_req_min;
3471 3471                  if (tcp->tcp_conn_req_max > tcps->tcps_conn_req_max_q)
3472 3472                          tcp->tcp_conn_req_max = tcps->tcps_conn_req_max_q;
3473 3473                  /*
3474 3474                   * If this is a listener, do not reset the eager list
3475 3475                   * and related state.  Note that we don't check if the
3476 3476                   * existing eager list meets the new tcp_conn_req_max
3477 3477                   * requirement.
3478 3478                   */
3479 3479                  if (tcp->tcp_state != TCPS_LISTEN) {
     3480 +                        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     3481 +                            tcp_t *, tcp, int32_t, TCPS_LISTEN);
3480 3482                          tcp->tcp_state = TCPS_LISTEN;
3481 3483                          /* Initialize the chain. Don't need the eager_lock */
3482 3484                          tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp;
3483 3485                          tcp->tcp_eager_next_drop_q0 = tcp;
3484 3486                          tcp->tcp_eager_prev_drop_q0 = tcp;
3485 3487                          tcp->tcp_second_ctimer_threshold =
3486 3488                              tcps->tcps_ip_abort_linterval;
3487 3489                  }
3488 3490          }
3489 3491  
↓ open down ↓ 276 lines elided ↑ open up ↑
3766 3768                  }
3767 3769                  if (ltcp != NULL) {
3768 3770                          /* The port number is busy */
3769 3771                          mutex_exit(&tbf->tf_lock);
3770 3772                  } else {
3771 3773                          /*
3772 3774                           * This port is ours. Insert in fanout and mark as
3773 3775                           * bound to prevent others from getting the port
3774 3776                           * number.
3775 3777                           */
     3778 +                        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     3779 +                            tcp_t *, tcp, int32_t, TCPS_BOUND);
3776 3780                          tcp->tcp_state = TCPS_BOUND;
3777 3781                          tcp->tcp_lport = htons(port);
3778 3782                          *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport;
3779 3783  
3780 3784                          ASSERT(&tcps->tcps_bind_fanout[TCP_BIND_HASH(
3781 3785                              tcp->tcp_lport)] == tbf);
3782 3786                          tcp_bind_hash_insert(tbf, tcp, 1);
3783 3787  
3784 3788                          mutex_exit(&tbf->tf_lock);
3785 3789  
↓ open down ↓ 111 lines elided ↑ open up ↑
3897 3901                           * conn_ind hasn't gone up, blow away the eager and drop
3898 3902                           * the last reference as well. If the conn_ind has gone
3899 3903                           * up, state should be BOUND. tcp_accept_finish
3900 3904                           * will figure out that the connection has received a
3901 3905                           * RST and will send a DISCON_IND to the application.
3902 3906                           */
3903 3907                          tcp_closei_local(tcp);
3904 3908                          if (!tcp->tcp_tconnind_started) {
3905 3909                                  CONN_DEC_REF(tcp->tcp_connp);
3906 3910                          } else {
     3911 +                                DTRACE_TCP4(state__change, void, NULL,
     3912 +                                    conn_t *, NULL, tcp_t *, tcp, int32_t,
     3913 +                                    TCPS_BOUND);
3907 3914                                  tcp->tcp_state = TCPS_BOUND;
3908 3915                          }
3909 3916                  } else {
3910 3917                          tcp_close_detached(tcp);
3911 3918                  }
3912 3919                  return (0);
3913 3920          }
3914 3921  
3915 3922          TCP_STAT(tcps, tcp_clean_death_nondetached);
3916 3923  
↓ open down ↓ 713 lines elided ↑ open up ↑
4630 4637          ire = connp->conn_ire_cache;
4631 4638          connp->conn_ire_cache = NULL;
4632 4639          mutex_exit(&connp->conn_lock);
4633 4640          if (ire != NULL)
4634 4641                  IRE_REFRELE_NOTR(ire);
4635 4642  
4636 4643          /* Need to cleanup any pending ioctls */
4637 4644          ASSERT(tcp->tcp_time_wait_next == NULL);
4638 4645          ASSERT(tcp->tcp_time_wait_prev == NULL);
4639 4646          ASSERT(tcp->tcp_time_wait_expire == 0);
     4647 +        if (connp->conn_fully_bound) {
     4648 +                DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     4649 +                    tcp_t *, tcp, int32_t, TCPS_CLOSED);
     4650 +        }
4640 4651          tcp->tcp_state = TCPS_CLOSED;
4641 4652  
4642 4653          /* Release any SSL context */
4643 4654          if (tcp->tcp_kssl_ent != NULL) {
4644 4655                  kssl_release_ent(tcp->tcp_kssl_ent, NULL, KSSL_NO_PROXY);
4645 4656                  tcp->tcp_kssl_ent = NULL;
4646 4657          }
4647 4658          if (tcp->tcp_kssl_ctx != NULL) {
4648 4659                  kssl_release_ctx(tcp->tcp_kssl_ctx);
4649 4660                  tcp->tcp_kssl_ctx = NULL;
↓ open down ↓ 1221 lines elided ↑ open up ↑
5871 5882          tcp_bind_hash_insert(&tcps->tcps_bind_fanout[
5872 5883              TCP_BIND_HASH(eager->tcp_lport)], eager, 0);
5873 5884  
5874 5885          CL_INET_CONNECT(eager);
5875 5886  
5876 5887          /*
5877 5888           * No need to check for multicast destination since ip will only pass
5878 5889           * up multicasts to those that have expressed interest
5879 5890           * TODO: what about rejecting broadcasts?
5880 5891           * Also check that source is not a multicast or broadcast address.
     5892 +         *
     5893 +         * The DTrace tcp:::state-change probe fires a little further
     5894 +         * down, where tcp_state is set for the second time.
5881 5895           */
5882 5896          eager->tcp_state = TCPS_SYN_RCVD;
5883 5897  
5884 5898  
5885 5899          /*
5886 5900           * There should be no ire in the mp as we are being called after
5887 5901           * receiving the SYN.
5888 5902           */
5889 5903          ASSERT(tcp_ire_mp(mp) == NULL);
5890 5904  
5891 5905          /*
5892 5906           * Adapt our mss, ttl, ... according to information provided in IRE.
5893 5907           */
5894 5908  
5895 5909          if (tcp_adapt_ire(eager, NULL) == 0) {
5896 5910                  /* Undo the bind_hash_insert */
5897 5911                  tcp_bind_hash_remove(eager);
5898 5912                  goto error3;
5899 5913          }
5900 5914  
     5915 +        /*
     5916 +         * DTrace the first SYN as a tcp:::receive. This is placed after
     5917 +         * tcp_adapt_ire() so that tcp->tcp_loopback has been set.
     5918 +         */
     5919 +        DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, void_ip_t *,
     5920 +            mp->b_rptr, tcp_t *, tcp, tcph_t *, tcph);
     5921 +
5901 5922          /* Process all TCP options. */
5902 5923          tcp_process_options(eager, tcph);
5903 5924  
5904 5925          /* Is the other end ECN capable? */
5905 5926          if (tcps->tcps_ecn_permitted >= 1 &&
5906 5927              (tcph->th_flags[0] & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) {
5907 5928                  eager->tcp_ecn_ok = B_TRUE;
5908 5929          }
5909 5930  
5910 5931          /*
↓ open down ↓ 96 lines elided ↑ open up ↑
6007 6028           * So do all the work necessary before inserting the eager
6008 6029           * in its own perimeter. Be optimistic that ipcl_conn_insert()
6009 6030           * will succeed but undo everything if it fails.
6010 6031           */
6011 6032          seg_seq = ABE32_TO_U32(tcph->th_seq);
6012 6033          eager->tcp_irs = seg_seq;
6013 6034          eager->tcp_rack = seg_seq;
6014 6035          eager->tcp_rnxt = seg_seq + 1;
6015 6036          U32_TO_ABE32(eager->tcp_rnxt, eager->tcp_tcph->th_ack);
6016 6037          BUMP_MIB(&tcps->tcps_mib, tcpPassiveOpens);
     6038 +        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, eager,
     6039 +            int32_t, TCPS_SYN_RCVD);
6017 6040          eager->tcp_state = TCPS_SYN_RCVD;
6018 6041          mp1 = tcp_xmit_mp(eager, eager->tcp_xmit_head, eager->tcp_mss,
6019 6042              NULL, NULL, eager->tcp_iss, B_FALSE, NULL, B_FALSE);
6020 6043          if (mp1 == NULL) {
6021 6044                  /*
6022 6045                   * Increment the ref count as we are going to
6023 6046                   * enqueue an mp in the squeue
6024 6047                   */
6025 6048                  CONN_INC_REF(econnp);
6026 6049                  goto error;
↓ open down ↓ 89 lines elided ↑ open up ↑
6116 6139                              econnp, SQTAG_TCP_CONN_REQ_1);
6117 6140                  }
6118 6141          } else {
6119 6142                  /* Nobody wants this packet */
6120 6143                  freemsg(mp);
6121 6144          }
6122 6145          return;
6123 6146  error3:
6124 6147          CONN_DEC_REF(econnp);
6125 6148  error2:
     6149 +        /*
     6150 +         * DTrace this tcp:::receive event, as we skipped the previous receive
     6151 +         * probe. For DTrace only, we find the IP header length so that the
     6152 +         * TCP header can be found.
     6153 +         */
     6154 +        ipvers = IPH_HDR_VERSION(mp->b_rptr);
     6155 +        if (OK_32PTR(mp->b_rptr) &&
     6156 +            (ipvers == IPV4_VERSION || ipvers == IPV6_VERSION)) {
     6157 +                if (ipvers == IPV4_VERSION)
     6158 +                        ip_hdr_len = IPH_HDR_LENGTH((ipha_t *)mp->b_rptr);
     6159 +                else
     6160 +                        ip_hdr_len = ip_hdr_length_v6(mp, (ip6_t *)mp->b_rptr);
     6161 +                DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL,
     6162 +                    void_ip_t *, mp->b_rptr, tcp_t *, NULL, tcph_t *,
     6163 +                    &mp->b_rptr[ip_hdr_len]);
     6164 +        }
     6165 +
6126 6166          freemsg(mp);
6127 6167  }
6128 6168  
6129 6169  /*
6130 6170   * In an ideal case of vertical partition in NUMA architecture, it's
6131 6171   * beneficial to have the listener and all the incoming connections
6132 6172   * tied to the same squeue. The other constraint is that incoming
6133 6173   * connections should be tied to the squeue attached to interrupted
6134 6174   * CPU for obvious locality reason so this leaves the listener to
6135 6175   * be tied to the same squeue. Our only problem is that when listener
↓ open down ↓ 478 lines elided ↑ open up ↑
6614 6654                   */
6615 6655                  lport = tcp_update_next_port(tcps->tcps_next_port_to_try,
6616 6656                      tcp, B_TRUE);
6617 6657                  lport = tcp_bindi(tcp, lport, &tcp->tcp_ip_src_v6, 0, B_TRUE,
6618 6658                      B_FALSE, B_FALSE);
6619 6659                  if (lport == 0) {
6620 6660                          mp = mi_tpi_err_ack_alloc(mp, TNOADDR, 0);
6621 6661                          goto failed;
6622 6662                  }
6623 6663          }
     6664 +        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp,
     6665 +            int32_t, TCPS_SYN_SENT);
6624 6666          tcp->tcp_state = TCPS_SYN_SENT;
6625 6667  
6626 6668          /*
6627 6669           * TODO: allow data with connect requests
6628 6670           * by unlinking M_DATA trailers here and
6629 6671           * linking them in behind the T_OK_ACK mblk.
6630 6672           * The tcp_rput() bind ack handler would then
6631 6673           * feed them to tcp_wput_data() rather than call
6632 6674           * tcp_timer().
6633 6675           */
6634 6676          mp = mi_tpi_ok_ack_alloc(mp);
6635 6677          if (!mp) {
     6678 +                DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     6679 +                    tcp_t *, tcp, int32_t, oldstate);
6636 6680                  tcp->tcp_state = oldstate;
6637 6681                  goto failed;
6638 6682          }
6639 6683          if (tcp->tcp_family == AF_INET) {
6640 6684                  mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ,
6641 6685                      sizeof (ipa_conn_t));
6642 6686          } else {
6643 6687                  mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ,
6644 6688                      sizeof (ipa6_conn_t));
6645 6689          }
↓ open down ↓ 20 lines elided ↑ open up ↑
6666 6710                  /*
6667 6711                   * If the bind cannot complete immediately
6668 6712                   * IP will arrange to call tcp_rput_other
6669 6713                   * when the bind completes.
6670 6714                   */
6671 6715                  if (mp1 != NULL)
6672 6716                          tcp_rput_other(tcp, mp1);
6673 6717                  return;
6674 6718          }
6675 6719          /* Error case */
     6720 +        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp,
     6721 +            int32_t, oldstate);
6676 6722          tcp->tcp_state = oldstate;
6677 6723          mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM);
6678 6724  
6679 6725  failed:
6680 6726          /* return error ack and blow away saved option results if any */
6681 6727          if (mp != NULL)
6682 6728                  putnext(tcp->tcp_rq, mp);
6683 6729          else {
6684 6730                  tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
6685 6731                      TSYSERR, ENOMEM);
↓ open down ↓ 136 lines elided ↑ open up ↑
6822 6868                   */
6823 6869                  lport = tcp_update_next_port(tcps->tcps_next_port_to_try,
6824 6870                      tcp, B_TRUE);
6825 6871                  lport = tcp_bindi(tcp, lport, &tcp->tcp_ip_src_v6, 0, B_TRUE,
6826 6872                      B_FALSE, B_FALSE);
6827 6873                  if (lport == 0) {
6828 6874                          mp = mi_tpi_err_ack_alloc(mp, TNOADDR, 0);
6829 6875                          goto failed;
6830 6876                  }
6831 6877          }
     6878 +        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp,
     6879 +            int32_t, TCPS_SYN_SENT);
6832 6880          tcp->tcp_state = TCPS_SYN_SENT;
6833 6881          /*
6834 6882           * TODO: allow data with connect requests
6835 6883           * by unlinking M_DATA trailers here and
6836 6884           * linking them in behind the T_OK_ACK mblk.
6837 6885           * The tcp_rput() bind ack handler would then
6838 6886           * feed them to tcp_wput_data() rather than call
6839 6887           * tcp_timer().
6840 6888           */
6841 6889          mp = mi_tpi_ok_ack_alloc(mp);
6842 6890          if (!mp) {
     6891 +                DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     6892 +                    tcp_t *, tcp, int32_t, oldstate);
6843 6893                  tcp->tcp_state = oldstate;
6844 6894                  goto failed;
6845 6895          }
6846 6896          mp1 = tcp_ip_bind_mp(tcp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
6847 6897          if (mp1) {
6848 6898                  /*
6849 6899                   * We need to make sure that the conn_recv is set to a non-null
6850 6900                   * value before we insert the conn_t into the classifier table.
6851 6901                   * This is to avoid a race with an incoming packet which does
6852 6902                   * an ipcl_classify().
↓ open down ↓ 6 lines elided ↑ open up ↑
6859 6909                  mp1 = ip_bind_v6(tcp->tcp_wq, mp1, tcp->tcp_connp,
6860 6910                      &tcp->tcp_sticky_ipp);
6861 6911                  BUMP_MIB(&tcps->tcps_mib, tcpActiveOpens);
6862 6912                  tcp->tcp_active_open = 1;
6863 6913                  /* ip_bind_v6() may return ACK or ERROR */
6864 6914                  if (mp1 != NULL)
6865 6915                          tcp_rput_other(tcp, mp1);
6866 6916                  return;
6867 6917          }
6868 6918          /* Error case */
     6919 +        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp,
     6920 +            int32_t, oldstate);
6869 6921          tcp->tcp_state = oldstate;
6870 6922          mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM);
6871 6923  
6872 6924  failed:
6873 6925          /* return error ack and blow away saved option results if any */
6874 6926          if (mp != NULL)
6875 6927                  putnext(tcp->tcp_rq, mp);
6876 6928          else {
6877 6929                  tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
6878 6930                      TSYSERR, ENOMEM);
↓ open down ↓ 128 lines elided ↑ open up ↑
7007 7059                                  ltcp = connp->conn_tcp;
7008 7060                  } else {
7009 7061                          /* Allow tcp_bound_if listeners? */
7010 7062                          connp = ipcl_lookup_listener_v6(tcp->tcp_lport,
7011 7063                              &tcp->tcp_ip6h->ip6_src, 0,
7012 7064                              tcp->tcp_connp->conn_zoneid, ipst);
7013 7065                          if (connp != NULL)
7014 7066                                  ltcp = connp->conn_tcp;
7015 7067                  }
7016 7068                  if (tcp->tcp_conn_req_max && ltcp == NULL) {
     7069 +                        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     7070 +                            tcp_t *, tcp, int32_t, TCPS_LISTEN);
7017 7071                          tcp->tcp_state = TCPS_LISTEN;
7018 7072                  } else if (old_state > TCPS_BOUND) {
7019 7073                          tcp->tcp_conn_req_max = 0;
     7074 +                        DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     7075 +                            tcp_t *, tcp, int32_t, TCPS_BOUND);
7020 7076                          tcp->tcp_state = TCPS_BOUND;
7021 7077                  }
7022 7078                  if (ltcp != NULL)
7023 7079                          CONN_DEC_REF(ltcp->tcp_connp);
7024 7080                  if (old_state == TCPS_SYN_SENT || old_state == TCPS_SYN_RCVD) {
7025 7081                          BUMP_MIB(&tcps->tcps_mib, tcpAttemptFails);
7026 7082                  } else if (old_state == TCPS_ESTABLISHED ||
7027 7083                      old_state == TCPS_CLOSE_WAIT) {
7028 7084                          BUMP_MIB(&tcps->tcps_mib, tcpEstabResets);
7029 7085                  }
↓ open down ↓ 880 lines elided ↑ open up ↑
7910 7966                   *
7911 7967                   * Note that this stream is still bound to the four
7912 7968                   * tuples of the previous connection in IP.  If a new
7913 7969                   * SYN with different foreign address comes in, IP will
7914 7970                   * not find it and will send it to the global queue.  In
7915 7971                   * the global queue, TCP will do a tcp_lookup_listener()
7916 7972                   * to find this stream.  This works because this stream
7917 7973                   * is only removed from connected hash.
7918 7974                   *
7919 7975                   */
     7976 +                DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     7977 +                    tcp_t *, tcp, int32_t, TCPS_LISTEN);
7920 7978                  tcp->tcp_state = TCPS_LISTEN;
7921 7979                  tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp;
7922 7980                  tcp->tcp_eager_next_drop_q0 = tcp;
7923 7981                  tcp->tcp_eager_prev_drop_q0 = tcp;
7924 7982                  tcp->tcp_connp->conn_recv = tcp_conn_request;
7925 7983                  if (tcp->tcp_family == AF_INET6) {
7926 7984                          ASSERT(tcp->tcp_connp->conn_af_isv6);
7927 7985                          (void) ipcl_bind_insert_v6(tcp->tcp_connp, IPPROTO_TCP,
7928 7986                              &tcp->tcp_ip6h->ip6_src, tcp->tcp_lport);
7929 7987                  } else {
7930 7988                          ASSERT(!tcp->tcp_connp->conn_af_isv6);
7931 7989                          (void) ipcl_bind_insert(tcp->tcp_connp, IPPROTO_TCP,
7932 7990                              tcp->tcp_ipha->ipha_src, tcp->tcp_lport);
7933 7991                  }
7934 7992          } else {
     7993 +                DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL,
     7994 +                    tcp_t *, tcp, int32_t, TCPS_BOUND);
7935 7995                  tcp->tcp_state = TCPS_BOUND;
7936 7996          }
7937 7997  
7938 7998          /*
7939 7999           * Initialize to default values
7940 8000           * Can't fail since enough header template space already allocated
7941 8001           * at open().
7942 8002           */
7943 8003          err = tcp_init_values(tcp);
7944 8004          ASSERT(err == 0);
↓ open down ↓ 341 lines elided ↑ open up ↑
8286 8346   * Guaranteed not to fail so that when an error is returned,
8287 8347   * the caller doesn't need to do any additional cleanup.
8288 8348   */
8289 8349  int
8290 8350  tcp_init(tcp_t *tcp, queue_t *q)
8291 8351  {
8292 8352          int     err;
8293 8353  
8294 8354          tcp->tcp_rq = q;
8295 8355          tcp->tcp_wq = WR(q);
     8356 +        /* Initial state, not a transition: no tcp:::state-change probe */
8296 8357          tcp->tcp_state = TCPS_IDLE;
8297 8358          if ((err = tcp_init_values(tcp)) != 0)
8298 8359                  tcp_timers_stop(tcp); /* leave no cleanup to the caller */
8299 8360          return (err);
8300 8361  }
8301 8362  
8302 8363  static int
8303 8364  tcp_init_values(tcp_t *tcp)
8304 8365  {
8305 8366          int     err;