--- old/usr/src/uts/common/inet/tcp/tcp.c Fri Aug 29 10:35:26 2008 +++ new/usr/src/uts/common/inet/tcp/tcp.c Fri Aug 29 10:35:24 2008 @@ -3477,6 +3477,8 @@ * requirement. */ if (tcp->tcp_state != TCPS_LISTEN) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_LISTEN); tcp->tcp_state = TCPS_LISTEN; /* Initialize the chain. Don't need the eager_lock */ tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp; @@ -3773,6 +3775,8 @@ * bound to prevent others from getting the port * number. */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; tcp->tcp_lport = htons(port); *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport; @@ -3904,6 +3908,9 @@ if (!tcp->tcp_tconnind_started) { CONN_DEC_REF(tcp->tcp_connp); } else { + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; } } else { @@ -4637,6 +4644,10 @@ ASSERT(tcp->tcp_time_wait_next == NULL); ASSERT(tcp->tcp_time_wait_prev == NULL); ASSERT(tcp->tcp_time_wait_expire == 0); + if (connp->conn_fully_bound) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_CLOSED); + } tcp->tcp_state = TCPS_CLOSED; /* Release any SSL context */ @@ -5878,6 +5889,9 @@ * up multicasts to those that have expressed interest * TODO: what about rejecting broadcasts? * Also check that source is not a multicast or broadcast address. + * + * DTrace tcp:::state-change is probed a little further down, + * where it is set for the second time. */ eager->tcp_state = TCPS_SYN_RCVD; @@ -5898,6 +5912,13 @@ goto error3; } + /* + * DTrace the first SYN as a tcp:::receive. This is placed after + * tcp_adapt_ire() so that tcp->tcp_loopback has been set. + */ + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, + mp->b_rptr, tcp_t *, tcp, tcph_t *, tcph); + /* Process all TCP options. 
*/ tcp_process_options(eager, tcph); @@ -6014,6 +6035,8 @@ eager->tcp_rnxt = seg_seq + 1; U32_TO_ABE32(eager->tcp_rnxt, eager->tcp_tcph->th_ack); BUMP_MIB(&tcps->tcps_mib, tcpPassiveOpens); + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, eager, + int32_t, TCPS_SYN_RCVD); eager->tcp_state = TCPS_SYN_RCVD; mp1 = tcp_xmit_mp(eager, eager->tcp_xmit_head, eager->tcp_mss, NULL, NULL, eager->tcp_iss, B_FALSE, NULL, B_FALSE); @@ -6123,6 +6146,23 @@ error3: CONN_DEC_REF(econnp); error2: + /* + * DTrace this tcp:::receive event, as we skipped the previous receive + * probe. For DTrace only, we find the IP header length so that the + * TCP header can be found. + */ + ipvers = IPH_HDR_VERSION(mp->b_rptr); + if (OK_32PTR(mp->b_rptr) && + (ipvers == IPV4_VERSION || ipvers == IPV6_VERSION)) { + if (ipvers == IPV4_VERSION) + ip_hdr_len = IPH_HDR_LENGTH((ipha_t *)mp->b_rptr); + else + ip_hdr_len = ip_hdr_length_v6(mp, (ip6_t *)mp->b_rptr); + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, mp->b_rptr, tcp_t *, NULL, tcph_t *, + &mp->b_rptr[ip_hdr_len]); + } + freemsg(mp); } @@ -6621,6 +6661,8 @@ goto failed; } } + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_SYN_SENT); tcp->tcp_state = TCPS_SYN_SENT; /* @@ -6633,6 +6675,8 @@ */ mp = mi_tpi_ok_ack_alloc(mp); if (!mp) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, oldstate); tcp->tcp_state = oldstate; goto failed; } @@ -6673,6 +6717,8 @@ return; } /* Error case */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, oldstate); tcp->tcp_state = oldstate; mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM); @@ -6829,6 +6875,8 @@ goto failed; } } + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_SYN_SENT); tcp->tcp_state = TCPS_SYN_SENT; /* * TODO: allow data with connect requests @@ -6840,6 +6888,8 @@ */ mp = mi_tpi_ok_ack_alloc(mp); if (!mp) { + 
DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, oldstate); tcp->tcp_state = oldstate; goto failed; } @@ -6866,6 +6916,8 @@ return; } /* Error case */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, oldstate); tcp->tcp_state = oldstate; mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM); @@ -7014,9 +7066,13 @@ ltcp = connp->conn_tcp; } if (tcp->tcp_conn_req_max && ltcp == NULL) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_LISTEN); tcp->tcp_state = TCPS_LISTEN; } else if (old_state > TCPS_BOUND) { tcp->tcp_conn_req_max = 0; + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; } if (ltcp != NULL) @@ -7917,6 +7973,8 @@ * is only removed from connected hash. * */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_LISTEN); tcp->tcp_state = TCPS_LISTEN; tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp; tcp->tcp_eager_next_drop_q0 = tcp; @@ -7932,6 +7990,8 @@ tcp->tcp_ipha->ipha_src, tcp->tcp_lport); } } else { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; } @@ -8293,6 +8353,7 @@ tcp->tcp_rq = q; tcp->tcp_wq = WR(q); + /* DTrace ignores this - it isn't a tcp:::state-change */ tcp->tcp_state = TCPS_IDLE; if ((err = tcp_init_values(tcp)) != 0) tcp_timers_stop(tcp); @@ -13294,6 +13355,13 @@ } } + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, + iphdr, tcp_t *, tcp, tcph_t *, tcph); + if (tcp->tcp_state == TCPS_SYN_RCVD && (flags & TH_ACK)) { + DTRACE_TCP5(accept__established, mblk_t *, NULL, conn_t *, + NULL, void_ip_t *, iphdr, tcp_t *, tcp, tcph_t *, tcph); + } + switch (tcp->tcp_state) { case TCPS_SYN_SENT: if (flags & TH_ACK) { @@ -13315,6 +13383,10 @@ ASSERT(tcp->tcp_suna + 1 == seg_ack); } if (flags & TH_RST) { + DTRACE_TCP5(connect__refused, mblk_t *, NULL, + 
conn_t *, NULL, void_ip_t *, iphdr, tcp_t *, NULL, + tcph_t *, tcph); + freemsg(mp); if (flags & TH_ACK) (void) tcp_clean_death(tcp, @@ -13389,9 +13461,18 @@ /* One for the SYN */ tcp->tcp_suna = tcp->tcp_iss + 1; tcp->tcp_valid_bits &= ~TCP_ISS_VALID; + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_ESTABLISHED); tcp->tcp_state = TCPS_ESTABLISHED; /* + * For DTrace observability, remember that we just + * established a connection and are about to send + * the final ACK. + */ + tcp->tcp_dtrace_connect_established = B_TRUE; + + /* * If SYN was retransmitted, need to reset all * retransmission info. This is because this * segment will be treated as a dup ACK. @@ -13498,6 +13579,8 @@ seg_seq++; break; } + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_SYN_RCVD); tcp->tcp_state = TCPS_SYN_RCVD; mp1 = tcp_xmit_mp(tcp, tcp->tcp_xmit_head, tcp->tcp_mss, NULL, NULL, tcp->tcp_iss, B_FALSE, NULL, B_FALSE); @@ -14417,6 +14500,8 @@ tcp->tcp_max_swnd = new_swnd; tcp->tcp_swl1 = seg_seq; tcp->tcp_swl2 = seg_ack; + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_ESTABLISHED); tcp->tcp_state = TCPS_ESTABLISHED; tcp->tcp_valid_bits &= ~TCP_ISS_VALID; @@ -15053,6 +15138,9 @@ switch (tcp->tcp_state) { case TCPS_FIN_WAIT_1: if (tcp->tcp_fin_acked) { + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_FIN_WAIT_2); tcp->tcp_state = TCPS_FIN_WAIT_2; /* * We implement the non-standard BSD/SunOS @@ -15085,6 +15173,9 @@ goto xmit_check; case TCPS_CLOSING: if (tcp->tcp_fin_acked) { + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_TIME_WAIT); tcp->tcp_state = TCPS_TIME_WAIT; /* * Unconditionally clear the exclusive binding @@ -15130,16 +15221,25 @@ switch (tcp->tcp_state) { case TCPS_SYN_RCVD: case TCPS_ESTABLISHED: + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + 
TCPS_CLOSE_WAIT); tcp->tcp_state = TCPS_CLOSE_WAIT; /* Keepalive? */ break; case TCPS_FIN_WAIT_1: if (!tcp->tcp_fin_acked) { + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_CLOSING); tcp->tcp_state = TCPS_CLOSING; break; } /* FALLTHRU */ case TCPS_FIN_WAIT_2: + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_TIME_WAIT); tcp->tcp_state = TCPS_TIME_WAIT; /* * Unconditionally clear the exclusive binding @@ -16000,6 +16100,8 @@ /*NOTREACHED*/ } + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_IDLE); tcp->tcp_state = TCPS_IDLE; if (tcp->tcp_ipversion == IPV4_VERSION) tcp->tcp_ipha->ipha_src = 0; @@ -16042,6 +16144,7 @@ int retval; mblk_t *ire_mp; tcp_stack_t *tcps = tcp->tcp_tcps; + uint_t ip_hdr_len; switch (mp->b_datap->db_type) { case M_PROTO: @@ -16226,6 +16329,24 @@ TCP_TRACE_SEND_PKT); mblk_setcred(syn_mp, cr); DB_CPID(syn_mp) = pid; + + /* + * DTrace sending the first SYN as a + * tcp:::connect-request event. For DTrace + * only, the IP header length is found + * so that the TCP header can be retrieved. 
+ */ + if (tcp->tcp_ipversion == IPV4_VERSION) + ip_hdr_len = IPH_HDR_LENGTH( + (ipha_t *)syn_mp->b_rptr); + else + ip_hdr_len = ip_hdr_length_v6(syn_mp, + (ip6_t *)syn_mp->b_rptr); + DTRACE_TCP5(connect__request, mblk_t *, NULL, + conn_t *, NULL, void_ip_t *, + syn_mp->b_rptr, tcp_t *, tcp, tcph_t *, + &syn_mp->b_rptr[ip_hdr_len]); + tcp_send_data(tcp, tcp->tcp_wq, syn_mp); } after_syn_sent: @@ -17845,6 +17966,8 @@ V6_SET_ZERO(tcp->tcp_ip_src_v6); bzero(tcp->tcp_tcph->th_lport, sizeof (tcp->tcp_tcph->th_lport)); tcp_bind_hash_remove(tcp); + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_IDLE); tcp->tcp_state = TCPS_IDLE; tcp->tcp_mdt = B_FALSE; /* Send M_FLUSH according to TPI */ @@ -19523,6 +19646,7 @@ uint_t ire_fp_mp_len; tcp_stack_t *tcps = tcp->tcp_tcps; ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; + uint_t ip_hdr_len; ASSERT(DB_TYPE(mp) == M_DATA); @@ -19533,6 +19657,18 @@ src = ipha->ipha_src; dst = ipha->ipha_dst; + if (tcp->tcp_ipversion == IPV4_VERSION) { + DTRACE_TCP5(send, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, ipha, tcp_t *, tcp, tcph_t *, + &mp->b_rptr[IPH_HDR_LENGTH(mp->b_rptr)]); + if (tcp->tcp_dtrace_connect_established) { + DTRACE_TCP5(connect__established, mblk_t *, NULL, + conn_t *, NULL, void_ip_t *, ipha, tcp_t *, tcp, + tcph_t *, &mp->b_rptr[IPH_HDR_LENGTH(mp->b_rptr)]); + tcp->tcp_dtrace_connect_established = B_FALSE; + } + } + /* * Drop off fast path for IPv6 and also if options are present or * we need to resolve a TS label. 
@@ -19548,6 +19684,21 @@ if (tcp->tcp_snd_zcopy_aware) mp = tcp_zcopy_disable(tcp, mp); TCP_STAT(tcps, tcp_ip_send); + + if (tcp->tcp_ipversion == IPV6_VERSION) { + ip_hdr_len = ip_hdr_length_v6(mp, (ip6_t *)mp->b_rptr); + DTRACE_TCP5(send, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, mp->b_rptr, tcp_t *, tcp, tcph_t *, + &mp->b_rptr[ip_hdr_len]); + if (tcp->tcp_dtrace_connect_established) { + DTRACE_TCP5(connect__established, mblk_t *, + NULL, conn_t *, NULL, void_ip_t *, + mp->b_rptr, tcp_t *, tcp, tcph_t *, + &mp->b_rptr[ip_hdr_len]); + tcp->tcp_dtrace_connect_established = B_FALSE; + } + } + CALL_IP_WPUT(connp, q, mp); return; } @@ -20956,6 +21107,9 @@ * in order for us to get here in * the first place. */ + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_FIN_WAIT_1); tcp->tcp_state = TCPS_FIN_WAIT_1; /* @@ -21591,6 +21745,9 @@ IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, mp, ipha, up, IPPROTO_TCP, IP_SIMPLE_HDR_LENGTH, ntohs(ipha->ipha_length), cksum); + DTRACE_TCP5(send, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, ipha, + tcp_t *, tcp, tcph_t *, &mp->b_rptr[IPH_HDR_LENGTH(mp->b_rptr)]); + /* * Append LSO flag to DB_LSOFLAGS(mp) and set the mss to DB_LSOMSS(mp). */ @@ -23236,6 +23393,13 @@ } ipsec_mp = nmp; + DTRACE_TCP5(send, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, + mp->b_rptr, tcp_t *, NULL, tcph_t *, tcph); + if (tcph->th_flags[0] == (TH_RST|TH_ACK)) { + DTRACE_TCP5(accept__refused, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, mp->b_rptr, tcp_t *, NULL, tcph_t *, tcph); + } + /* * NOTE: one might consider tracing a TCP packet here, but * this function has no active TCP state and no tcp structure @@ -23456,6 +23620,13 @@ seg_ack = BE32_TO_U32(tcph->th_ack); flags = tcph->th_flags[0]; + /* + * DTrace this "unknown" segment as a tcp:::receive, as we did + * just receive something that was TCP. 
+ */ + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, rptr, + tcp_t *, NULL, tcph_t *, tcph); + seg_len = msgdsize(mp) - (TCP_HDR_LENGTH(tcph) + ip_hdr_len); if (flags & TH_RST) { freemsg(ipsec_mp); @@ -23793,9 +23964,15 @@ switch (tcp->tcp_state) { case TCPS_SYN_RCVD: case TCPS_ESTABLISHED: + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_FIN_WAIT_1); tcp->tcp_state = TCPS_FIN_WAIT_1; break; case TCPS_CLOSE_WAIT: + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_LAST_ACK); tcp->tcp_state = TCPS_LAST_ACK; break; } @@ -24145,6 +24322,8 @@ * that since INADDR_ANY is all 0, we do not need to set * tcp_bound_source to INADDR_ANY here. */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; tcp->tcp_lport = port; tcp->tcp_exclbind = 1;