--- old/usr/src/lib/libdtrace/Makefile.com Fri Aug 29 10:35:01 2008 +++ new/usr/src/lib/libdtrace/Makefile.com Fri Aug 29 10:35:00 2008 @@ -84,6 +84,7 @@ sched.d \ signal.d \ sysevent.d \ + tcp.d \ unistd.d include ../../Makefile.lib @@ -98,6 +99,7 @@ CLEANFILES += ../common/io.sed ../common/io.d CLEANFILES += ../common/ip.sed ../common/ip.d CLEANFILES += ../common/net.sed ../common/net.d +CLEANFILES += ../common/tcp.sed ../common/tcp.d CLEANFILES += ../common/errno.d ../common/signal.d CLEANFILES += ../common/dt_errtags.c ../common/dt_names.c CLEANFILES += ../common/sysevent.sed ../common/sysevent.d @@ -176,6 +178,9 @@ ../common/net.d: ../common/net.sed ../common/net.d.in sed -f ../common/net.sed < ../common/net.d.in > $@ +../common/tcp.d: ../common/tcp.sed ../common/tcp.d.in + sed -f ../common/tcp.sed < ../common/tcp.d.in > $@ + ../common/sysevent.d: ../common/sysevent.sed ../common/sysevent.d.in sed -f ../common/sysevent.sed < ../common/sysevent.d.in > $@ --- old/usr/src/lib/libdtrace/common/ip.d.in Fri Aug 29 10:35:05 2008 +++ new/usr/src/lib/libdtrace/common/ip.d.in Fri Aug 29 10:35:04 2008 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -183,6 +183,13 @@ */ typedef ill_t __dtrace_ipsr_ill_t; +/* + * __dtrace_tcpf_ipinfo_t is used by the translators to construct an + * ipinfo_t during tcp fusion, from a tcp_t plus an additional arg4 for + * the payload bytes. + */ +typedef tcp_t __dtrace_tcpf_ipinfo_t; + #pragma D binding "1.0" translator translator pktinfo_t < mblk_t *M > { pkt_addr = NULL; @@ -226,6 +233,18 @@ inet_ntoa6(&((ip6_t *)I)->ip6_dst) : "") : ""; }; +#pragma D binding "1.0" translator +translator ipinfo_t < __dtrace_tcpf_ipinfo_t *T > { + ip_ver = T->tcp_ipversion; + ip_plength = arg4; /* probe dependent */ + ip_saddr = inet_ntoa6(probename == "send" ? 
+ &T->tcp_connp->connua_v6addr.connua_laddr : + &T->tcp_connp->connua_v6addr.connua_faddr); + ip_daddr = inet_ntoa6(probename == "send" ? + &T->tcp_connp->connua_v6addr.connua_faddr : + &T->tcp_connp->connua_v6addr.connua_laddr); +}; + #pragma D binding "1.0" translator translator ifinfo_t < __dtrace_ipsr_ill_t *I > { if_name = I != NULL ? stringof(I->ill_name) : ""; --- /dev/null Fri Aug 29 10:35:08 2008 +++ new/usr/src/lib/libdtrace/common/tcp.d.in Fri Aug 29 10:35:06 2008 @@ -0,0 +1,149 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma D depends_on module unix +#pragma D depends_on provider tcp + +inline int TH_FIN = @TH_FIN@; +#pragma D binding "1.0" TH_FIN +inline int TH_SYN = @TH_SYN@; +#pragma D binding "1.0" TH_SYN +inline int TH_RST = @TH_RST@; +#pragma D binding "1.0" TH_RST +inline int TH_PUSH = @TH_PUSH@; +#pragma D binding "1.0" TH_PUSH +inline int TH_ACK = @TH_ACK@; +#pragma D binding "1.0" TH_ACK +inline int TH_URG = @TH_URG@; +#pragma D binding "1.0" TH_URG +inline int TH_ECE = @TH_ECE@; +#pragma D binding "1.0" TH_ECE +inline int TH_CWR = @TH_CWR@; +#pragma D binding "1.0" TH_CWR + +/* + * tcpinfo is the TCP header fields. + */ +typedef struct tcpinfo { + uint16_t tcp_sport; /* source port */ + uint16_t tcp_dport; /* destination port */ + uint32_t tcp_seq; /* sequence number */ + uint32_t tcp_ack; /* acknowledgment number */ + uint8_t tcp_offset; /* data offset, in bytes */ + uint8_t tcp_flags; /* flags */ + uint16_t tcp_window; /* window size */ + uint16_t tcp_checksum; /* checksum */ + uint16_t tcp_urgent; /* urgent data pointer */ + tcph_t *tcp_hdr; /* raw TCP header */ +} tcpinfo_t; + +/* + * tcpsinfo contains stable TCP details from tcp_t. + */ +typedef struct tcpsinfo { + int tcps_local; /* is delivered locally, boolean */ + int tcps_active; /* active open (from here), boolean */ + string tcps_state; /* TCP state, as a string */ +} tcpsinfo_t; + +/* + * tcpnsinfo provides the new tcp state for state changes. + */ +typedef struct tcpnsinfo { + string tcps_state; /* TCP state, as a string */ +} tcpnsinfo_t; + +/* + * tcpfinfo contains additional TCP details from tcp_t, that are stable + * for local (tcp-fusion) connections. 
+ */ +typedef struct tcpfinfo { + uint16_t tcpf_sport; /* source port */ + uint16_t tcpf_dport; /* destination port */ +} tcpfinfo_t; + +#pragma D binding "1.0" translator +translator tcpinfo_t < tcph_t *T > { + tcp_sport = ntohs(*(uint16_t *)T->th_lport); + tcp_dport = ntohs(*(uint16_t *)T->th_fport); + tcp_seq = ntohl(*(uint32_t *)T->th_seq); + tcp_ack = ntohl(*(uint32_t *)T->th_ack); + tcp_offset = (*(uint8_t *)T->th_offset_and_rsrvd & 0xf0) >> 2; + tcp_flags = *(uint8_t *)T->th_flags; + tcp_window = ntohs(*(uint16_t *)T->th_win); + tcp_checksum = ntohs(*(uint16_t *)T->th_sum); + tcp_urgent = ntohs(*(uint16_t *)T->th_urp); + tcp_hdr = T; +}; + +#pragma D binding "1.0" translator +translator tcpsinfo_t < tcp_t *T > { + tcps_local = T ? T->tcp_loopback : 0; + tcps_active = T ? T->tcp_active_open : 0; + tcps_state = T ? + T->tcp_state == @TCPS_CLOSED@ ? "state-closed" : + T->tcp_state == @TCPS_IDLE@ ? "state-idle" : + T->tcp_state == @TCPS_BOUND@ ? "state-bound" : + T->tcp_state == @TCPS_LISTEN@ ? "state-listen" : + T->tcp_state == @TCPS_SYN_SENT@ ? "state-syn-sent" : + T->tcp_state == @TCPS_SYN_RCVD@ ? "state-syn-received" : + T->tcp_state == @TCPS_ESTABLISHED@ ? "state-established" : + T->tcp_state == @TCPS_CLOSE_WAIT@ ? "state-close-wait" : + T->tcp_state == @TCPS_FIN_WAIT_1@ ? "state-fin-wait1" : + T->tcp_state == @TCPS_CLOSING@ ? "state-closing" : + T->tcp_state == @TCPS_LAST_ACK@ ? "state-last-ack" : + T->tcp_state == @TCPS_FIN_WAIT_2@ ? "state-fin-wait2" : + T->tcp_state == @TCPS_TIME_WAIT@ ? "state-time-wait" : + "" : "unknown"; +}; + +#pragma D binding "1.0" translator +translator tcpnsinfo_t < int32_t I > { + tcps_state = + I == @TCPS_CLOSED@ ? "state-closed" : + I == @TCPS_IDLE@ ? "state-idle" : + I == @TCPS_BOUND@ ? "state-bound" : + I == @TCPS_LISTEN@ ? "state-listen" : + I == @TCPS_SYN_SENT@ ? "state-syn-sent" : + I == @TCPS_SYN_RCVD@ ? "state-syn-received" : + I == @TCPS_ESTABLISHED@ ? "state-established" : + I == @TCPS_CLOSE_WAIT@ ? 
"state-close-wait" : + I == @TCPS_FIN_WAIT_1@ ? "state-fin-wait1" : + I == @TCPS_CLOSING@ ? "state-closing" : + I == @TCPS_LAST_ACK@ ? "state-last-ack" : + I == @TCPS_FIN_WAIT_2@ ? "state-fin-wait2" : + I == @TCPS_TIME_WAIT@ ? "state-time-wait" : + ""; +}; + +#pragma D binding "1.0" translator +translator tcpfinfo_t < tcp_t *T > { + tcpf_sport = probename == "send" ? + ntohs(T->tcp_connp->u_port.tcpu_ports.tcpu_lport) : + ntohs(T->tcp_connp->u_port.tcpu_ports.tcpu_fport); + tcpf_dport = probename == "send" ? + ntohs(T->tcp_connp->u_port.tcpu_ports.tcpu_fport) : + ntohs(T->tcp_connp->u_port.tcpu_ports.tcpu_lport); +}; --- /dev/null Fri Aug 29 10:35:11 2008 +++ new/usr/src/lib/libdtrace/common/tcp.sed.in Fri Aug 29 10:35:10 2008 @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <netinet/tcp.h> +#include <inet/tcp.h> + +#define SED_REPLACE(x) s/#x/x/g + +SED_REPLACE(TH_FIN) +SED_REPLACE(TH_SYN) +SED_REPLACE(TH_RST) +SED_REPLACE(TH_PUSH) +SED_REPLACE(TH_ACK) +SED_REPLACE(TH_URG) +SED_REPLACE(TH_ECE) +SED_REPLACE(TH_CWR) + +SED_REPLACE(TCPS_CLOSED) +SED_REPLACE(TCPS_IDLE) +SED_REPLACE(TCPS_BOUND) +SED_REPLACE(TCPS_LISTEN) +SED_REPLACE(TCPS_SYN_SENT) +SED_REPLACE(TCPS_SYN_RCVD) +SED_REPLACE(TCPS_ESTABLISHED) +SED_REPLACE(TCPS_CLOSE_WAIT) +SED_REPLACE(TCPS_FIN_WAIT_1) +SED_REPLACE(TCPS_CLOSING) +SED_REPLACE(TCPS_LAST_ACK) +SED_REPLACE(TCPS_FIN_WAIT_2) +SED_REPLACE(TCPS_TIME_WAIT) --- old/usr/src/pkgdefs/SUNWdtrc/prototype_com Fri Aug 29 10:35:13 2008 +++ new/usr/src/pkgdefs/SUNWdtrc/prototype_com Fri Aug 29 10:35:12 2008 @@ -57,6 +57,7 @@ f none usr/lib/dtrace/sched.d 644 root bin f none usr/lib/dtrace/signal.d 644 root bin f none usr/lib/dtrace/sysevent.d 644 root bin +f none usr/lib/dtrace/tcp.d 644 root bin f none usr/lib/dtrace/unistd.d 644 root bin f none usr/lib/libdtrace.so.1 755 root bin s none usr/lib/libdtrace.so=libdtrace.so.1 --- old/usr/src/uts/common/dtrace/sdt_subr.c Fri Aug 29 10:35:16 2008 +++ new/usr/src/uts/common/dtrace/sdt_subr.c Fri Aug 29 10:35:14 2008 @@ -92,6 +92,8 @@ { "proc", "__proc_", &stab_attr, 0 }, { "io", "__io_", &stab_attr, 0 }, { "ip", "__ip_", &stab_attr, 0 }, + { "tcp", "__tcp_", &stab_attr, 0 }, + { "tcpf", "__tcpf_", &stab_attr, 0 }, { "mib", "__mib_", &stab_attr, 0 }, { "fsinfo", "__fsinfo_", &fsinfo_attr, 0 }, { "nfsv3", "__nfsv3_", &stab_attr, 0 }, @@ -804,6 +806,58 @@ { "ip", "receive", 5, 5, "ip6_t *", "ipv6info_t *" }, { "ip", "receive", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */ + { "tcp", "connect-established", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-established", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcp", "connect-established", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "connect-established", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-established", 4, 4, "tcph_t *", 
"tcpinfo_t *" }, + { "tcp", "connect-refused", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-refused", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcp", "connect-refused", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "connect-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-refused", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "connect-request", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-request", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcp", "connect-request", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "connect-request", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-request", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "accept-established", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "accept-established", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcp", "accept-established", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "accept-established", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "accept-established", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "accept-refused", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "accept-refused", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcp", "accept-refused", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "accept-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "accept-refused", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "state-change", 0, 0, "void", "void" }, + { "tcp", "state-change", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcp", "state-change", 2, 2, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "state-change", 3, 3, "int32_t", "tcpnsinfo_t *" }, + { "tcp", "send", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "send", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcp", "send", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "send", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "send", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "receive", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcp", "receive", 2, 2, "void_ip_t *", 
"ipinfo_t *" }, + { "tcp", "receive", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "receive", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcpf", "send", 0, 0, "void", "void" }, + { "tcpf", "send", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcpf", "send", 2, 2, "__dtrace_tcpf_ipinfo_t *", "ipinfo_t *" }, + { "tcpf", "send", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcpf", "send", 4, 3, "tcp_t *", "tcpfinfo_t *" }, + { "tcpf", "send", 5, 4, "uint_t" }, /* __dtrace_tcpf_ipinfo_t */ + { "tcpf", "receive", 0, 0, "void", "void" }, + { "tcpf", "receive", 1, 1, "conn_t *", "csinfo_t *" }, + { "tcpf", "receive", 2, 2, "__dtrace_tcpf_ipinfo_t *", "ipinfo_t *" }, + { "tcpf", "receive", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcpf", "receive", 4, 3, "tcp_t *", "tcpfinfo_t *" }, + { "tcpf", "receive", 5, 4, "uint_t" }, /* __dtrace_tcpf_ipinfo_t */ + { "sysevent", "post", 0, 0, "evch_bind_t *", "syseventchaninfo_t *" }, { "sysevent", "post", 1, 1, "sysevent_impl_t *", "syseventinfo_t *" }, --- old/usr/src/uts/common/inet/ip/ip.c Fri Aug 29 10:35:19 2008 +++ new/usr/src/uts/common/inet/ip/ip.c Fri Aug 29 10:35:18 2008 @@ -6698,6 +6698,8 @@ uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsHCInDelivers); if ((flags & TH_RST) || (flags & TH_URG)) { + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, ipha, tcp_t *, NULL, tcph_t *, tcph); CONN_DEC_REF(connp); freemsg(first_mp); return; @@ -6709,6 +6711,8 @@ return; } + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, ipha, tcp_t *, NULL, tcph_t *, tcph); CONN_DEC_REF(connp); freemsg(first_mp); return; @@ -13137,6 +13141,8 @@ BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); /* No need to send this packet to TCP */ if ((flags & TH_RST) || (flags & TH_URG)) { + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, ipha, tcp_t *, NULL, tcph_t *, tcph); CONN_DEC_REF(connp); freemsg(first_mp); return (NULL); @@ -13148,6 +13154,9 @@ return (NULL); } + 
DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, ipha, tcp_t *, NULL, tcph_t *, tcph); + CONN_DEC_REF(connp); freemsg(first_mp); return (NULL); --- old/usr/src/uts/common/inet/tcp.h Fri Aug 29 10:35:23 2008 +++ new/usr/src/uts/common/inet/tcp.h Fri Aug 29 10:35:22 2008 @@ -590,6 +590,8 @@ struct tcp_s *tcp_eager_prev_drop_q0; struct tcp_s *tcp_eager_next_drop_q0; + boolean_t tcp_dtrace_connect_established; /* trace this event */ + /* * Have we flow controlled xmitter? * This variable can be modified outside the squeue and hence must --- old/usr/src/uts/common/inet/tcp/tcp.c Fri Aug 29 10:35:26 2008 +++ new/usr/src/uts/common/inet/tcp/tcp.c Fri Aug 29 10:35:24 2008 @@ -3477,6 +3477,8 @@ * requirement. */ if (tcp->tcp_state != TCPS_LISTEN) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_LISTEN); tcp->tcp_state = TCPS_LISTEN; /* Initialize the chain. Don't need the eager_lock */ tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp; @@ -3773,6 +3775,8 @@ * bound to prevent others from getting the port * number. */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; tcp->tcp_lport = htons(port); *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport; @@ -3904,6 +3908,9 @@ if (!tcp->tcp_tconnind_started) { CONN_DEC_REF(tcp->tcp_connp); } else { + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; } } else { @@ -4637,6 +4644,10 @@ ASSERT(tcp->tcp_time_wait_next == NULL); ASSERT(tcp->tcp_time_wait_prev == NULL); ASSERT(tcp->tcp_time_wait_expire == 0); + if (connp->conn_fully_bound) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_CLOSED); + } tcp->tcp_state = TCPS_CLOSED; /* Release any SSL context */ @@ -5878,6 +5889,9 @@ * up multicasts to those that have expressed interest * TODO: what about rejecting broadcasts? 
* Also check that source is not a multicast or broadcast address. + * + * DTrace tcp:::state-change is probed a little further down, + * where it is set for the second time. */ eager->tcp_state = TCPS_SYN_RCVD; @@ -5898,6 +5912,13 @@ goto error3; } + /* + * DTrace the first SYN as a tcp:::receive. This is placed after + * tcp_adapt_ire() so that tcp->tcp_loopback has been set. + */ + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, + mp->b_rptr, tcp_t *, tcp, tcph_t *, tcph); + /* Process all TCP options. */ tcp_process_options(eager, tcph); @@ -6014,6 +6035,8 @@ eager->tcp_rnxt = seg_seq + 1; U32_TO_ABE32(eager->tcp_rnxt, eager->tcp_tcph->th_ack); BUMP_MIB(&tcps->tcps_mib, tcpPassiveOpens); + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, eager, + int32_t, TCPS_SYN_RCVD); eager->tcp_state = TCPS_SYN_RCVD; mp1 = tcp_xmit_mp(eager, eager->tcp_xmit_head, eager->tcp_mss, NULL, NULL, eager->tcp_iss, B_FALSE, NULL, B_FALSE); @@ -6123,6 +6146,23 @@ error3: CONN_DEC_REF(econnp); error2: + /* + * DTrace this tcp:::receive event, as we skipped the previous receive + * probe. For DTrace only, we find the IP header length so that the + * TCP header can be found. 
+ */ + ipvers = IPH_HDR_VERSION(mp->b_rptr); + if (OK_32PTR(mp->b_rptr) && + (ipvers == IPV4_VERSION || ipvers == IPV6_VERSION)) { + if (ipvers == IPV4_VERSION) + ip_hdr_len = IPH_HDR_LENGTH((ipha_t *)mp->b_rptr); + else + ip_hdr_len = ip_hdr_length_v6(mp, (ip6_t *)mp->b_rptr); + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, mp->b_rptr, tcp_t *, NULL, tcph_t *, + &mp->b_rptr[ip_hdr_len]); + } + freemsg(mp); } @@ -6621,6 +6661,8 @@ goto failed; } } + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_SYN_SENT); tcp->tcp_state = TCPS_SYN_SENT; /* @@ -6633,6 +6675,8 @@ */ mp = mi_tpi_ok_ack_alloc(mp); if (!mp) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, oldstate); tcp->tcp_state = oldstate; goto failed; } @@ -6673,6 +6717,8 @@ return; } /* Error case */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, oldstate); tcp->tcp_state = oldstate; mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM); @@ -6829,6 +6875,8 @@ goto failed; } } + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_SYN_SENT); tcp->tcp_state = TCPS_SYN_SENT; /* * TODO: allow data with connect requests @@ -6840,6 +6888,8 @@ */ mp = mi_tpi_ok_ack_alloc(mp); if (!mp) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, oldstate); tcp->tcp_state = oldstate; goto failed; } @@ -6866,6 +6916,8 @@ return; } /* Error case */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, oldstate); tcp->tcp_state = oldstate; mp = mi_tpi_err_ack_alloc(mp, TSYSERR, ENOMEM); @@ -7014,9 +7066,13 @@ ltcp = connp->conn_tcp; } if (tcp->tcp_conn_req_max && ltcp == NULL) { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_LISTEN); tcp->tcp_state = TCPS_LISTEN; } else if (old_state > TCPS_BOUND) { tcp->tcp_conn_req_max = 0; + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + 
tcp_t *, tcp, int32_t, TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; } if (ltcp != NULL) @@ -7917,6 +7973,8 @@ * is only removed from connected hash. * */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_LISTEN); tcp->tcp_state = TCPS_LISTEN; tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp; tcp->tcp_eager_next_drop_q0 = tcp; @@ -7932,6 +7990,8 @@ tcp->tcp_ipha->ipha_src, tcp->tcp_lport); } } else { + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; } @@ -8293,6 +8353,7 @@ tcp->tcp_rq = q; tcp->tcp_wq = WR(q); + /* DTrace ignores this - it isn't a tcp:::state-change */ tcp->tcp_state = TCPS_IDLE; if ((err = tcp_init_values(tcp)) != 0) tcp_timers_stop(tcp); @@ -13294,6 +13355,13 @@ } } + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, + iphdr, tcp_t *, tcp, tcph_t *, tcph); + if (tcp->tcp_state == TCPS_SYN_RCVD && (flags & TH_ACK)) { + DTRACE_TCP5(accept__established, mblk_t *, NULL, conn_t *, + NULL, void_ip_t *, iphdr, tcp_t *, tcp, tcph_t *, tcph); + } + switch (tcp->tcp_state) { case TCPS_SYN_SENT: if (flags & TH_ACK) { @@ -13315,6 +13383,10 @@ ASSERT(tcp->tcp_suna + 1 == seg_ack); } if (flags & TH_RST) { + DTRACE_TCP5(connect__refused, mblk_t *, NULL, + conn_t *, NULL, void_ip_t *, iphdr, tcp_t *, NULL, + tcph_t *, tcph); + freemsg(mp); if (flags & TH_ACK) (void) tcp_clean_death(tcp, @@ -13389,9 +13461,18 @@ /* One for the SYN */ tcp->tcp_suna = tcp->tcp_iss + 1; tcp->tcp_valid_bits &= ~TCP_ISS_VALID; + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_ESTABLISHED); tcp->tcp_state = TCPS_ESTABLISHED; /* + * For DTrace observability, remember that we just + * established a connection and are about to send + * the final ACK. + */ + tcp->tcp_dtrace_connect_established = B_TRUE; + + /* * If SYN was retransmitted, need to reset all * retransmission info. 
This is because this * segment will be treated as a dup ACK. @@ -13498,6 +13579,8 @@ seg_seq++; break; } + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_SYN_RCVD); tcp->tcp_state = TCPS_SYN_RCVD; mp1 = tcp_xmit_mp(tcp, tcp->tcp_xmit_head, tcp->tcp_mss, NULL, NULL, tcp->tcp_iss, B_FALSE, NULL, B_FALSE); @@ -14417,6 +14500,8 @@ tcp->tcp_max_swnd = new_swnd; tcp->tcp_swl1 = seg_seq; tcp->tcp_swl2 = seg_ack; + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, + tcp_t *, tcp, int32_t, TCPS_ESTABLISHED); tcp->tcp_state = TCPS_ESTABLISHED; tcp->tcp_valid_bits &= ~TCP_ISS_VALID; @@ -15053,6 +15138,9 @@ switch (tcp->tcp_state) { case TCPS_FIN_WAIT_1: if (tcp->tcp_fin_acked) { + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_FIN_WAIT_2); tcp->tcp_state = TCPS_FIN_WAIT_2; /* * We implement the non-standard BSD/SunOS @@ -15085,6 +15173,9 @@ goto xmit_check; case TCPS_CLOSING: if (tcp->tcp_fin_acked) { + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_TIME_WAIT); tcp->tcp_state = TCPS_TIME_WAIT; /* * Unconditionally clear the exclusive binding @@ -15130,16 +15221,25 @@ switch (tcp->tcp_state) { case TCPS_SYN_RCVD: case TCPS_ESTABLISHED: + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_CLOSE_WAIT); tcp->tcp_state = TCPS_CLOSE_WAIT; /* Keepalive? 
*/ break; case TCPS_FIN_WAIT_1: if (!tcp->tcp_fin_acked) { + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_CLOSING); tcp->tcp_state = TCPS_CLOSING; break; } /* FALLTHRU */ case TCPS_FIN_WAIT_2: + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, int32_t, + TCPS_TIME_WAIT); tcp->tcp_state = TCPS_TIME_WAIT; /* * Unconditionally clear the exclusive binding @@ -16000,6 +16100,8 @@ /*NOTREACHED*/ } + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_IDLE); tcp->tcp_state = TCPS_IDLE; if (tcp->tcp_ipversion == IPV4_VERSION) tcp->tcp_ipha->ipha_src = 0; @@ -16042,6 +16144,7 @@ int retval; mblk_t *ire_mp; tcp_stack_t *tcps = tcp->tcp_tcps; + uint_t ip_hdr_len; switch (mp->b_datap->db_type) { case M_PROTO: @@ -16226,6 +16329,24 @@ TCP_TRACE_SEND_PKT); mblk_setcred(syn_mp, cr); DB_CPID(syn_mp) = pid; + + /* + * DTrace sending the first SYN as a + * tcp:::connect-request event. For DTrace + * only, the IP header length is found + * so that the TCP header can be retrieved. 
+ */ + if (tcp->tcp_ipversion == IPV4_VERSION) + ip_hdr_len = IPH_HDR_LENGTH( + (ipha_t *)syn_mp->b_rptr); + else + ip_hdr_len = ip_hdr_length_v6(syn_mp, + (ip6_t *)syn_mp->b_rptr); + DTRACE_TCP5(connect__request, mblk_t *, NULL, + conn_t *, NULL, void_ip_t *, + syn_mp->b_rptr, tcp_t *, tcp, tcph_t *, + &syn_mp->b_rptr[ip_hdr_len]); + tcp_send_data(tcp, tcp->tcp_wq, syn_mp); } after_syn_sent: @@ -17845,6 +17966,8 @@ V6_SET_ZERO(tcp->tcp_ip_src_v6); bzero(tcp->tcp_tcph->th_lport, sizeof (tcp->tcp_tcph->th_lport)); tcp_bind_hash_remove(tcp); + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_IDLE); tcp->tcp_state = TCPS_IDLE; tcp->tcp_mdt = B_FALSE; /* Send M_FLUSH according to TPI */ @@ -19523,6 +19646,7 @@ uint_t ire_fp_mp_len; tcp_stack_t *tcps = tcp->tcp_tcps; ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; + uint_t ip_hdr_len; ASSERT(DB_TYPE(mp) == M_DATA); @@ -19533,6 +19657,18 @@ src = ipha->ipha_src; dst = ipha->ipha_dst; + if (tcp->tcp_ipversion == IPV4_VERSION) { + DTRACE_TCP5(send, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, ipha, tcp_t *, tcp, tcph_t *, + &mp->b_rptr[IPH_HDR_LENGTH(mp->b_rptr)]); + if (tcp->tcp_dtrace_connect_established) { + DTRACE_TCP5(connect__established, mblk_t *, NULL, + conn_t *, NULL, void_ip_t *, ipha, tcp_t *, tcp, + tcph_t *, &mp->b_rptr[IPH_HDR_LENGTH(mp->b_rptr)]); + tcp->tcp_dtrace_connect_established = B_FALSE; + } + } + /* * Drop off fast path for IPv6 and also if options are present or * we need to resolve a TS label. 
@@ -19548,6 +19684,21 @@ if (tcp->tcp_snd_zcopy_aware) mp = tcp_zcopy_disable(tcp, mp); TCP_STAT(tcps, tcp_ip_send); + + if (tcp->tcp_ipversion == IPV6_VERSION) { + ip_hdr_len = ip_hdr_length_v6(mp, (ip6_t *)mp->b_rptr); + DTRACE_TCP5(send, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, mp->b_rptr, tcp_t *, tcp, tcph_t *, + &mp->b_rptr[ip_hdr_len]); + if (tcp->tcp_dtrace_connect_established) { + DTRACE_TCP5(connect__established, mblk_t *, + NULL, conn_t *, NULL, void_ip_t *, + mp->b_rptr, tcp_t *, tcp, tcph_t *, + &mp->b_rptr[ip_hdr_len]); + tcp->tcp_dtrace_connect_established = B_FALSE; + } + } + CALL_IP_WPUT(connp, q, mp); return; } @@ -20956,6 +21107,9 @@ * in order for us to get here in * the first place. */ + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_FIN_WAIT_1); tcp->tcp_state = TCPS_FIN_WAIT_1; /* @@ -21591,6 +21745,9 @@ IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, mp, ipha, up, IPPROTO_TCP, IP_SIMPLE_HDR_LENGTH, ntohs(ipha->ipha_length), cksum); + DTRACE_TCP5(send, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, ipha, + tcp_t *, tcp, tcph_t *, &mp->b_rptr[IPH_HDR_LENGTH(mp->b_rptr)]); + /* * Append LSO flag to DB_LSOFLAGS(mp) and set the mss to DB_LSOMSS(mp). */ @@ -23236,6 +23393,13 @@ } ipsec_mp = nmp; + DTRACE_TCP5(send, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, + mp->b_rptr, tcp_t *, NULL, tcph_t *, tcph); + if (tcph->th_flags[0] == (TH_RST|TH_ACK)) { + DTRACE_TCP5(accept__refused, mblk_t *, NULL, conn_t *, NULL, + void_ip_t *, mp->b_rptr, tcp_t *, NULL, tcph_t *, tcph); + } + /* * NOTE: one might consider tracing a TCP packet here, but * this function has no active TCP state and no tcp structure @@ -23456,6 +23620,13 @@ seg_ack = BE32_TO_U32(tcph->th_ack); flags = tcph->th_flags[0]; + /* + * DTrace this "unknown" segment as a tcp:::receive, as we did + * just receive something that was TCP. 
+ */ + DTRACE_TCP5(receive, mblk_t *, NULL, conn_t *, NULL, void_ip_t *, rptr, + tcp_t *, NULL, tcph_t *, tcph); + seg_len = msgdsize(mp) - (TCP_HDR_LENGTH(tcph) + ip_hdr_len); if (flags & TH_RST) { freemsg(ipsec_mp); @@ -23793,9 +23964,15 @@ switch (tcp->tcp_state) { case TCPS_SYN_RCVD: case TCPS_ESTABLISHED: + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_FIN_WAIT_1); tcp->tcp_state = TCPS_FIN_WAIT_1; break; case TCPS_CLOSE_WAIT: + DTRACE_TCP4(state__change, void, NULL, + conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_LAST_ACK); tcp->tcp_state = TCPS_LAST_ACK; break; } @@ -24145,6 +24322,8 @@ * that since INADDR_ANY is all 0, we do not need to set * tcp_bound_source to INADDR_ANY here. */ + DTRACE_TCP4(state__change, void, NULL, conn_t *, NULL, tcp_t *, tcp, + int32_t, TCPS_BOUND); tcp->tcp_state = TCPS_BOUND; tcp->tcp_lport = port; tcp->tcp_exclbind = 1; --- old/usr/src/uts/common/inet/tcp/tcp_fusion.c Fri Aug 29 10:35:35 2008 +++ new/usr/src/uts/common/inet/tcp/tcp_fusion.c Fri Aug 29 10:35:33 2008 @@ -777,6 +777,8 @@ BUMP_LOCAL(tcp->tcp_obsegs); BUMP_LOCAL(peer_tcp->tcp_ibsegs); + DTRACE_TCPF5(send, void, NULL, conn_t *, NULL, + __dtrace_tcpf_ipinfo_t *, tcp, tcp_t *, tcp, uint_t, send_size); DTRACE_PROBE2(tcp__fuse__output, tcp_t *, tcp, uint_t, send_size); if (!TCP_IS_DETACHED(peer_tcp)) { @@ -917,6 +919,10 @@ if (tcp->tcp_direct_sockfs && !sd_rd_eof) (void) strrput_sig(q, B_TRUE); + DTRACE_TCPF5(receive, void, NULL, conn_t *, NULL, + __dtrace_tcpf_ipinfo_t *, tcp, tcp_t *, tcp, uint_t, + tcp->tcp_rcv_cnt); + ASSERT(cnt == tcp->tcp_rcv_cnt); tcp->tcp_rcv_last_head = NULL; tcp->tcp_rcv_last_tail = NULL; @@ -1026,6 +1032,9 @@ if ((mp = tcp->tcp_rcv_list) != NULL) { + DTRACE_TCPF5(receive, void, NULL, conn_t *, NULL, + __dtrace_tcpf_ipinfo_t *, tcp, tcp_t *, tcp, uint_t, + tcp->tcp_rcv_cnt); DTRACE_PROBE3(tcp__fuse__rrw, tcp_t *, tcp, uint32_t, tcp->tcp_rcv_cnt, ssize_t, dp->d_uio.uio_resid); --- 
old/usr/src/uts/common/sys/sdt.h Fri Aug 29 10:35:40 2008 +++ new/usr/src/uts/common/sys/sdt.h Fri Aug 29 10:35:39 2008 @@ -239,6 +239,50 @@ type3, arg3, type4, arg4, type5, arg5, type6, arg6, \ type7, arg7); +#define DTRACE_TCP(name) \ + DTRACE_PROBE(__tcp_##name); + +#define DTRACE_TCP1(name, type1, arg1) \ + DTRACE_PROBE1(__tcp_##name, type1, arg1); + +#define DTRACE_TCP2(name, type1, arg1, type2, arg2) \ + DTRACE_PROBE2(__tcp_##name, type1, arg1, type2, arg2); + +#define DTRACE_TCP3(name, type1, arg1, type2, arg2, type3, arg3) \ + DTRACE_PROBE3(__tcp_##name, type1, arg1, type2, arg2, type3, arg3); + +#define DTRACE_TCP4(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4) \ + DTRACE_PROBE4(__tcp_##name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4); + +#define DTRACE_TCP5(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4, type5, arg5) \ + DTRACE_PROBE5(__tcp_##name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4, type5, arg5); + +#define DTRACE_TCPF(name) \ + DTRACE_PROBE(__tcpf_##name); + +#define DTRACE_TCPF1(name, type1, arg1) \ + DTRACE_PROBE1(__tcpf_##name, type1, arg1); + +#define DTRACE_TCPF2(name, type1, arg1, type2, arg2) \ + DTRACE_PROBE2(__tcpf_##name, type1, arg1, type2, arg2); + +#define DTRACE_TCPF3(name, type1, arg1, type2, arg2, type3, arg3) \ + DTRACE_PROBE3(__tcpf_##name, type1, arg1, type2, arg2, type3, arg3); + +#define DTRACE_TCPF4(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4) \ + DTRACE_PROBE4(__tcpf_##name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4); + +#define DTRACE_TCPF5(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4, type5, arg5) \ + DTRACE_PROBE5(__tcpf_##name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4, type5, arg5); + #define DTRACE_SYSEVENT2(name, type1, arg1, type2, arg2) \ DTRACE_PROBE2(__sysevent_##name, type1, arg1, type2, arg2);