--- /dev/null Fri Apr 4 13:31:06 2008 +++ new/src/sun_nws/idm/src/idm.c Fri Apr 4 13:31:06 2008 @@ -0,0 +1,1307 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at src/sun_nws/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at src/sun_nws/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)idm.c 1.19 08/03/26 SMI" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include /* TCP_NODELAY */ +#include /* _ALLOC_SLEEP */ +#include +#include + +#include + +#define IDM_NAME_VERSION "iSCSI Data Mover" + +extern struct mod_ops mod_miscops; +extern struct mod_ops mod_miscops; + +static struct modlmisc modlmisc = { + &mod_miscops, /* Type of module */ + IDM_NAME_VERSION +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlmisc, NULL +}; + +/* + * IDM Native Sockets transport operations + */ +idm_transport_ops_t idm_so_transport_ops = { + &idm_so_tx, /* it_tx_pdu */ + &idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */ + &idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */ + &idm_so_rx_datain, /* it_rx_datain */ + &idm_so_rx_rtt, /* it_rx_rtt */ + &idm_so_rx_dataout, /* it_rx_dataout */ + NULL, /* it_alloc_conn_rsrc */ + NULL, /* it_free_conn_rsrc */ + NULL, /* it_enable_datamover */ + NULL, /* it_conn_terminate */ + NULL, /* it_free_task_rsrc */ + &idm_so_notice_key_values, /* it_notice_key_values */ + &idm_so_conn_is_capable, /* it_conn_is_capable */ + &idm_so_buf_setup, /* it_buf_setup */ + &idm_so_buf_teardown, /* it_buf_teardown */ + &idm_so_tgt_svc_create, /* it_tgt_svc_create */ + &idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */ + &idm_so_tgt_svc_online, /* it_tgt_svc_online */ + &idm_so_tgt_svc_offline, /* it_tgt_svc_offline */ + &idm_so_tgt_conn_connect, /* it_tgt_conn_connect */ + &idm_so_ini_conn_create, /* it_ini_conn_create */ + &idm_so_ini_conn_destroy, /* it_ini_conn_destroy */ + &idm_so_ini_conn_connect, /* it_ini_conn_connect */ + &idm_so_ini_conn_disconnect /* it_ini_conn_disconnect */ +}; + +/* + * Global list of transport handles + * These are listed in preferential order, so we can simply take the + * first "it_conn_is_capable" hit. Note also that the order maps to + * the order of the idm_transport_type_t list. + */ +idm_transport_t idm_transport_list[] = { + + /* iSER on InfiniBand transport handle */ + {IDM_TRANSPORT_TYPE_ISER, /* type */ + "/devices/ib/iser@0:iser", /* device path */ + NULL, /* LDI handle */ + NULL, /* transport ops */ + NULL}, /* transport caps */ + + /* IDM native sockets transport handle */ + {IDM_TRANSPORT_TYPE_SOCKETS, /* type */ + NULL, /* device path */ + NULL, /* LDI handle */ + NULL, /* transport ops */ + NULL} /* transport caps */ + +}; + +extern int idm_task_compare(const void *t1, const void *t2); +static int _idm_init(void); +static void _idm_fini(void); + +/* + * Hopefully once the socket transport API is in place we can consolidate + * all these implementation details into the socket transport code. + * JBDB - these resources are allocated during init - do we want a + * sockets transport init? if not, then these stay here, i think + */ +extern kmem_cache_t *idm_sotx_pdu_cache; +extern kmem_cache_t *idm_sorx_pdu_cache; +kmem_cache_t *idm_buf_cache; +kmem_cache_t *idm_task_cache; +vmem_t *idm_taskid_cache; +idm_idpool_t idm_conn_id_pool; + +idm_global_t idm; /* Global state */ + +char *idm_cs_name[CS_MAX_STATE]; /* Connection state names */ +char *idm_ce_name[CE_MAX_EVENT]; /* Connection event names */ + +int +_init(void) +{ + int rc; + + if ((rc = _idm_init()) != 0) { + return (rc); + } + + return (mod_install(&modlinkage)); +} + +int +_fini(void) +{ + + _idm_fini(); + return (mod_remove(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +/* + * idm_ini_conn_create + * + * This function is invoked by the iSCSI layer to create a connection context. + * This does not actually establish the socket connection. + * + * cr - Connection request parameters + * new_con - Output parameter that contains the new request if successful + * + */ +idm_status_t +idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con) +{ + idm_transport_caps_t *caps; + idm_transport_type_t type; + idm_transport_t *it; + idm_conn_t *ic; + int rc; + + ic = kmem_zalloc(sizeof (idm_conn_t), KM_SLEEP); + + /* Initialize data */ + mutex_init(&ic->ic_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ic->ic_cv, NULL, CV_DEFAULT, NULL); + ic->ic_conn_type = CONN_TYPE_INI; + ic->ic_conn_ops = cr->icr_conn_ops; + ic->ic_internal_cid = idm_cid_alloc(); + if (ic->ic_internal_cid == 0) { + kmem_free(ic, sizeof (idm_conn_t)); + return (IDM_STATUS_FAIL); + } + + /* Determine the transport for this connection */ + for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { + it = &idm_transport_list[type]; + + /* + * JBDB LATER + * if ((it->ldi_hdl == NULL) && (it->type != SOCKETS)) + * ldi_open_by_devname(it->device_path) + * but, for now... + */ + if (it->it_ops == NULL) { + /* transport is not registered */ + continue; + } + + rc = it->it_ops->it_conn_is_capable(cr, caps); + if (rc == IDM_STATUS_SUCCESS) { + ic->ic_transport_ops = it->it_ops; + ic->ic_transport_type = type; + ic->ic_transport_ops->it_buf_tx_to_ini = NULL; + ic->ic_transport_ops->it_buf_rx_from_ini = NULL; + break; + } + } + + /* create an avl tree to maintain active tasks per connection */ + avl_create(&ic->ic_task_tree, idm_task_compare, sizeof (idm_task_t), + offsetof(idm_task_t, idt_avl_link)); + + bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr, + sizeof (cr->cr_ini_dst_addr)); + + /* create the transport-specific connection components */ + rc = it->it_ops->it_ini_conn_create(cr, ic); + if (rc != IDM_STATUS_SUCCESS) { + avl_destroy(&ic->ic_task_tree); + kmem_free(ic, sizeof (idm_conn_t)); + return (IDM_STATUS_FAIL); /* XXX Error? */ + } + + *new_con = ic; + + mutex_enter(&idm.idm_global_mutex); + list_insert_tail(&idm.idm_ini_conn_list, ic); + mutex_exit(&idm.idm_global_mutex); + + return (IDM_STATUS_SUCCESS); +} + +/* + * idm_ini_conn_destroy + * + * Releases any resources associated with the connection. This is the + * complement to idm_ini_conn_create. + * ic - idm_conn_t structure representing the relevant connection + * + */ +void +idm_ini_conn_destroy(idm_conn_t *ic) +{ + idm_task_t *tnode; + void *cookie = NULL; + + mutex_enter(&idm.idm_global_mutex); + list_remove(&idm.idm_ini_conn_list, ic); + mutex_exit(&idm.idm_global_mutex); + + /* destroy the nodes in the tree */ + while ((tnode = + avl_destroy_nodes(&ic->ic_task_tree, &cookie)) != NULL) { + /* free data ? */ + kmem_cache_free(idm_task_cache, tnode); + } + avl_destroy(&ic->ic_task_tree); + + /* teardown the transport connection resources */ + (void) ic->ic_transport_ops->it_ini_conn_destroy(ic); + + kmem_free(ic, sizeof (idm_conn_t)); +} + +/* + * idm_ini_conn_connect + * + * Establish connection to the remote system identified in idm_conn_t. + * The connection parameters including the remote IP address were established + * in the call to idm_ini_conn_create. + * + * ic - idm_conn_t structure representing the relevant connection + * + * Returns success if the connection was established, otherwise some kind + * of meaningful error code. + * + * Upon return the initiator can send a "login" request when it is ready. + */ +idm_status_t +idm_ini_conn_connect(idm_conn_t *ic) +{ + idm_status_t istat; + if ((istat = idm_conn_sm_init(ic)) != 0) { + /* CRM: should I cleanup here or in iscsi? */ + return (ic->ic_conn_sm_status); + } + /* Kick state machine */ + idm_conn_event(ic, CE_CONNECT_REQ, NULL); + + /* Wait for login flag */ + mutex_enter(&ic->ic_state_mutex); + while (!(ic->ic_state_flags & CF_LOGIN_READY) && + !(ic->ic_state_flags & CF_ERROR)) { + cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex); + } + mutex_exit(&ic->ic_state_mutex); + + if (ic->ic_state_flags & CF_ERROR) { + /* ic->ic_conn_sm_status will contains failure status */ + return (ic->ic_conn_sm_status); + } + + /* Ready to login */ + ASSERT(ic->ic_state_flags & CF_LOGIN_READY); + + return (0); +} + +/* + * idm_ini_conn_disconnect + * + * Forces a connection (previously established using idm_ini_conn_connect) + * to perform a controlled shutdown, cleaning up any outstanding requests. + * + * ic - idm_conn_t structure representing the relevant connection + * + * ** For now lets assume this is synchronous and it will return when the + * connection has been properly shutdown. + */ + +void +idm_ini_conn_disconnect(idm_conn_t *ic) +{ + +} + +/* + * idm_tgt_svc_create + * + * The target calls this service to obtain a service context for each available + * transport, starting a service of each type related to the IP address and port + * passed. The idm_svc_req_t contains the service parameters. + */ +idm_status_t +idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc) +{ + idm_transport_type_t type; + idm_transport_t *it; + idm_svc_t *is; + int rc; + + /* + * XXX We really need a "server" state machine here. The state + * machine should monitor the state of the listening connection. + * As connections are accepted the server state machine keeps + * track of outstanding connections. When the service is terminated + * with idm_tgt_svc_disconnect the state machine performs a controlled + * shutdown, terminating each connection. + * + * For now we can probably get by without it. + */ + *new_svc = NULL; + is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP); + + /* Initialize transport-agnostic components of the service handle */ + is->is_port = sr->sr_port; + is->is_conn_ops = sr->sr_conn_ops; + mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL); + list_create(&is->is_conn_list, sizeof (idm_conn_t), + offsetof(idm_conn_t, ic_list_node)); + + /* + * Loop through the transports, configuring the transport-specific + * components of each one. + */ + for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { + it = &idm_transport_list[type]; + if (it->it_ops == NULL) { + /* transport is not registered */ + continue; + } + + rc = it->it_ops->it_tgt_svc_create(sr, is); + if (rc != IDM_STATUS_SUCCESS) { + /* + * JBDB - how best to clean up a single failure + * when multiple transports are being config'd? + */ + kmem_free(is, sizeof (idm_svc_t)); + return (rc); + } + } + + *new_svc = is; + + mutex_enter(&idm.idm_global_mutex); + list_insert_tail(&idm.idm_tgt_svc_list, is); + mutex_exit(&idm.idm_global_mutex); + + return (IDM_STATUS_SUCCESS); +} + +/* + * idm_tgt_svc_destroy + * + * is - idm_svc_t returned by the call to idm_tgt_svc_create + * + * Cleanup any resources associated with the idm_svc_t. + */ +void +idm_tgt_svc_destroy(idm_svc_t *is) +{ + idm_transport_type_t type; + idm_transport_t *it; + + /* remove this service from the global list */ + mutex_enter(&idm.idm_global_mutex); + list_remove(&idm.idm_tgt_svc_list, is); + mutex_exit(&idm.idm_global_mutex); + + /* tear down the svc resources */ + list_destroy(&is->is_conn_list); + cv_destroy(&is->is_count_cv); + mutex_destroy(&is->is_count_mutex); + cv_destroy(&is->is_cv); + mutex_destroy(&is->is_mutex); + + /* teardown each transport-specific service */ + for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { + it = &idm_transport_list[type]; + if (it->it_ops == NULL) { + continue; + } + + it->it_ops->it_tgt_svc_destroy(is); + } + + /* free the svc handle */ + kmem_free(is, sizeof (idm_svc_t)); +} + +/* + * idm_tgt_svc_online + * + * is - idm_svc_t returned by the call to idm_tgt_svc_create + * + * Online each transport service, as we want this target to be accessible + * via any configured transport. + * + * When the initiator establishes a new connection to the target, IDM will + * call the "new connect" callback defined in the idm_svc_req_t structure + * and it will pass an idm_conn_t structure representing that new connection. + */ +idm_status_t +idm_tgt_svc_online(idm_svc_t *is) +{ + + idm_transport_type_t type; + idm_transport_t *it; + int rc; + + /* Walk through each of the transports and online them */ + for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { + it = &idm_transport_list[type]; + if (it->it_ops == NULL) { + /* transport is not registered */ + continue; + } + + rc = it->it_ops->it_tgt_svc_online(is); + if (rc != IDM_STATUS_SUCCESS) { + /* + * JBDB - currently, iscsit invokes this routine, + * and cleans up after itself. How best to handle + * this for multiple transports? + */ + return (IDM_STATUS_FAIL); + } + } + + return (IDM_STATUS_SUCCESS); +} + +/* + * idm_tgt_svc_offline + * + * is - idm_svc_t returned by the call to idm_tgt_svc_create + * + * Shutdown any online target services. + */ +void +idm_tgt_svc_offline(idm_svc_t *is) +{ + idm_transport_type_t type; + idm_transport_t *it; + + /* Walk through each of the transports and offline them */ + for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { + it = &idm_transport_list[type]; + if (it->it_ops == NULL) { + /* transport is not registered */ + continue; + } + + it->it_ops->it_tgt_svc_offline(is); + } +} + +/* + * idm_tgt_svc_lookup + * + * Lookup a service instance listening on the specified port + */ +idm_svc_t * +idm_tgt_svc_lookup(uint16_t port) +{ + idm_svc_t *result; + + for (result = list_head(&idm.idm_tgt_svc_list); + result != NULL; + result = list_next(&idm.idm_tgt_svc_list, result)) { + if (result->is_port == port) { + return (result); + } + } + + return (NULL); +} + +/* + * idm_notice_key_values() + * Passes the set of key value pairs to the transport for validatation. + * This will be invoked once the connection is established. + */ +idm_status_t +idm_notice_key_values(idm_conn_t *ic, nvlist_t *request_nvl, + nvlist_t *response_nvl, nvlist_t *negotiated_nvl) +{ + int rc; + + ASSERT(ic->ic_transport_ops != NULL); + + rc = ic->ic_transport_ops->it_notice_key_values(ic, request_nvl, + response_nvl, negotiated_nvl); + if (rc != IDM_STATUS_SUCCESS) { + /* JBDB - this is unlikely, what's the best course of action? */ + } + + return (rc); +} + +/* + * idm_buf_tx_to_ini + * + * This is IDM's implementation of the 'Put_Data' operational primitive. + * + * This function is invoked by a target iSCSI layer to request its local + * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer + * on the remote iSCSI node. The I/O buffer represented by 'idb' is + * transferred to the initiator associated with task 'idt'. The connection + * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS, + * and the callback (idb->idb_buf_cb) at transfer completion are + * provided as input. + * + * This data transfer takes place transparently to the remote iSCSI layer, + * i.e. without its participation. + * + * Using sockets, IDM implements the data transfer by segmenting the data + * buffer into appropriately sized iSCSI PDUs and transmitting them to the + * initiator. iSER performs the transfer using RDMA write. + * + */ +idm_status_t +idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb, + uint32_t offset, uint32_t xfer_len, + idm_buf_cb_t idb_buf_cb, void *cb_arg) +{ + idm_status_t rc; + + idb->idb_task_binding = idt; + idb->idb_bufoffset = offset; + idb->idb_xfer_len = xfer_len; + idb->idb_buf_cb = idb_buf_cb; + idb->idb_cb_arg = cb_arg; + /* + * "In" buf list is for "Data In" PDU's, "Out" buf list is for + * "Data Out" PDU's + */ + mutex_enter(&idt->idt_mutex); + idt->idt_tx_to_ini_start++; + idm_listbuf_insert(&idt->idt_inbufv, idb, offset); + mutex_exit(&idt->idt_mutex); + + rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)(idt, idb); + + return (rc); +} + +/* + * idm_buf_rx_from_ini + * + * This is IDM's implementation of the 'Get_Data' operational primitive. + * + * This function is invoked by a target iSCSI layer to request its local + * Datamover layer to retrieve certain data identified by the R2T PDU from the + * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be + * mapped to the respective buffer by the task tags (ITT & TTT). + * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and + * the callback (idb->idb_buf_cb) notification for data transfer completion are + * are provided as input. + * + * When an iSCSI node sends an R2T PDU to its local Datamover layer, the local + * Datamover layer, the local and remote Datamover layers transparently bring + * about the data transfer requested by the R2T PDU, without the participation + * of the iSCSI layers. + * + * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out() + * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read. + * + */ +idm_status_t +idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb, + uint32_t offset, uint32_t xfer_len, + idm_buf_cb_t idb_buf_cb, void *cb_arg) +{ + idm_status_t rc; + + idb->idb_task_binding = idt; + idb->idb_bufoffset = offset; + idb->idb_xfer_len = xfer_len; + idb->idb_buf_cb = idb_buf_cb; + idb->idb_cb_arg = cb_arg; + /* + * "In" buf list is for "Data In" PDU's, "Out" buf list is for + * "Data Out" PDU's + */ + mutex_enter(&idt->idt_mutex); + idt->idt_rx_from_ini_start++; + idm_listbuf_insert(&idt->idt_outbufv, idb, offset); + mutex_exit(&idt->idt_mutex); + + rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)(idt, idb); + + return (rc); +} + +/* + * idm_buf_alloc + * + * Allocates a buffer handle and allocates a buffer of size "buflen" if the + * bufptr is NULL. If bufptr is not NULL, bufptr is assigned to the buffer. + * + * ic - connection on which the buffer will be transferred + * bufptr - allocate memory for buffer if NULL, else assign to buffer + * buflen - length of buffer + * flags - data transfer direction 'inbound' or 'outbound' + * + * Returns idm_buf_t handle if successful, otherwise NULL + */ +idm_buf_t * +idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen) +{ + idm_buf_t *buf = NULL; + + ASSERT(ic != NULL); + ASSERT(idm_buf_cache != NULL); + ASSERT(buflen > 0); + + buf = kmem_cache_alloc(idm_buf_cache, KM_NOSLEEP); + if (buf == NULL) { + return (NULL); + } + + buf->idb_buf = (bufptr == NULL) ? + kmem_alloc(buflen, KM_NOSLEEP) : bufptr; + + if (buf->idb_buf == NULL) { + kmem_cache_free(idm_buf_cache, buf); + return (NULL); + } + + idm_conn_hold_impl(ic, &ic->ic_buf_refcount); + buf->idb_ic = ic; + buf->idb_buflen = buflen; + buf->idb_exp_offset = 0; + +#ifdef DEBUG + memset(&buf->idb_buflink, 0, sizeof (list_node_t)); + buf->idb_bufoffset = 0; + buf->idb_mr_handle = NULL; + buf->idb_buf_cb = NULL; +#endif + + return (buf); + +} + +/* + * idm_buf_free + * + * Release a buffer handle along with the associated buffer that was allocated + * or assigned with idm_buf_alloc + */ +void +idm_buf_free(idm_buf_t *buf) +{ + idm_conn_t *ic = buf->idb_ic; + + + buf->idb_task_binding = NULL; + + kmem_free(buf->idb_buf, buf->idb_buflen); + kmem_cache_free(idm_buf_cache, buf); + idm_conn_rele_impl(ic, &ic->ic_buf_refcount); +} + +/* + * idm_buf_bind_in + * + * This function associates a buffer with a task. This is only for use by the + * iSCSI initiator that will have only one buffer per transfer direction + * + */ +void +idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf) +{ + buf->idb_task_binding = idt; + buf->idb_ic = idt->idt_ic; + idm_conn_hold_impl(buf->idb_ic, &buf->idb_ic->ic_buf_refcount); + mutex_enter(&idt->idt_mutex); + idm_listbuf_insert(&idt->idt_inbufv, buf, 0); + mutex_exit(&idt->idt_mutex); +} + +void +idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf) +{ + buf->idb_ic = idt->idt_ic; + buf->idb_task_binding = idt; + idm_conn_hold_impl(buf->idb_ic, &buf->idb_ic->ic_buf_refcount); + mutex_enter(&idt->idt_mutex); + idm_listbuf_insert(&idt->idt_outbufv, buf, 0); + mutex_exit(&idt->idt_mutex); +} + +void +idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf) +{ + mutex_enter(&idt->idt_mutex); + list_remove(&idt->idt_inbufv, buf); + mutex_exit(&idt->idt_mutex); + buf->idb_task_binding = NULL; +} + +void +idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf) +{ + mutex_enter(&idt->idt_mutex); + list_remove(&idt->idt_outbufv, buf); + mutex_exit(&idt->idt_mutex); + buf->idb_task_binding = NULL; +} + +/* + * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the + * iSCSI PDU + */ +idm_buf_t * +idm_buf_find(void *lbuf, size_t data_offset) +{ + idm_buf_t *idb; + list_t *lst = (list_t *)lbuf; + + /* iterate through the list to find the buffer */ + for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) { + + /* CRM: it doesn't like it when this is taken out */ + if (idb->idb_ic->ic_conn_type == CONN_TYPE_INI) + return (idb); + if ((data_offset >= idb->idb_bufoffset) && + (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) { + + return (idb); + } + } + + return (NULL); +} + +/* + * idm_task_alloc + * + * This function will allocate a idm_task_t structure. A task tag is also + * generated and saved in idt_tt. The task is not active. + */ +idm_task_t * +idm_task_alloc(idm_conn_t *ic) +{ + void *addr; + idm_task_t *idt; + + ASSERT(ic != NULL); + + idt = kmem_cache_alloc(idm_task_cache, KM_SLEEP); + if (idt == NULL) { + return (NULL); + } + + ASSERT(list_is_empty(&idt->idt_inbufv)); + ASSERT(list_is_empty(&idt->idt_outbufv)); + + idm_conn_hold_impl(ic, &ic->ic_task_refcount); + idt->idt_ic = ic; + idt->idt_active = B_FALSE; + idt->idt_private = NULL; + idt->idt_exp_sn = 0; + + return (idt); +} + +/* + * idm_task_start + * + * Add the task to an AVL tree to notify IDM about a new task. The caller + * sets up the idm_task_t structure with a prior call to idm_task_alloc(). + * The task service does not function as a task/work engine, it is the + * responsibility of the initiator to start the data transfer and free the + * resources. + */ +void +idm_task_start(idm_task_t *idt) +{ + idm_conn_t *ic; + + ASSERT(idt != NULL); + + ic = idt->idt_ic; + + /* mark the task as ACTIVE */ + idt->idt_active = B_TRUE; + idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done = + idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done = 0; + + /* + * add the task to AVL tree. The AVL tree keeps track of active tasks. + * memory for avl_tree is allocated earlier in conn_create(). + * Tasks with duplicate tags are considered to be an error + */ + mutex_enter(&ic->ic_mutex); + avl_add(&ic->ic_task_tree, idt); + mutex_exit(&ic->ic_mutex); + +} + +/* + * idm_task_done + * + * This function will remove the task from the AVL tree indicating that the + * task is no longer active. + */ +void +idm_task_done(idm_task_t *idt) +{ + idm_conn_t *ic; + + ASSERT(idt != NULL); + + ic = idt->idt_ic; + + mutex_enter(&ic->ic_mutex); + avl_remove(&ic->ic_task_tree, idt); + mutex_exit(&ic->ic_mutex); + + idt->idt_active = B_FALSE; + +} + +/* + * idm_task_free + * + * This function will free the Task Tag and the memory allocated for the task + * idm_task_done should be called prior to this call + */ +void +idm_task_free(idm_task_t *idt) +{ + idm_conn_t *ic = idt->idt_ic; + ASSERT(idt != NULL); + + /* vmem free and list free in destructor */ + + kmem_cache_free(idm_task_cache, idt); + + idm_conn_rele_impl(ic, &ic->ic_task_refcount); +} + +/* + * idm_task_find + * + * This function looks up a task by task tag + */ +idm_task_t * +idm_task_find(idm_conn_t *ic, uint32_t itt) +{ + idm_task_t query, *idt; + avl_index_t where; + + query.idt_tt = itt; /* lookup task by task tag */ + + mutex_enter(&ic->ic_mutex); + if ((idt = avl_find(&ic->ic_task_tree, &query, &where)) == NULL) { + mutex_exit(&ic->ic_mutex); + /* task not found in tree, return NULL */ + return (NULL); + } + mutex_exit(&ic->ic_mutex); + + return (idt); +} + +/* + * idm_pdu_tx + * + * This is IDM's implementation of the 'Send_Control' operational primitive. + * This function is invoked by an initiator iSCSI layer requesting the transfer + * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a + * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover + * layer to the peer iSCSI layer in the remote iSCSI node. The connection info + * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size + * are provided as input. + * + */ +void +idm_pdu_tx(idm_pdu_t *pdu) +{ + idm_conn_t *ic = pdu->isp_ic; + iscsi_login_rsp_hdr_t *login_rsp; + iscsi_logout_rsp_hdr_t *logout_rsp; + iscsi_async_evt_hdr_t *async_evt; + + /* + * If we are in full-featured mode then route SCSI-related + * commands to the appropriate function vector without checking + * the connection state. We will only be in full-feature mode + * when we are in an acceptable state for SCSI PDU's. + * + * We also need to ensure that there are no PDU events outstanding + * on the state machine. Any non-SCSI PDU's received in full-feature + * mode will result in PDU events and until these have been handled + * we need to route all PDU's through the state machine as PDU + * events to maintain ordering. XXX This scenario could cause + * some unfortunate pathological behavior as we suddenly route + * all our performance sensitive PDU's through a single-threaded + * slow path. We need to force this condition in testing under + * heavy load and see how we recover. It might be necessary to + * do something else like hold off SCSI I/O until the PDU event + * count has returned to 0. + */ + mutex_enter(&ic->ic_state_mutex); + if (ic->ic_ffm && (ic->ic_pdu_events == 0)) { + mutex_exit(&ic->ic_state_mutex); + switch (IDM_PDU_OPCODE(pdu)) { + case ISCSI_OP_SCSI_CMD: + case ISCSI_OP_SCSI_RSP: + case ISCSI_OP_SCSI_TASK_MGT_MSG: + case ISCSI_OP_SCSI_TASK_MGT_RSP: + case ISCSI_OP_SCSI_DATA: + case ISCSI_OP_SCSI_DATA_RSP: + case ISCSI_OP_RTT_RSP: + /* + * Send the PDU + */ + idm_pdu_tx_forward(ic, pdu); + return; + default: + break; + } + mutex_enter(&ic->ic_state_mutex); + } + mutex_exit(&ic->ic_state_mutex); + + /* + * Any PDU's processed outside of full-feature mode and non-SCSI + * PDU's in full-feature mode are handled by generating an + * event to the connection state machine. The state machine + * will validate the PDU against the current state and either + * transmit the PDU if the opcode is allowed or handle an + * error if the PDU is not allowed. + * + * This code-path will also generate any events that are implied + * by the PDU opcode. For example a "login response" with success + * status generates a CE_LOGOUT_SUCCESS_SND event. + */ + switch (IDM_PDU_OPCODE(pdu)) { + case ISCSI_OP_LOGIN_CMD: + idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu); + break; + case ISCSI_OP_LOGIN_RSP: + idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE); + break; + case ISCSI_OP_LOGOUT_CMD: + idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE); + break; + case ISCSI_OP_LOGOUT_RSP: + idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE); + break; + case ISCSI_OP_ASYNC_EVENT: + async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr; + switch (async_evt->async_event) { + case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT: + idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND, + (uintptr_t)pdu); + break; + case ISCSI_ASYNC_EVENT_SCSI_EVENT: + case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION: + case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS: + case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION: + default: + idm_conn_tx_pdu_event(ic, CE_MISC_TX, + (uintptr_t)pdu); + break; + } + break; + case ISCSI_OP_SCSI_CMD: + case ISCSI_OP_SCSI_DATA: + case ISCSI_OP_SCSI_DATA_RSP: + case ISCSI_OP_RTT_RSP: + case ISCSI_OP_SNACK_CMD: + case ISCSI_OP_NOOP_IN: + case ISCSI_OP_NOOP_OUT: + case ISCSI_OP_TEXT_CMD: + case ISCSI_OP_TEXT_RSP: + case ISCSI_OP_REJECT_MSG: + default: + /* + * Connection state machine will validate these PDU's against + * the current state. A PDU not allowed in the current + * state will cause a protocol error. + */ + idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu); + break; + } +} + +/* + * Allocates a PDU along with memory for header and data. + */ + +idm_pdu_t * +idm_pdu_alloc(uint_t hdrlen, uint_t datalen) +{ + idm_pdu_t *result; + + /* + * IDM clients should cache these structures for performance + * critical paths. We can't cache effectively in IDM because we + * don't know the correct header and data size. + * + * Valid header length is assumed to be hdrlen and valid data + * length is assumed to be datalen. isp_hdrlen and isp_datalen + * can be adjusted after the PDU is returned if necessary. + */ + result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, KM_SLEEP); + result->isp_flags |= IDM_PDU_ALLOC; /* For idm_pdu_free sanity check */ + result->isp_hdr = (iscsi_hdr_t *)(result + 1); /* Ptr. Arithmetic */ + result->isp_hdrlen = hdrlen; + result->isp_hdrbuflen = hdrlen; + result->isp_data = (uint8_t *)result->isp_hdr + hdrlen; + result->isp_datalen = datalen; + result->isp_databuflen = datalen; + + return (result); +} + +/* + * Free a PDU previously allocated with idm_pdu_alloc() including any + * header and data space allocated as part of the original request. + * Additional memory regions referenced by subsequent modification of + * the isp_hdr and/or isp_data fields will not be freed. + */ +void +idm_pdu_free(idm_pdu_t *pdu) +{ + /* Make sure the structure was allocated using idm_pdu_alloc() */ + ASSERT(pdu->isp_flags & IDM_PDU_ALLOC); + kmem_free(pdu, + sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen); +} + +/* + * Initialize the connection, private and callback fields in a PDU. + */ +void +idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb) +{ + /* + * idm_pdu_complete() will call idm_pdu_free if the callback is + * NULL. This will only work if the PDU was originally allocated + * with idm_pdu_alloc(). + */ + ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) || + (pdu->isp_callback != NULL)); + pdu->isp_ic = ic; + pdu->isp_private = private; + pdu->isp_callback = cb; +} + +/* + * Initialize the header and header length field. This function should + * not be used to adjust the header length in a buffer allocated via + * pdu_pdu_alloc since it overwrites the existing header pointer. + */ +void +idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen) +{ + pdu->isp_hdr = (iscsi_hdr_t *)hdr; + pdu->isp_hdrlen = hdrlen; +} + +/* + * Initialize the data and data length fields. This function should + * not be used to adjust the data length of a buffer allocated via + * idm_pdu_alloc since it overwrites the existing data pointer. + */ +void +idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen) +{ + pdu->isp_data = data; + pdu->isp_datalen = datalen; +} + +void +idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status) +{ + if (pdu->isp_callback) { + /* XXX Might want to do a taskq in some cases */ + pdu->isp_status = status; + (*pdu->isp_callback)(pdu, status); + } else { + idm_pdu_free(pdu); + } +} + +void +idm_conn_hold(idm_conn_t *ic) +{ + idm_conn_hold_impl(ic, &ic->ic_client_refcount); +} + +void +idm_conn_rele(idm_conn_t *ic) +{ + idm_conn_rele_impl(ic, &ic->ic_client_refcount); +} + + +static int +_idm_init(void) +{ + int i; + + /* + * Setup the state/event names. Doing this programmatically + * allows us to ensure that names are mapped to the appropriate + * numerical enum value even if a state or event is added and + * the name table doesn't get updated + */ + for (i = 0; i < CS_MAX_STATE; i++) { + idm_cs_name[i] = "UNDEFINED"; + } + idm_cs_name[CS_S0_UNDEFINED] = "CS_S0_UNDEFINED"; + idm_cs_name[CS_S1_FREE] = "CS_S1_FREE"; + idm_cs_name[CS_S2_XPT_WAIT] = "CS_S2_XPT_WAIT"; + idm_cs_name[CS_S3_XPT_UP] = "CS_S3_XPT_UP"; + idm_cs_name[CS_S4_IN_LOGIN] = "CS_S4_IN_LOGIN"; + idm_cs_name[CS_S5_LOGGED_IN] = "CS_S5_LOGGED_IN"; + idm_cs_name[CS_S6_IN_LOGOUT] = "CS_S6_IN_LOGOUT"; + idm_cs_name[CS_S7_LOGOUT_REQ] = "CS_S7_LOGOUT_REQ"; + idm_cs_name[CS_S8_CLEANUP] = "CS_S8_CLEANUP"; + idm_cs_name[CS_S9_INIT_ERROR] = "CS_S9_INIT_ERROR"; + idm_cs_name[CS_S10_IN_CLEANUP] = "CS_S10_IN_CLEANUP"; + idm_cs_name[CS_S11_COMPLETE] = "CS_S11_COMPLETE"; + idm_cs_name[CS_MAX_STATE] = "CS_MAX_STATE"; + + for (i = 0; i < CE_MAX_EVENT; i++) { + idm_ce_name[i] = "UNDEFINED"; + } + idm_ce_name[CE_CONNECT_REQ] = "CE_CONNECT_REQ"; + idm_ce_name[CE_CONNECT_FAIL] = "CE_CONNECT_FAIL"; + idm_ce_name[CE_CONNECT_SUCCESS] = "CE_CONNECT_SUCCESS"; + idm_ce_name[CE_LOGIN_SND] = "CE_LOGIN_SND"; + idm_ce_name[CE_LOGIN_SUCCESS_RCV] = "CE_LOGIN_SUCCESS_RCV"; + idm_ce_name[CE_LOGIN_FAIL_RCV] = "CE_LOGIN_FAIL_RCV"; + idm_ce_name[CE_LOGOUT_THIS_CONN_SND] = "CE_LOGOUT_THIS_CONN_SND"; + idm_ce_name[CE_LOGOUT_OTHER_CONN_SND] = "CE_LOGOUT_OTHER_CONN_SND"; + idm_ce_name[CE_LOGOUT_SESSION_SND] = "CE_LOGOUT_SESSION_SND"; + idm_ce_name[CE_LOGOUT_SUCCESS_RCV] = "CE_LOGOUT_SUCCESS_RCV"; + idm_ce_name[CE_LOGOUT_FAIL_RCV] = "CE_LOGOUT_FAIL_RCV"; + idm_ce_name[CE_ASYNC_LOGOUT_RCV] = "CE_ASYNC_LOGOUT_RCV"; + idm_ce_name[CE_ASYNC_DROP_CONN_RCV] = "CE_ASYNC_DROP_CONN_RCV"; + idm_ce_name[CE_ASYNC_DROP_ALL_CONN_RCV] = "CE_ASYNC_DROP_ALL_CONN_RCV"; + idm_ce_name[CE_CONNECT_ACCEPT] = "CE_CONNECT_ACCEPT"; + idm_ce_name[CE_CONNECT_REJECT] = "CE_CONNECT_REJECT"; + idm_ce_name[CE_LOGIN_RCV] = "CE_LOGIN_RCV"; + idm_ce_name[CE_LOGIN_TIMEOUT] = "CE_LOGIN_TIMEOUT"; + idm_ce_name[CE_LOGIN_SUCCESS_SND] = "CE_LOGIN_SUCCESS_SND"; + idm_ce_name[CE_LOGIN_FAIL_SND] = "CE_LOGIN_FAIL_SND"; + idm_ce_name[CE_LOGOUT_THIS_CONN_RCV] = "CE_LOGOUT_THIS_CONN_RCV"; + idm_ce_name[CE_LOGOUT_OTHER_CONN_RCV] = "CE_LOGOUT_OTHER_CONN_RCV"; + idm_ce_name[CE_LOGOUT_SESSION_RCV] = "CE_LOGOUT_SESSION_RCV"; + idm_ce_name[CE_LOGOUT_SUCCESS_SND] = "CE_LOGOUT_SUCCESS_SND"; + idm_ce_name[CE_LOGOUT_FAIL_SND] = "CE_LOGOUT_FAIL_SND"; + idm_ce_name[CE_CLEANUP_TIMEOUT] = "CE_CLEANUP_TIMEOUT"; + idm_ce_name[CE_ASYNC_LOGOUT_SND] = "CE_ASYNC_LOGOUT_SND"; + idm_ce_name[CE_ASYNC_DROP_CONN_SND] = "CE_ASYNC_DROP_CONN_SND"; + idm_ce_name[CE_ASYNC_DROP_ALL_CONN_SND] = "CE_ASYNC_DROP_ALL_CONN_SND"; + idm_ce_name[CE_TRANSPORT_FAIL] = "CE_TRANSPORT_FAIL"; + idm_ce_name[CE_MISC_TX] = "CE_MISC_TX"; + idm_ce_name[CE_TX_PROTOCOL_ERROR] = "CE_TX_PROTOCOL_ERROR"; + idm_ce_name[CE_MISC_RX] = "CE_MISC_RX"; + idm_ce_name[CE_RX_PROTOCOL_ERROR] = "CE_RX_PROTOCOL_ERROR"; + idm_ce_name[CE_LOGOUT_SESSION_SUCCESS] = "CE_LOGOUT_SESSION_SUCCESS"; + idm_ce_name[CE_CONN_REINSTATE] = "CE_CONN_REINSTATE"; + idm_ce_name[CE_CONN_REINSTATE_SUCCESS] = "CE_CONN_REINSTATE_SUCCESS"; + idm_ce_name[CE_CONN_REINSTATE_FAIL] = "CE_CONN_REINSTATE_FAIL"; + idm_ce_name[CE_MAX_EVENT] = "CE_MAX_EVENT"; + + /* + * XXX Move these into idm_global_t to consolidate global state + * for easier MDB access + */ + mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL); + + idm.idm_global_taskq = taskq_create("IDM global taskq", 1, minclsyspri, + 1, 1, 0); + if (idm.idm_global_taskq == NULL) { + return (ENOMEM); + } + + /* Cache for IDM Data and R2T Transmit PDU's */ + idm_sotx_pdu_cache = kmem_cache_create("IDM TX PDU Cache", + sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8, + &idm_sotx_pdu_constructor, NULL, + NULL, NULL, NULL, KM_SLEEP); + + /* Cache for IDM Receive PDU's */ + idm_sorx_pdu_cache = kmem_cache_create("IDM RX PDU Cache", + sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8, + &idm_sorx_pdu_constructor, NULL, + NULL, NULL, NULL, KM_SLEEP); + + idm_buf_cache = kmem_cache_create("idm_buf_cache", sizeof (idm_buf_t), + 8, NULL, NULL, NULL, NULL, NULL, + KM_SLEEP); + + idm_task_cache = kmem_cache_create("idm_task_cache", + sizeof (idm_task_t), 8, &idm_task_constructor, &idm_task_destructor, + NULL, NULL, NULL, KM_SLEEP); + + list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t), + offsetof(idm_svc_t, is_list_node)); + list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t), + offsetof(idm_conn_t, ic_list_node)); + + /* + * Use vmem arenas to create task ids in the range [1, IDM_TASKIDS_MAX] + * task id 0 is never allocated, it is used to indicate a error + */ + if ((idm_taskid_cache = vmem_create("idm_tasktag_cache", (void *)1, + IDM_TASKIDS_MAX, 1, NULL, NULL, NULL, 0, + VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) { + cmn_err(CE_NOTE, "unable to create vmem arena"); + } + + /* + * JBDB later - walk idm_transport_list and open LDI handles + * on each available transport. For now, just set the sockets + * transport ops up. + */ + idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS].it_ops = + &idm_so_transport_ops; + + (void) idm_idpool_create(&idm_conn_id_pool); + + return (0); +} + +static void +_idm_fini(void) +{ + + /* JBDB later - clean up LDI handles on transport drivers */ + + /* idm_idpool_destroy(&idm_conn_id_pool); */ + vmem_destroy(idm_taskid_cache); + list_destroy(&idm.idm_ini_conn_list); + list_destroy(&idm.idm_tgt_svc_list); + kmem_cache_destroy(idm_task_cache); + kmem_cache_destroy(idm_buf_cache); + kmem_cache_destroy(idm_sorx_pdu_cache); + kmem_cache_destroy(idm_sotx_pdu_cache); + mutex_destroy(&idm.idm_global_mutex); +}