X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fqswlnd%2Fqswlnd_cb.c;h=99eb1cc134828e3300d2e8ccd1b13ddd8adbac45;hb=07bd49670282fcd75f1a937b621aa3c11db88407;hp=ff504565160e484e3b0ee997f81e62fc95dbfcd3;hpb=5c61559c099f9343a36886f4746ac966e4b4b70f;p=fs%2Flustre-release.git

diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c
index ff50456..99eb1cc 100644
--- a/lnet/klnds/qswlnd/qswlnd_cb.c
+++ b/lnet/klnds/qswlnd/qswlnd_cb.c
@@ -1,7 +1,7 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  *
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2012, Intel Corporation.
  *
  * Author: Eric Barton
  *
@@ -352,42 +352,43 @@ kqswnal_csum_iov (__u32 csum, int offset, int nob,
 void
 kqswnal_put_idle_tx (kqswnal_tx_t *ktx)
 {
-        unsigned long   flags;
+        unsigned long flags;
 
-        kqswnal_unmap_tx (ktx);                 /* release temporary mappings */
-        ktx->ktx_state = KTX_IDLE;
+        kqswnal_unmap_tx(ktx);                  /* release temporary mappings */
+        ktx->ktx_state = KTX_IDLE;
 
-        spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
+        spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
 
-        list_del (&ktx->ktx_list);              /* take off active list */
-        list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds);
+        cfs_list_del(&ktx->ktx_list);           /* take off active list */
+        cfs_list_add(&ktx->ktx_list, &kqswnal_data.kqn_idletxds);
 
-        spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+        spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
 }
 
 kqswnal_tx_t *
 kqswnal_get_idle_tx (void)
 {
-        unsigned long   flags;
-        kqswnal_tx_t   *ktx;
+        unsigned long flags;
+        kqswnal_tx_t *ktx;
 
-        spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
+        spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
 
-        if (kqswnal_data.kqn_shuttingdown ||
-            list_empty (&kqswnal_data.kqn_idletxds)) {
-                spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+        if (kqswnal_data.kqn_shuttingdown ||
+            cfs_list_empty(&kqswnal_data.kqn_idletxds)) {
+                spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
 
-                return NULL;
-        }
+                return NULL;
+        }
 
-        ktx = list_entry (kqswnal_data.kqn_idletxds.next, kqswnal_tx_t, ktx_list);
-        list_del (&ktx->ktx_list);
+        ktx = cfs_list_entry (kqswnal_data.kqn_idletxds.next, kqswnal_tx_t,
+                              ktx_list);
+        cfs_list_del (&ktx->ktx_list);
 
-        list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds);
+        cfs_list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds);
         ktx->ktx_launcher = current->pid;
-        atomic_inc(&kqswnal_data.kqn_pending_txs);
+        atomic_inc(&kqswnal_data.kqn_pending_txs);
 
-        spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+        spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
 
         /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */
         LASSERT (ktx->ktx_nmappedpages == 0);
@@ -397,16 +398,16 @@ kqswnal_get_idle_tx (void)
 void
 kqswnal_tx_done_in_thread_context (kqswnal_tx_t *ktx)
 {
-        lnet_msg_t      *lnetmsg0 = NULL;
-        lnet_msg_t      *lnetmsg1 = NULL;
-        int              status0  = 0;
-        int              status1  = 0;
-        kqswnal_rx_t    *krx;
-
-        LASSERT (!in_interrupt());
-
-        if (ktx->ktx_status == -EHOSTDOWN)
-                kqswnal_notify_peer_down(ktx);
+        lnet_msg_t *lnetmsg0 = NULL;
+        lnet_msg_t *lnetmsg1 = NULL;
+        int status0 = 0;
+        int status1 = 0;
+        kqswnal_rx_t *krx;
+
+        LASSERT (!in_interrupt());
+
+        if (ktx->ktx_status == -EHOSTDOWN)
+                kqswnal_notify_peer_down(ktx);
 
         switch (ktx->ktx_state) {
         case KTX_RDMA_FETCH:            /* optimized PUT/REPLY handled */
@@ -503,23 +504,23 @@ kqswnal_tx_done_in_thread_context (kqswnal_tx_t *ktx)
 void
 kqswnal_tx_done (kqswnal_tx_t *ktx, int status)
 {
-        unsigned long   flags;
+        unsigned long flags;
 
-        ktx->ktx_status = status;
+        ktx->ktx_status = status;
 
-        if (!in_interrupt()) {
-                kqswnal_tx_done_in_thread_context(ktx);
-                return;
-        }
+        if (!in_interrupt()) {
+                kqswnal_tx_done_in_thread_context(ktx);
+                return;
+        }
 
-        /* Complete the send in thread context */
-        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
-
-        list_add_tail(&ktx->ktx_schedlist,
-                      &kqswnal_data.kqn_donetxds);
-        wake_up(&kqswnal_data.kqn_sched_waitq);
-
-        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags);
+        /* Complete the send in thread context */
+        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
+
+        cfs_list_add_tail(&ktx->ktx_schedlist,
+                          &kqswnal_data.kqn_donetxds);
+        wake_up(&kqswnal_data.kqn_sched_waitq);
+
+        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags);
 }
 
 static void
@@ -535,7 +536,7 @@ kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
 
         if (status != EP_SUCCESS) {
 
-                CDEBUG (D_NETERROR, "Tx completion to %s failed: %d\n",
+                CNETERR("Tx completion to %s failed: %d\n",
                         libcfs_nid2str(ktx->ktx_nid), status);
 
                 status = -EHOSTDOWN;
@@ -601,13 +602,13 @@ kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
 int
 kqswnal_launch (kqswnal_tx_t *ktx)
 {
-        /* Don't block for transmit descriptor if we're in interrupt context */
-        int   attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
-        int   dest = kqswnal_nid2elanid (ktx->ktx_nid);
-        unsigned long flags;
-        int   rc;
+        /* Don't block for transmit descriptor if we're in interrupt context */
+        int attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
+        int dest = kqswnal_nid2elanid (ktx->ktx_nid);
+        unsigned long flags;
+        int rc;
 
-        ktx->ktx_launchtime = cfs_time_current();
+        ktx->ktx_launchtime = cfs_time_current();
 
         if (kqswnal_data.kqn_shuttingdown)
                 return (-ESHUTDOWN);
@@ -652,7 +653,7 @@ kqswnal_launch (kqswnal_tx_t *ktx)
                                       kqswnal_txhandler, ktx, NULL,
                                       ktx->ktx_frags, ktx->ktx_nfrag);
                 break;
-                
+
         default:
                 LBUG();
                 rc = -EINVAL;  /* no compiler warning please */
@@ -664,16 +665,19 @@ kqswnal_launch (kqswnal_tx_t *ktx)
                 return (0);
 
         case EP_ENOMEM: /* can't allocate ep txd => queue for later */
-                spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
 
-                list_add_tail (&ktx->ktx_schedlist, &kqswnal_data.kqn_delayedtxds);
-                wake_up (&kqswnal_data.kqn_sched_waitq);
+                cfs_list_add_tail(&ktx->ktx_schedlist,
+                                  &kqswnal_data.kqn_delayedtxds);
+                wake_up(&kqswnal_data.kqn_sched_waitq);
 
-                spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+                spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                       flags);
                 return (0);
 
         default: /* fatal error */
-                CDEBUG (D_NETERROR, "Tx to %s failed: %d\n", libcfs_nid2str(ktx->ktx_nid), rc);
+                CNETERR ("Tx to %s failed: %d\n",
+                         libcfs_nid2str(ktx->ktx_nid), rc);
                 kqswnal_notify_peer_down(ktx);
                 return (-EHOSTUNREACH);
         }
@@ -895,9 +899,9 @@ kqswnal_rdma (kqswnal_rx_t *krx, lnet_msg_t *lntmsg,
         ktx->ktx_args[0] = krx;
         ktx->ktx_args[1] = lntmsg;
 
-        LASSERT (atomic_read(&krx->krx_refcount) > 0);
+        LASSERT (atomic_read(&krx->krx_refcount) > 0);
         /* Take an extra ref for the completion callback */
-        atomic_inc(&krx->krx_refcount);
+        atomic_inc(&krx->krx_refcount);
 
         /* Map on the rail the RPC prefers */
         ktx->ktx_rail = ep_rcvr_prefrail(krx->krx_eprx,
@@ -974,7 +978,7 @@ kqswnal_rdma (kqswnal_rx_t *krx, lnet_msg_t *lntmsg,
 
                 kqswnal_put_idle_tx (ktx);
         }
 
-        atomic_dec(&kqswnal_data.kqn_pending_txs);
+        atomic_dec(&kqswnal_data.kqn_pending_txs);
         return (rc);
 }
@@ -1001,18 +1005,18 @@ kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
         CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
                payload_nob, payload_niov, libcfs_id2str(target));
 
-        LASSERT (payload_nob == 0 || payload_niov > 0);
-        LASSERT (payload_niov <= LNET_MAX_IOV);
+        LASSERT (payload_nob == 0 || payload_niov > 0);
+        LASSERT (payload_niov <= LNET_MAX_IOV);
 
-        /* It must be OK to kmap() if required */
-        LASSERT (payload_kiov == NULL || !in_interrupt ());
-        /* payload is either all vaddrs or all pages */
-        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+        /* It must be OK to kmap() if required */
+        LASSERT (payload_kiov == NULL || !in_interrupt ());
+        /* payload is either all vaddrs or all pages */
+        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
 
-        if (kqswnal_nid2elanid (target.nid) < 0) {
-                CERROR("%s not in my cluster\n", libcfs_nid2str(target.nid));
-                return -EIO;
-        }
+        if (kqswnal_nid2elanid (target.nid) < 0) {
+                CERROR("%s not in my cluster\n", libcfs_nid2str(target.nid));
+                return -EIO;
+        }
 
         /* I may not block for a transmit descriptor if I might block the
          * router, receiver, or an interrupt handler. */
@@ -1218,23 +1222,23 @@ kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
 #endif
                 nob += payload_nob;
         }
-        
+
         ktx->ktx_port = (nob <= KQSW_SMALLMSG) ?
                         EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
 
         rc = kqswnal_launch (ktx);
 
  out:
-        CDEBUG(rc == 0 ? D_NET : D_NETERROR, "%s %d bytes to %s%s: rc %d\n",
-               routing ? (rc == 0 ? "Routed" : "Failed to route") :
-                         (rc == 0 ? "Sent" : "Failed to send"),
-               nob, libcfs_nid2str(target.nid),
-               target_is_router ? "(router)" : "", rc);
+        CDEBUG_LIMIT(rc == 0? D_NET :D_NETERROR, "%s %d bytes to %s%s: rc %d\n",
+                     routing ? (rc == 0 ? "Routed" : "Failed to route") :
+                     (rc == 0 ? "Sent" : "Failed to send"),
+                     nob, libcfs_nid2str(target.nid),
+                     target_is_router ? "(router)" : "", rc);
 
         if (rc != 0) {
                 lnet_msg_t *repmsg = (lnet_msg_t *)ktx->ktx_args[2];
                 int state = ktx->ktx_state;
-                
+
                 kqswnal_put_idle_tx (ktx);
 
                 if (state == KTX_GETTING && repmsg != NULL) {
@@ -1250,14 +1254,14 @@ kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
 
         }
 
-        atomic_dec(&kqswnal_data.kqn_pending_txs);
+        atomic_dec(&kqswnal_data.kqn_pending_txs);
         return (rc == 0 ? 0 : -EIO);
 }
 
 void
 kqswnal_requeue_rx (kqswnal_rx_t *krx)
 {
-        LASSERT (atomic_read(&krx->krx_refcount) == 0);
+        LASSERT (atomic_read(&krx->krx_refcount) == 0);
         LASSERT (!krx->krx_rpc_reply_needed);
 
         krx->krx_state = KRX_POSTED;
@@ -1292,29 +1296,29 @@ kqswnal_rpc_complete (EP_RXD *rxd)
 void
 kqswnal_rx_done (kqswnal_rx_t *krx)
 {
-        int   rc;
+        int rc;
 
-        LASSERT (atomic_read(&krx->krx_refcount) == 0);
+        LASSERT (atomic_read(&krx->krx_refcount) == 0);
 
-        if (krx->krx_rpc_reply_needed) {
-                /* We've not completed the peer's RPC yet... */
-                krx->krx_rpc_reply.msg.magic   = LNET_PROTO_QSW_MAGIC;
-                krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION;
+        if (krx->krx_rpc_reply_needed) {
+                /* We've not completed the peer's RPC yet... */
+                krx->krx_rpc_reply.msg.magic = LNET_PROTO_QSW_MAGIC;
+                krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION;
 
-                LASSERT (!in_interrupt());
+                LASSERT (!in_interrupt());
 
-                rc = ep_complete_rpc(krx->krx_rxd,
-                                     kqswnal_rpc_complete, krx,
-                                     &krx->krx_rpc_reply.ep_statusblk,
-                                     NULL, NULL, 0);
-                if (rc == EP_SUCCESS)
-                        return;
+                rc = ep_complete_rpc(krx->krx_rxd,
+                                     kqswnal_rpc_complete, krx,
+                                     &krx->krx_rpc_reply.ep_statusblk,
+                                     NULL, NULL, 0);
+                if (rc == EP_SUCCESS)
+                        return;
 
-                CERROR("can't complete RPC: %d\n", rc);
-                krx->krx_rpc_reply_needed = 0;
-        }
+                CERROR("can't complete RPC: %d\n", rc);
+                krx->krx_rpc_reply_needed = 0;
+        }
 
-        kqswnal_requeue_rx(krx);
+        kqswnal_requeue_rx(krx);
 }
 
 void
@@ -1329,7 +1333,7 @@ kqswnal_parse (kqswnal_rx_t *krx)
         int              nob;
         int              rc;
 
-        LASSERT (atomic_read(&krx->krx_refcount) == 1);
+        LASSERT (atomic_read(&krx->krx_refcount) == 1);
 
         if (krx->krx_nob < offsetof(kqswnal_msg_t, kqm_u)) {
                 CERROR("Short message %d received from %s\n",
@@ -1517,7 +1521,7 @@ kqswnal_rxhandler(EP_RXD *rxd)
 
         /* Default to failure if an RPC reply is requested but not handled */
         krx->krx_rpc_reply.msg.status = -EPROTO;
-        atomic_set (&krx->krx_refcount, 1);
+        atomic_set (&krx->krx_refcount, 1);
 
         if (status != EP_SUCCESS) {
                 /* receives complete with failure when receiver is removed */
@@ -1526,21 +1530,21 @@ kqswnal_rxhandler(EP_RXD *rxd)
                 else
                         CERROR("receive status failed with status %d nob %d\n",
                                ep_rxd_status(rxd), nob);
-                kqswnal_rx_decref(krx);
-                return;
-        }
+                kqswnal_rx_decref(krx);
+                return;
+        }
 
-        if (!in_interrupt()) {
-                kqswnal_parse(krx);
-                return;
-        }
+        if (!in_interrupt()) {
+                kqswnal_parse(krx);
+                return;
+        }
 
-        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
 
-        list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
-        wake_up (&kqswnal_data.kqn_sched_waitq);
+        cfs_list_add_tail(&krx->krx_list, &kqswnal_data.kqn_readyrxds);
+        wake_up(&kqswnal_data.kqn_sched_waitq);
 
-        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags);
 }
 
 int
@@ -1555,17 +1559,17 @@ kqswnal_recv (lnet_ni_t *ni,
               unsigned int  mlen,
               unsigned int  rlen)
 {
-        kqswnal_rx_t       *krx = (kqswnal_rx_t *)private;
-        lnet_nid_t          fromnid;
-        kqswnal_msg_t      *msg;
-        lnet_hdr_t         *hdr;
-        kqswnal_remotemd_t *rmd;
-        int                 msg_offset;
-        int                 rc;
-
-        LASSERT (!in_interrupt ());             /* OK to map */
-        /* Either all pages or all vaddrs */
-        LASSERT (!(kiov != NULL && iov != NULL));
+        kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
+        lnet_nid_t fromnid;
+        kqswnal_msg_t *msg;
+        lnet_hdr_t *hdr;
+        kqswnal_remotemd_t *rmd;
+        int msg_offset;
+        int rc;
+
+        LASSERT (!in_interrupt ());             /* OK to map */
+        /* Either all pages or all vaddrs */
+        LASSERT (!(kiov != NULL && iov != NULL));
 
         fromnid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ep_rxd_node(krx->krx_rxd));
         msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page);
@@ -1651,21 +1655,21 @@ kqswnal_recv (lnet_ni_t *ni,
 }
 
 int
-kqswnal_thread_start (int (*fn)(void *arg), void *arg)
+kqswnal_thread_start(int (*fn)(void *arg), void *arg, char *name)
 {
-        long    pid = kernel_thread (fn, arg, 0);
+        struct task_struct *task = cfs_thread_run(fn, arg, name);
 
-        if (pid < 0)
-                return ((int)pid);
+        if (IS_ERR(task))
+                return PTR_ERR(task);
 
-        atomic_inc (&kqswnal_data.kqn_nthreads);
-        return (0);
+        atomic_inc(&kqswnal_data.kqn_nthreads);
+        return 0;
 }
 
 void
 kqswnal_thread_fini (void)
 {
-        atomic_dec (&kqswnal_data.kqn_nthreads);
+        atomic_dec (&kqswnal_data.kqn_nthreads);
 }
 
 int
@@ -1678,51 +1682,52 @@ kqswnal_scheduler (void *arg)
         int              counter = 0;
         int              did_something;
 
-        cfs_daemonize ("kqswnal_sched");
         cfs_block_allsigs ();
-
-        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
 
         for (;;)
         {
                 did_something = 0;
 
-                if (!list_empty (&kqswnal_data.kqn_readyrxds))
+                if (!cfs_list_empty (&kqswnal_data.kqn_readyrxds))
                 {
-                        krx = list_entry(kqswnal_data.kqn_readyrxds.next,
-                                         kqswnal_rx_t, krx_list);
-                        list_del (&krx->krx_list);
-                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                               flags);
+                        krx = cfs_list_entry(kqswnal_data.kqn_readyrxds.next,
+                                             kqswnal_rx_t, krx_list);
+                        cfs_list_del (&krx->krx_list);
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
 
                         LASSERT (krx->krx_state == KRX_PARSE);
                         kqswnal_parse (krx);
 
                         did_something = 1;
-                        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
+                        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock,
+                                          flags);
                 }
 
-                if (!list_empty (&kqswnal_data.kqn_donetxds))
+                if (!cfs_list_empty (&kqswnal_data.kqn_donetxds))
                 {
-                        ktx = list_entry(kqswnal_data.kqn_donetxds.next,
-                                         kqswnal_tx_t, ktx_schedlist);
-                        list_del_init (&ktx->ktx_schedlist);
-                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                               flags);
+                        ktx = cfs_list_entry(kqswnal_data.kqn_donetxds.next,
+                                             kqswnal_tx_t, ktx_schedlist);
+                        cfs_list_del_init (&ktx->ktx_schedlist);
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
 
                         kqswnal_tx_done_in_thread_context(ktx);
 
                         did_something = 1;
-                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock,
+                                          flags);
                 }
 
-                if (!list_empty (&kqswnal_data.kqn_delayedtxds))
+                if (!cfs_list_empty (&kqswnal_data.kqn_delayedtxds))
                 {
-                        ktx = list_entry(kqswnal_data.kqn_delayedtxds.next,
-                                         kqswnal_tx_t, ktx_schedlist);
-                        list_del_init (&ktx->ktx_schedlist);
-                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                               flags);
+                        ktx = cfs_list_entry(kqswnal_data.kqn_delayedtxds.next,
+                                             kqswnal_tx_t, ktx_schedlist);
+                        cfs_list_del_init (&ktx->ktx_schedlist);
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
 
                         rc = kqswnal_launch (ktx);
                         if (rc != 0) {
@@ -1730,39 +1735,44 @@ kqswnal_scheduler (void *arg)
                                        libcfs_nid2str(ktx->ktx_nid), rc);
                                 kqswnal_tx_done (ktx, rc);
                         }
-                        atomic_dec (&kqswnal_data.kqn_pending_txs);
+                        atomic_dec (&kqswnal_data.kqn_pending_txs);
 
                         did_something = 1;
-                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock,
+                                          flags);
                 }
 
                 /* nothing to do or hogging CPU */
                 if (!did_something || counter++ == KQSW_RESCHED) {
-                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                               flags);
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
 
                         counter = 0;
 
                         if (!did_something) {
                                 if (kqswnal_data.kqn_shuttingdown == 2) {
-                                        /* We only exit in stage 2 of shutdown when
-                                         * there's nothing left to do */
+                                        /* We only exit in stage 2 of shutdown
+                                         * when there's nothing left to do */
                                         break;
                                 }
-                                rc = wait_event_interruptible_exclusive (
-                                        kqswnal_data.kqn_sched_waitq,
-                                        kqswnal_data.kqn_shuttingdown == 2 ||
-                                        !list_empty(&kqswnal_data.kqn_readyrxds) ||
-                                        !list_empty(&kqswnal_data.kqn_donetxds) ||
-                                        !list_empty(&kqswnal_data.kqn_delayedtxds));
-                                LASSERT (rc == 0);
-                        } else if (need_resched())
-                                schedule ();
-
-                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-                }
-        }
-
-        kqswnal_thread_fini ();
-        return (0);
+                                rc = wait_event_interruptible_exclusive (
+                                        kqswnal_data.kqn_sched_waitq,
+                                        kqswnal_data.kqn_shuttingdown == 2 ||
+                                        !cfs_list_empty(&kqswnal_data. \
+                                                        kqn_readyrxds) ||
+                                        !cfs_list_empty(&kqswnal_data. \
+                                                        kqn_donetxds) ||
+                                        !cfs_list_empty(&kqswnal_data. \
+                                                        kqn_delayedtxds));
+                                LASSERT (rc == 0);
+                        } else if (need_resched())
+                                schedule ();
+
+                        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock,
+                                          flags);
+                }
+        }
+
+        kqswnal_thread_fini ();
+        return 0;
 }
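
The hunks in kqswnal_tx_done() and kqswnal_scheduler() above keep the driver's deferral pattern intact: a completion that arrives in interrupt context is not finished inline; the descriptor is queued on kqn_donetxds under kqn_sched_lock and the scheduler thread is woken to finish it, dropping the lock around each completion. The following is a minimal stand-alone sketch of that pattern using plain Linux kernel primitives; it is not the qswlnd code itself, and the names (demo_tx, demo_sched, demo_tx_done, demo_tx_complete, demo_sched_drain, demo_sched_init) are hypothetical.

#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct demo_tx {
        struct list_head tx_list;       /* link on demo_sched.done */
        int              tx_status;     /* completion status */
};

static struct {
        spinlock_t        lock;         /* protects .done */
        struct list_head  done;         /* descriptors awaiting completion */
        wait_queue_head_t waitq;        /* scheduler thread sleeps here */
} demo_sched;

static void demo_sched_init(void)
{
        spin_lock_init(&demo_sched.lock);
        INIT_LIST_HEAD(&demo_sched.done);
        init_waitqueue_head(&demo_sched.waitq);
}

/* The part that must run in thread context (may sleep, unmap buffers, ...) */
static void demo_tx_complete(struct demo_tx *tx)
{
        pr_info("tx %p completed: status %d\n", tx, tx->tx_status);
}

/* Transport completion callback: may be called from interrupt context */
static void demo_tx_done(struct demo_tx *tx, int status)
{
        unsigned long flags;

        tx->tx_status = status;

        if (!in_interrupt()) {
                demo_tx_complete(tx);           /* safe to finish inline */
                return;
        }

        /* Defer to thread context: queue under the lock, wake the scheduler */
        spin_lock_irqsave(&demo_sched.lock, flags);
        list_add_tail(&tx->tx_list, &demo_sched.done);
        wake_up(&demo_sched.waitq);
        spin_unlock_irqrestore(&demo_sched.lock, flags);
}

/* Scheduler-thread side: drop the lock while each descriptor is completed */
static void demo_sched_drain(void)
{
        struct demo_tx *tx;
        unsigned long flags;

        spin_lock_irqsave(&demo_sched.lock, flags);
        while (!list_empty(&demo_sched.done)) {
                tx = list_entry(demo_sched.done.next, struct demo_tx, tx_list);
                list_del_init(&tx->tx_list);
                spin_unlock_irqrestore(&demo_sched.lock, flags);

                demo_tx_complete(tx);           /* may sleep; lock dropped */

                spin_lock_irqsave(&demo_sched.lock, flags);
        }
        spin_unlock_irqrestore(&demo_sched.lock, flags);
}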
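
The kqswnal_thread_start() hunk replaces the old kernel_thread()/pid return with a struct task_struct pointer that must be checked with IS_ERR()/PTR_ERR(). Below is a small sketch of that error-handling shape; it uses the mainline kthread_run() rather than the libcfs cfs_thread_run() wrapper the diff calls, and demo_thread_start is a hypothetical name.

#include <linux/err.h>
#include <linux/kthread.h>

/* Sketch only: mirrors the IS_ERR()/PTR_ERR() check added in the diff */
static int demo_thread_start(int (*fn)(void *arg), void *arg, char *name)
{
        struct task_struct *task = kthread_run(fn, arg, "%s", name);

        if (IS_ERR(task))
                return PTR_ERR(task);   /* e.g. -ENOMEM */

        return 0;
}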