while holding the CW lock, we drop the CW lock, take the EX lock, and retry.
- disable pdirops by always taking the EX lock on change and the PR lock on lookup/readdir.
Lustre-bug-id: https://jira.whamcloud.com/browse/LU-50
Lustre-change: http://review.whamcloud.com/375
Signed-off-by: Liang Zhen <liang@whamcloud.com>
enum lnet_unlink unlink_in,
struct lnet_handle_md *md_handle_out);
-int LNetMDUnlink(struct lnet_handle_md md_in);
+int __LNetMDUnlink(struct lnet_handle_md md_in, bool discard);
+#define LNetMDUnlink(handle) __LNetMDUnlink(handle, false)
void lnet_assert_handler_unused(lnet_handler_t handler);
/** @} lnet_md */
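
For illustration (not part of the patch; example_unlink is a hypothetical helper): the wrapper macro keeps every existing LNetMDUnlink() caller source-compatible, while new code opts in to discarding by calling __LNetMDUnlink() directly.

	/* hypothetical example, not in the patch; assumes the declarations above */
	static void example_unlink(struct lnet_handle_md mdh, bool discard)
	{
		if (discard)
			__LNetMDUnlink(mdh, true);	/* marks the MD so late completions are discarded */
		else
			LNetMDUnlink(mdh);		/* expands to __LNetMDUnlink(mdh, false) */
	}
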
void lnet_detach_rsp_tracker(struct lnet_libmd *md, int cpt);
void lnet_clean_zombie_rstqs(void);
+bool lnet_md_discarded(struct lnet_libmd *md);
void lnet_finalize(struct lnet_msg *msg, int rc);
bool lnet_send_error_simulation(struct lnet_msg *msg,
enum lnet_msg_hstatus *hstatus);
* call.
*/
#define LNET_MD_FLAG_HANDLING BIT(3)
+#define LNET_MD_FLAG_DISCARD BIT(4)
struct lnet_test_peer {
/* info about peers we are trying to fail */
spin_lock_init(&kiblnd_data.kib_connd_lock);
INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
+ INIT_LIST_HEAD(&kiblnd_data.kib_connd_waits);
INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);
struct list_head kib_reconn_list;
/* peers wait for reconnection */
struct list_head kib_reconn_wait;
+ /* connections waiting for the completion of timed-out txs */
+ struct list_head kib_connd_waits;
/*
* The second that peers are pulled out from \a kib_reconn_wait
* for reconnection.
__u16 ibc_queue_depth;
/* connections max frags */
__u16 ibc_max_frags;
+ /* count of timed-out txs waiting on the cq */
+ __u16 ibc_waits;
/* receive buffers owned */
unsigned int ibc_nrx:16;
/* scheduled for attention */
if (tx->tx_sending == 0) {
tx->tx_queued = 0;
list_move(&tx->tx_list, &zombies);
+ } else {
+ /* keep tx until cq destroy */
+ list_move(&tx->tx_list, &conn->ibc_zombie_txs);
+ conn->ibc_waits++;
}
}
kiblnd_txlist_done(&zombies, -ECONNABORTED, LNET_MSG_STATUS_OK);
}
+static int
+kiblnd_tx_may_discard(struct kib_conn *conn)
+{
+ int rc = 0;
+ struct kib_tx *nxt;
+ struct kib_tx *tx;
+
+ spin_lock(&conn->ibc_lock);
+
+ list_for_each_entry_safe(tx, nxt, &conn->ibc_zombie_txs, tx_list) {
+ if (tx->tx_sending > 0 && tx->tx_lntmsg[0] &&
+ lnet_md_discarded(tx->tx_lntmsg[0]->msg_md)) {
+ tx->tx_sending--;
+ if (tx->tx_sending == 0) {
+ kiblnd_conn_decref(tx->tx_conn);
+ tx->tx_conn = NULL;
+ rc = 1;
+ }
+ }
+ }
+
+ spin_unlock(&conn->ibc_lock);
+ return rc;
+}
+
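
To make the reference counting explicit, here is a toy userspace model of the scan above (all toy_* names are hypothetical; the kernel types and the ibc_lock are simplified away): a zombie tx pins its connection while its sending count is nonzero, and a discarded MD lets the scan drain that count without waiting for a cq completion.

	#include <stdbool.h>

	struct toy_tx { int sending; bool md_discarded; bool holds_conn_ref; };

	/* returns 1 when some tx fully drained and released its conn ref */
	static int toy_tx_may_discard(struct toy_tx *txs, int ntx, int *conn_refs)
	{
		int rc = 0;

		for (int i = 0; i < ntx; i++) {
			struct toy_tx *tx = &txs[i];

			if (tx->sending > 0 && tx->md_discarded) {
				tx->sending--;
				if (tx->sending == 0 && tx->holds_conn_ref) {
					tx->holds_conn_ref = false;
					(*conn_refs)--;	/* kiblnd_conn_decref() in the real code */
					rc = 1;
				}
			}
		}
		return rc;
	}
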
static void
kiblnd_finalise_conn(struct kib_conn *conn)
{
}
if (ktime_compare(ktime_get(), tx->tx_deadline) >= 0) {
- CERROR("Timed out tx: %s, %lld seconds\n",
+ CERROR("Timed out tx: %s(WSQ:%d%d%d), %lld seconds\n",
kiblnd_queue2str(conn, txs),
+ tx->tx_waiting, tx->tx_sending, tx->tx_queued,
kiblnd_timeout() +
ktime_ms_delta(ktime_get(),
tx->tx_deadline) / MSEC_PER_SEC);
}
if (!list_empty(&kiblnd_data.kib_connd_conns)) {
+ int wait;
conn = list_entry(kiblnd_data.kib_connd_conns.next,
struct kib_conn, ibc_list);
list_del(&conn->ibc_list);
dropped_lock = 1;
kiblnd_disconnect_conn(conn);
- kiblnd_conn_decref(conn);
+ wait = conn->ibc_waits;
+ if (wait == 0) /* otherwise keep the ref for kib_connd_waits below */
+ kiblnd_conn_decref(conn);
spin_lock_irqsave(lock, flags);
+
+ if (wait)
+ list_add_tail(&conn->ibc_list,
+ &kiblnd_data.kib_connd_waits);
}
while (reconn < KIB_RECONN_BREAK) {
spin_lock_irqsave(lock, flags);
}
+ if (!list_empty(&kiblnd_data.kib_connd_waits)) {
+ conn = list_entry(kiblnd_data.kib_connd_waits.next,
+ struct kib_conn, ibc_list);
+ list_del(&conn->ibc_list);
+ spin_unlock_irqrestore(lock, flags);
+
+ dropped_lock = kiblnd_tx_may_discard(conn);
+ if (dropped_lock) /* a tx drained: drop the ref kept when the conn was queued */
+ kiblnd_conn_decref(conn);
+
+ spin_lock_irqsave(lock, flags);
+ if (dropped_lock == 0)
+ list_add_tail(&conn->ibc_list,
+ &kiblnd_data.kib_connd_waits);
+ }
+
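The hunk above is a polling retry: connd pops one connection from kib_connd_waits, runs the discard scan outside the connd lock, and either drops the reference kept earlier or re-queues the connection for a later pass. A self-contained sketch of that step (toy_* names hypothetical):

	#include <stdbool.h>
	#include <stddef.h>

	struct toy_conn { struct toy_conn *next; int refs; bool can_discard; };

	static struct toy_conn *toy_pop(struct toy_conn **q)
	{
		struct toy_conn *c = *q;

		if (c)
			*q = c->next;
		return c;
	}

	static void toy_push_tail(struct toy_conn **q, struct toy_conn *c)
	{
		c->next = NULL;
		while (*q)
			q = &(*q)->next;
		*q = c;
	}

	/* one connd pass over the wait queue */
	static void toy_connd_wait_step(struct toy_conn **waits)
	{
		struct toy_conn *conn = toy_pop(waits);

		if (conn == NULL)
			return;
		if (conn->can_discard)
			conn->refs--;			/* the ref kept when the conn was queued */
		else
			toy_push_tail(waits, conn);	/* no progress yet: poll again later */
	}
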
/* careful with the jiffy wrap... */
timeout = (int)(deadline - jiffies);
if (timeout <= 0) {
* \retval -ENOENT If \a mdh does not point to a valid MD object.
*/
int
-LNetMDUnlink(struct lnet_handle_md mdh)
+__LNetMDUnlink(struct lnet_handle_md mdh, bool discard)
{
struct lnet_event ev;
struct lnet_libmd *md = NULL;
handler = md->md_handler;
}
+ if (discard)
+ md->md_flags |= LNET_MD_FLAG_DISCARD;
+
if (md->md_rspt_ptr != NULL)
lnet_detach_rsp_tracker(md, cpt);
return 0;
}
-EXPORT_SYMBOL(LNetMDUnlink);
+EXPORT_SYMBOL(__LNetMDUnlink);
+
+bool
+lnet_md_discarded(struct lnet_libmd *md)
+{
+ bool rc;
+ int cpt;
+
+ if (md == NULL)
+ return false;
+
+ cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
+ lnet_res_lock(cpt);
+ rc = md->md_flags & LNET_MD_FLAG_DISCARD;
+ lnet_res_unlock(cpt);
+
+ return rc;
+}
+EXPORT_SYMBOL(lnet_md_discarded);
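
The flag handshake reduces to set-under-lock / test-under-lock. Below is a userspace model with a pthread mutex standing in for the per-CPT res lock (toy_* names hypothetical, not the LNet API):

	#include <pthread.h>
	#include <stdbool.h>

	struct toy_md {
		pthread_mutex_t lock;	/* init with PTHREAD_MUTEX_INITIALIZER */
		unsigned int flags;
	};
	#define TOY_MD_FLAG_DISCARD 0x10	/* mirrors LNET_MD_FLAG_DISCARD, BIT(4) */

	/* the unlink side sets the bit under the lock, as __LNetMDUnlink() does */
	static void toy_unlink(struct toy_md *md, bool discard)
	{
		pthread_mutex_lock(&md->lock);
		if (discard)
			md->flags |= TOY_MD_FLAG_DISCARD;
		pthread_mutex_unlock(&md->lock);
	}

	/* the LND side tests it under the same lock, as lnet_md_discarded() does */
	static bool toy_discarded(struct toy_md *md)
	{
		bool rc;

		pthread_mutex_lock(&md->lock);
		rc = md->flags & TOY_MD_FLAG_DISCARD;
		pthread_mutex_unlock(&md->lock);
		return rc;
	}
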
return req->rq_receiving_reply;
}
+#define ptlrpc_cli_wait_unlink(req) __ptlrpc_cli_wait_unlink(req, NULL)
+
static inline int
-ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
+__ptlrpc_cli_wait_unlink(struct ptlrpc_request *req, bool *discard)
{
int rc;
return 1;
}
+ if (discard) {
+ *discard = false;
+ if (req->rq_reply_unlinked && req->rq_req_unlinked == 0) {
+ *discard = true;
+ spin_unlock(&req->rq_lock);
+ return 1; /* Should call again after LNetMDUnlink */
+ }
+ }
+
rc = !req->rq_req_unlinked || !req->rq_reply_unlinked ||
req->rq_receiving_reply;
spin_unlock(&req->rq_lock);
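
A toy model of the new decision (toy_* names hypothetical): once the reply MD has been unlinked but the request-out MD has not, its send completion may never arrive, so the function reports "still busy" one more time and tells the caller to discard the request MD and re-check, which is exactly what ptlrpc_unregister_reply() does below.

	#include <stdbool.h>

	struct toy_req { bool req_unlinked; bool reply_unlinked; bool receiving; };

	static int toy_wait_unlink(struct toy_req *req, bool *discard)
	{
		if (discard) {
			*discard = false;
			if (req->reply_unlinked && !req->req_unlinked) {
				*discard = true;
				return 1;	/* unlink with discard, then call again */
			}
		}
		return !req->req_unlinked || !req->reply_unlinked || req->receiving;
	}
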
* not corrupt any data.
*/
if (req->rq_phase == RQ_PHASE_UNREG_RPC &&
- ptlrpc_client_recv_or_unlink(req))
+ ptlrpc_cli_wait_unlink(req))
continue;
if (req->rq_phase == RQ_PHASE_UNREG_BULK &&
ptlrpc_client_bulk_active(req))
/*
* Check if we still need to wait for unlink.
*/
- if (ptlrpc_client_recv_or_unlink(req) ||
+ if (ptlrpc_cli_wait_unlink(req) ||
ptlrpc_client_bulk_active(req))
continue;
/* If there is no need to resend, fail it now. */
*/
static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
{
+ bool discard = false;
/*
* Might sleep.
*/
/*
* Nothing left to do.
*/
- if (!ptlrpc_client_recv_or_unlink(request))
+ if (!__ptlrpc_cli_wait_unlink(request, &discard))
RETURN(1);
LNetMDUnlink(request->rq_reply_md_h);
+ if (discard) /* Discard the request-out callback */
+ __LNetMDUnlink(request->rq_req_md_h, discard);
+
/*
* Let's check it once again.
*/
- if (!ptlrpc_client_recv_or_unlink(request))
+ if (!ptlrpc_cli_wait_unlink(request))
RETURN(1);
/* Move to "Unregistering" phase as reply was not unlinked yet. */
while (seconds > 0 &&
wait_event_idle_timeout(
*wq,
- !ptlrpc_client_recv_or_unlink(request),
+ !ptlrpc_cli_wait_unlink(request),
cfs_time_seconds(1)) == 0)
seconds -= 1;
if (seconds > 0) {
RETURN (0);
}
-static void mdunlink_iterate_helper(struct lnet_handle_md *bd_mds, int count)
+#define mdunlink_iterate_helper(mds, count) \
+ __mdunlink_iterate_helper(mds, count, false)
+static void __mdunlink_iterate_helper(struct lnet_handle_md *bd_mds,
+ int count, bool discard)
{
int i;
for (i = 0; i < count; i++)
- LNetMDUnlink(bd_mds[i]);
+ __LNetMDUnlink(bd_mds[i], discard);
}
#ifdef HAVE_SERVER_SUPPORT
* but we must still wait_event_idle_timeout() in this case, to give
* us a chance to run server_bulk_callback()
*/
- mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+ __mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw, true);
for (;;) {
/* Network access will complete in finite time but the HUGE