From a8ba5c645f91faf86a84c99dd2cc049bc54e12b1 Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Tue, 28 Oct 2014 16:56:41 -0700 Subject: [PATCH] LU-4181 tests: cleanup lustre before starting lnet-selftest.sh Clean up Lustre before starting lnet-selftest.sh in an attempt to identify why RPC requests are timing out. The approach is to land this patch and monitor the failure rate for LU-4181 to see if it's been reduced. The current failure rate is approximately 1 failure per day. Added a debug message to print out RPC stats when an RPC error occurs. Signed-off-by: Amir Shehata Change-Id: I86cbc00af30b165918a57e6665ad725a9926c089 Reviewed-on: http://review.whamcloud.com/12469 Tested-by: Jenkins Reviewed-by: Isaac Huang Tested-by: Maloo Reviewed-by: Doug Oucharek Reviewed-by: Oleg Drokin --- lnet/selftest/rpc.c | 8 ++++++++ lustre/tests/lnet-selftest.sh | 7 ++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lnet/selftest/rpc.c b/lnet/selftest/rpc.c index 91c7009..f9bb996 100644 --- a/lnet/selftest/rpc.c +++ b/lnet/selftest/rpc.c @@ -1460,6 +1460,14 @@ srpc_lnet_ev_handler(lnet_event_t *ev) if (ev->status != 0) { spin_lock(&srpc_data.rpc_glock); srpc_data.rpc_counters.errors++; + CERROR("ev->status = %d, ev->type = %d, errors = %u, " + "rpcs_sent = %u, rpcs_rcvd = %u, rpcs_dropped = %u, " + "rpcs_expired = %u\n", + ev->status, ev->type, srpc_data.rpc_counters.errors, + srpc_data.rpc_counters.rpcs_sent, + srpc_data.rpc_counters.rpcs_rcvd, + srpc_data.rpc_counters.rpcs_dropped, + srpc_data.rpc_counters.rpcs_expired); spin_unlock(&srpc_data.rpc_glock); } diff --git a/lustre/tests/lnet-selftest.sh b/lustre/tests/lnet-selftest.sh index 386d3d2..6b0dd02 100755 --- a/lustre/tests/lnet-selftest.sh +++ b/lustre/tests/lnet-selftest.sh @@ -56,9 +56,10 @@ fi # 2) it's theoretically possible that lst tests congest comm paths so tightly # that mounted lustre wouldn't able to perform some of its background activities if is_mounted $MOUNT || is_mounted $MOUNT2; then - 
local_mode && CLIENTONLY=yes - stopall - RESTORE_MOUNT=yes + local_mode && CLIENTONLY=yes + RESTORE_MOUNT=yes + LOAD_MODULES_REMOTE=true + cleanupall fi build_test_filter -- 1.8.3.1