From b1827ff1da829ae5f320a417217757221eedda5f Mon Sep 17 00:00:00 2001 From: Andriy Skulysh Date: Wed, 22 Aug 2018 22:22:49 +0300 Subject: [PATCH] LU-7558 ptlrpc: connect vs import invalidate race Connect can't be sent while import invalidate is in progress, which leaves the import in an uninitialized state. Don't allow reconnect in the evicted state. Change-Id: I79a1a1eb05fede30e100ba09b6f3f98636a46213 Cray-bug-id: LUS-6322 Signed-off-by: Andriy Skulysh Reviewed-by: Alexander Boyko Reviewed-by: Andrew Perepechko Reviewed-on: https://review.whamcloud.com/33718 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/ptlrpc/import.c | 6 ++++++ lustre/ptlrpc/recover.c | 2 ++ lustre/tests/recovery-small.sh | 19 +++++++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 1aed5f9..a5361e1 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -437,6 +437,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520 #define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521 +#define OBD_FAIL_PTLRPC_CONNECT_RACE 0x531 #define OBD_FAIL_OBD_PING_NET 0x600 /* OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 obsolete since 1.5 */ diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 6cf814b7..6bb1666 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -37,6 +37,7 @@ #define DEBUG_SUBSYSTEM S_RPC #include +#include #include #include #include @@ -291,6 +292,10 @@ void ptlrpc_invalidate_import(struct obd_import *imp) if (!imp->imp_invalid || imp->imp_obd->obd_no_recov) ptlrpc_deactivate_import(imp); + if (OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CONNECT_RACE)) { + OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE); + msleep(10 * MSEC_PER_SEC); + } CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2); LASSERT(imp->imp_invalid); @@ -668,6 +673,7 @@ int 
ptlrpc_connect_import(struct obd_import *imp) CERROR("already connected\n"); RETURN(0); } else if (imp->imp_state == LUSTRE_IMP_CONNECTING || + imp->imp_state == LUSTRE_IMP_EVICTED || imp->imp_connected) { spin_unlock(&imp->imp_lock); CERROR("already connecting\n"); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 873adb1..ab93c45 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -346,6 +346,8 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async) if (rc) GOTO(out, rc); + OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE); + rc = ptlrpc_connect_import(imp); if (rc) GOTO(out, rc); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 84e1a44..12ae698 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1593,6 +1593,25 @@ test_65() { } run_test 65 "lock enqueue for destroyed export" +test_67() +{ +#define OBD_FAIL_PTLRPC_CONNECT_RACE 0x531 + $LCTL set_param fail_loc=0x80000531 + + local mdtname="MDT0000" + local mdccli=$($LCTL dl | grep "${mdtname}-mdc" | awk '{print $4;}') + local conn_uuid=$($LCTL get_param -n mdc.${mdccli}.mds_conn_uuid) + $LCTL set_param "mdc.${mdccli}.import=connection=${conn_uuid}" & + sleep 2 + + mds_evict_client + sleep 1 + + client_reconnect + wait +} +run_test 67 "connect vs import invalidate race" + check_cli_ir_state() { local NODE=${1:-$HOSTNAME} -- 1.8.3.1