From 69d1e9805172c3e8da59ad99f470831951253695 Mon Sep 17 00:00:00 2001 From: Andriy Skulysh Date: Wed, 22 Aug 2018 22:22:49 +0300 Subject: [PATCH] LU-7558 ptlrpc: connect vs import invalidate race Connect can't be sent while import invalidate is in progress, thus it leaves the import in an uninitialized state. Don't allow reconnect in the evicted state. Lustre-change: https://review.whamcloud.com/33718 Lustre-commit: b1827ff1da829ae5f320a417217757221eedda5f Change-Id: I79a1a1eb05fede30e100ba09b6f3f98636a46213 Cray-bug-id: LUS-6322 Signed-off-by: Andriy Skulysh Reviewed-by: Alexander Boyko Reviewed-by: Andrew Perepechko Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin Signed-off-by: Minh Diep Reviewed-on: https://review.whamcloud.com/34293 Tested-by: Jenkins Tested-by: Maloo --- lustre/include/obd_support.h | 1 + lustre/ptlrpc/import.c | 6 ++++++ lustre/ptlrpc/recover.c | 2 ++ lustre/tests/recovery-small.sh | 19 +++++++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index ec5c90e..f8a0e6f 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -438,6 +438,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520 #define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521 +#define OBD_FAIL_PTLRPC_CONNECT_RACE 0x531 #define OBD_FAIL_OBD_PING_NET 0x600 /* OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 obsolete since 1.5 */ diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 8ca1dec..db7e5e3 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -37,6 +37,7 @@ #define DEBUG_SUBSYSTEM S_RPC #include +#include <linux/delay.h> #include #include #include @@ -291,6 +292,10 @@ void ptlrpc_invalidate_import(struct obd_import *imp) if (!imp->imp_invalid || imp->imp_obd->obd_no_recov) ptlrpc_deactivate_import(imp); + if (OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CONNECT_RACE)) { + OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE); + msleep(10 * 
MSEC_PER_SEC); + } CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2); LASSERT(imp->imp_invalid); @@ -668,6 +673,7 @@ int ptlrpc_connect_import(struct obd_import *imp) CERROR("already connected\n"); RETURN(0); } else if (imp->imp_state == LUSTRE_IMP_CONNECTING || + imp->imp_state == LUSTRE_IMP_EVICTED || imp->imp_connected) { spin_unlock(&imp->imp_lock); CERROR("already connecting\n"); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index b054f9a..4d5f28b 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -346,6 +346,8 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async) if (rc) GOTO(out, rc); + OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE); + rc = ptlrpc_connect_import(imp); if (rc) GOTO(out, rc); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 84e1a44..12ae698 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1593,6 +1593,25 @@ test_65() { } run_test 65 "lock enqueue for destroyed export" +test_67() +{ +#define OBD_FAIL_PTLRPC_CONNECT_RACE 0x531 + $LCTL set_param fail_loc=0x80000531 + + local mdtname="MDT0000" + local mdccli=$($LCTL dl | grep "${mdtname}-mdc" | awk '{print $4;}') + local conn_uuid=$($LCTL get_param -n mdc.${mdccli}.mds_conn_uuid) + $LCTL set_param "mdc.${mdccli}.import=connection=${conn_uuid}" & + sleep 2 + + mds_evict_client + sleep 1 + + client_reconnect + wait +} +run_test 67 "connect vs import invalidate race" + check_cli_ir_state() { local NODE=${1:-$HOSTNAME} -- 1.8.3.1