From d5a51f0b718ecf6fca81e15c396e56141b62df6c Mon Sep 17 00:00:00 2001 From: Andriy Skulysh Date: Wed, 22 Aug 2018 22:22:49 +0300 Subject: [PATCH] LU-7558 ptlrpc: connect vs import invalidate race Connect can't be sent while import invalidate is in progress, which leaves the import in an uninitialized state. Don't allow reconnect in the evicted state. Lustre-change: https://review.whamcloud.com/33718 Lustre-commit: b1827ff1da829ae5f320a417217757221eedda5f Change-Id: I79a1a1eb05fede30e100ba09b6f3f98636a46213 Cray-bug-id: LUS-6322 Signed-off-by: Andriy Skulysh Reviewed-by: Alexander Boyko Reviewed-by: Andrew Perepechko Reviewed-by: Mike Pershin Signed-off-by: Minh Diep Reviewed-on: https://review.whamcloud.com/34290 Reviewed-by: Alex Zhuravlev Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 2 ++ lustre/ptlrpc/import.c | 6 ++++++ lustre/ptlrpc/recover.c | 2 ++ lustre/tests/recovery-small.sh | 19 +++++++++++++++++++ 4 files changed, 29 insertions(+) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 549b151..59f3ab5 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -426,6 +426,8 @@ extern char obd_jobid_var[]; #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520 +#define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521 +#define OBD_FAIL_PTLRPC_CONNECT_RACE 0x531 #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 6851cec..827a989 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -37,6 +37,7 @@ #define DEBUG_SUBSYSTEM S_RPC #include +#include #include #include #include @@ -292,6 +293,10 @@ void ptlrpc_invalidate_import(struct obd_import *imp) if (!imp->imp_invalid || imp->imp_obd->obd_no_recov) ptlrpc_deactivate_import(imp); + if (OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CONNECT_RACE)) { + 
OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE); + msleep(10 * MSEC_PER_SEC); + } CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2); LASSERT(imp->imp_invalid); @@ -666,6 +671,7 @@ int ptlrpc_connect_import(struct obd_import *imp) CERROR("already connected\n"); RETURN(0); } else if (imp->imp_state == LUSTRE_IMP_CONNECTING || + imp->imp_state == LUSTRE_IMP_EVICTED || imp->imp_connected) { spin_unlock(&imp->imp_lock); CERROR("already connecting\n"); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index c526e9e..aacb929 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -346,6 +346,8 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async) if (rc) GOTO(out, rc); + OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE); + rc = ptlrpc_connect_import(imp); if (rc) GOTO(out, rc); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 0a66e58..c4edaa0 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1601,6 +1601,25 @@ test_65() { } run_test 65 "lock enqueue for destroyed export" +test_67() +{ +#define OBD_FAIL_PTLRPC_CONNECT_RACE 0x531 + $LCTL set_param fail_loc=0x80000531 + + local mdtname="MDT0000" + local mdccli=$($LCTL dl | grep "${mdtname}-mdc" | awk '{print $4;}') + local conn_uuid=$($LCTL get_param -n mdc.${mdccli}.mds_conn_uuid) + $LCTL set_param "mdc.${mdccli}.import=connection=${conn_uuid}" & + sleep 2 + + mds_evict_client + sleep 1 + + client_reconnect + wait +} +run_test 67 "connect vs import invalidate race" + check_cli_ir_state() { local NODE=${1:-$HOSTNAME} -- 1.8.3.1