From df5d7a7816e8472397e5f99fd2d44d4cd2a4754d Mon Sep 17 00:00:00 2001
From: Patrick Farrell
Date: Tue, 16 Jul 2019 15:26:43 -0400
Subject: [PATCH] LU-12559 ptlrpc: Hold imp lock for idle reconnect

Idle reconnect sets import state to IMP_NEW, then releases
the import lock before calling ptlrpc_connect_import. This
creates a gap where an import in IMP_NEW state is exposed,
which can cause new requests to fail with EIO.

Hold the lock across the call so as not to expose imports
in this state.

Lustre-change: https://review.whamcloud.com/35530
Lustre-commit: e9472c54ac820c3a0db2318a6ef894c3971e6e0b

Signed-off-by: Patrick Farrell
Change-Id: I9f8509d11c4d5a8917a313349534d98b964cd588
Reviewed-by: Alex Zhuravlev
Reviewed-by: Wang Shilong
Signed-off-by: Minh Diep
Reviewed-on: https://review.whamcloud.com/36215
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---
 lustre/include/lustre_net.h |  1 +
 lustre/ptlrpc/client.c      | 15 +++++++--------
 lustre/ptlrpc/import.c      | 19 +++++++++++++++----
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 56ed28e..2d4e08b 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -2320,6 +2320,7 @@ void ptlrpc_hr_fini(void);
  * @{
  */
 int ptlrpc_connect_import(struct obd_import *imp);
+int ptlrpc_connect_import_locked(struct obd_import *imp);
 int ptlrpc_init_import(struct obd_import *imp);
 int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
 int ptlrpc_disconnect_and_idle_import(struct obd_import *imp);
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index aaf5786..4385153 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -864,8 +864,7 @@ ptlrpc_request_alloc_internal(struct obd_import *imp,
 			      struct ptlrpc_request_pool * pool,
 			      const struct req_format *format)
 {
-	struct ptlrpc_request *request;
-	int connect = 0;
+	struct ptlrpc_request *request;
 
 	request = __ptlrpc_request_alloc(imp, pool);
 	if (request == NULL)
@@ -883,17 +882,17 @@ ptlrpc_request_alloc_internal(struct obd_import *imp,
 		if (imp->imp_state == LUSTRE_IMP_IDLE) {
 			imp->imp_generation++;
 			imp->imp_initiated_at = imp->imp_generation;
-			imp->imp_state = LUSTRE_IMP_NEW;
-			connect = 1;
-		}
-		spin_unlock(&imp->imp_lock);
-		if (connect) {
-			rc = ptlrpc_connect_import(imp);
+			imp->imp_state = LUSTRE_IMP_NEW;
+
+			/* connect_import_locked releases imp_lock */
+			rc = ptlrpc_connect_import_locked(imp);
 			if (rc < 0) {
 				ptlrpc_request_free(request);
 				return NULL;
 			}
 			ptlrpc_pinger_add_import(imp);
+		} else {
+			spin_unlock(&imp->imp_lock);
 		}
 	}
 
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index 72781f9..2e8e53e 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -656,13 +656,22 @@ static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
 	return 0;
 }
 
+int ptlrpc_connect_import(struct obd_import *imp)
+{
+	spin_lock(&imp->imp_lock);
+	return ptlrpc_connect_import_locked(imp);
+}
+
 /**
  * Attempt to (re)connect import \a imp. This includes all preparations,
  * initializing CONNECT RPC request and passing it to ptlrpcd for
  * actual sending.
+ *
+ * Assumes imp->imp_lock is held, and releases it.
+ *
  * Returns 0 on success or error code.
  */
-int ptlrpc_connect_import(struct obd_import *imp)
+int ptlrpc_connect_import_locked(struct obd_import *imp)
 {
 	struct obd_device *obd = imp->imp_obd;
 	int initial_connect = 0;
@@ -680,7 +689,8 @@ int ptlrpc_connect_import(struct obd_import *imp)
 	int rc;
 	ENTRY;
 
-	spin_lock(&imp->imp_lock);
+	assert_spin_locked(&imp->imp_lock);
+
 	if (imp->imp_state == LUSTRE_IMP_CLOSED) {
 		spin_unlock(&imp->imp_lock);
 		CERROR("can't connect to a closed import\n");
@@ -1756,12 +1766,13 @@ static int ptlrpc_disconnect_idle_interpret(const struct lu_env *env,
 			connect = 1;
 		}
 	}
-	spin_unlock(&imp->imp_lock);
 
 	if (connect) {
-		rc = ptlrpc_connect_import(imp);
+		rc = ptlrpc_connect_import_locked(imp);
 		if (rc >= 0)
 			ptlrpc_pinger_add_import(imp);
+	} else {
+		spin_unlock(&imp->imp_lock);
 	}
 
 	return 0;
-- 
1.8.3.1