From 42d8cb04637b62d91deb9ef047f1d9ef8473eca6 Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Mon, 11 Feb 2019 14:27:54 +0300 Subject: [PATCH] LU-11951 ptlrpc: reset generation for old requests All requests generated while the import is changing from FULL to IDLE need to be moved to the new generation so that they are not dropped. Change-Id: I59d9b92680c724132dba9c7315c26e9851c5d5d2 Signed-off-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/34221 Reviewed-by: Patrick Farrell Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/ptlrpc/import.c | 19 ++++++++++++++++++- lustre/target/tgt_handler.c | 2 ++ lustre/tests/sanity.sh | 21 +++++++++++++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 4cc253d..d958bae 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -340,6 +340,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_STATFS_DELAY 0x242 #define OBD_FAIL_OST_INTEGRITY_FAULT 0x243 #define OBD_FAIL_OST_INTEGRITY_CMP 0x244 +#define OBD_FAIL_OST_DISCONNECT_DELAY 0x245 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 0ec9bc3..30a7ce6 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -1674,6 +1674,22 @@ out: } EXPORT_SYMBOL(ptlrpc_disconnect_import); +static void ptlrpc_reset_reqs_generation(struct obd_import *imp) +{ + struct ptlrpc_request *old, *tmp; + + /* tag all resendable requests generated before disconnection + * notice this code is part of disconnect-at-idle path only */ + list_for_each_entry_safe(old, tmp, &imp->imp_delayed_list, + rq_list) { + spin_lock(&old->rq_lock); + if (old->rq_import_generation == imp->imp_generation - 1 && + !old->rq_no_resend) + old->rq_import_generation = imp->imp_generation; + spin_unlock(&old->rq_lock); + } +} + static int
ptlrpc_disconnect_idle_interpret(const struct lu_env *env, struct ptlrpc_request *req, void *args, int rc) @@ -1681,7 +1697,7 @@ static int ptlrpc_disconnect_idle_interpret(const struct lu_env *env, struct obd_import *imp = req->rq_import; int connect = 0; - DEBUG_REQ(D_HA, req, "inflight=%d, refcount=%d: rc = %d\n", + DEBUG_REQ(D_HA, req, "inflight=%d, refcount=%d: rc = %d ", atomic_read(&imp->imp_inflight), atomic_read(&imp->imp_refcount), rc); @@ -1700,6 +1716,7 @@ static int ptlrpc_disconnect_idle_interpret(const struct lu_env *env, imp->imp_generation++; imp->imp_initiated_at = imp->imp_generation; IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_NEW); + ptlrpc_reset_reqs_generation(imp); connect = 1; } } diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index 6db1270..0330be3 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -1011,6 +1011,8 @@ int tgt_disconnect(struct tgt_session_info *tsi) ENTRY; + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_DISCONNECT_DELAY, cfs_fail_val); + rc = target_handle_disconnect(tgt_ses_req(tsi)); if (rc) RETURN(err_serious(rc)); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 8b858b6..c9d6186 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -20282,6 +20282,27 @@ test_811() { } run_test 811 "orphan name stub can be cleaned up in startup" +test_812() { + [ $OST1_VERSION -lt $(version_code 2.12.51) ] && + skip "OST < 2.12.51 doesn't support this fail_loc" + + $LFS setstripe -c 1 -i 0 $DIR/$tfile + # ensure ost1 is connected + stat $DIR/$tfile >/dev/null || error "can't stat" + wait_osc_import_state client ost1 FULL + # no locks, no reqs to let the connection idle + cancel_lru_locks osc + + # delay OST_DISCONNECT on OST1 to put OSC into intermediate state +#define OBD_FAIL_OST_DISCONNECT_DELAY 0x245 + do_facet ost1 "$LCTL set_param fail_loc=0x245 fail_val=8" + wait_osc_import_state client ost1 CONNECTING + do_facet ost1 "$LCTL set_param fail_loc=0 fail_val=0" + + stat 
$DIR/$tfile >/dev/null || error "can't stat file" +} +run_test 812 "do not drop reqs generated when imp is going to idle (LU-11951)" + # # tests that do cleanup/setup should be run at the end # -- 1.8.3.1